Searched full:numel (Results 1 – 25 of 1138) sorted by relevance


/aosp_15_r20/external/pytorch/aten/src/ATen/native/
EmbeddingBag.cpp
120 TORCH_CHECK(select_indices.numel() == add_indices.numel()); in index_select_add()
129 auto numel = add_indices.numel(); in index_select_add() local
137 for (const auto i : c10::irange(numel)) { in index_select_add()
208 int64_t output_size = offsets.numel() - 1; in index_select_add()
213 output_size = offsets.numel() - 1; in index_select_add()
215 output_size = offsets.numel(); in index_select_add()
216 offsets_include_last.resize(offsets.numel() + 1); in index_select_add()
217 if (offsets.numel() > 0) { in index_select_add()
221 sizeof(index_t) * offsets.numel()); in index_select_add()
223 offsets_include_last[offsets.numel()] = select_indices.numel(); in index_select_add()
[all …]
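The EmbeddingBag.cpp hits above show a recurring ATen pattern: validate that two index tensors carry the same number of elements with TORCH_CHECK, cache numel() in a local, and walk the elements with c10::irange. A minimal sketch of that pattern, using only public ATen/c10 utilities (this is not the EmbeddingBag implementation):

    #include <ATen/ATen.h>
    #include <c10/util/irange.h>

    // Sketch only: mirrors the check-then-iterate pattern from the matches above.
    void iterate_paired_indices(const at::Tensor& select_indices,
                                const at::Tensor& add_indices) {
      TORCH_CHECK(select_indices.numel() == add_indices.numel(),
                  "select_indices and add_indices must have the same number of elements");
      const auto numel = add_indices.numel();  // numel() = product of all dimension sizes
      for (const auto i : c10::irange(numel)) {
        (void)i;  // per-element work (index lookups, accumulation) would go here
      }
    }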
TensorAdvancedIndexing.cpp
176 auto is_index_empty = index.numel() == 0; in TORCH_META_FUNC()
284 int64_t numIndices = index.numel(); in TORCH_PRECOMPUTE_META_FUNC()
330 auto numel = index.numel(); in index_func_meta_impl() local
342 TORCH_CHECK(numel == (source.dim() == 0 ? 1 : source.size(dim)), in index_func_meta_impl()
343 func, "_(): Number of indices (", numel, ") should be equal to source.size(dim): (", in index_func_meta_impl()
686 if (self.numel() == 0) { in _unsafe_masked_index()
712 if (self.numel() == 0) { in _unsafe_masked_index_put_accumulate()
751 …numel() == index.numel(), "put_(): Expected source and index to have the same number of elements, … in put_()
752 …TORCH_CHECK_INDEX(!(self.numel() == 0 && index.numel() != 0), "put_(): Tried to put elements into … in put_()
759 if (index.numel() == 0) { in put_()
[all …]
Unique.cpp
42 int64_t numel = input.numel(); in unique_cpu_bool_template() local
47 if (numel == 0) { in unique_cpu_bool_template()
58 at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) { in unique_cpu_bool_template()
69 int64_t num_false = numel - num_true; in unique_cpu_bool_template()
99 at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) { in unique_cpu_bool_template()
165 int64_t numel = input.numel(); in unique_cpu_sorted_template() local
170 if (numel == 0) { in unique_cpu_sorted_template()
197 at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) { in unique_cpu_sorted_template()
231 unique_index_data[unique_count] = numel; in unique_cpu_sorted_template()
234 at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) { in unique_cpu_sorted_template()
[all …]
/aosp_15_r20/external/pytorch/test/cpp/api/
tensor.cpp
158 ASSERT_EQ(tensor.numel(), 1); in TEST()
163 ASSERT_EQ(tensor.numel(), 1); in TEST()
168 ASSERT_EQ(tensor.numel(), 1); in TEST()
173 ASSERT_EQ(tensor.numel(), 1); in TEST()
178 ASSERT_EQ(tensor.numel(), 1); in TEST()
185 ASSERT_EQ(tensor.numel(), 1); in TEST()
190 ASSERT_EQ(tensor.numel(), 1); in TEST()
197 ASSERT_EQ(tensor.numel(), 1); in TEST()
204 ASSERT_EQ(tensor.numel(), 3); in TEST()
211 ASSERT_EQ(tensor.numel(), 3); in TEST()
[all …]
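The assertions in test/cpp/api/tensor.cpp above exercise the basic contract of numel(): it is the product of the sizes, so a single-value tensor reports 1 and a three-element tensor reports 3. A small hedged example against the public libtorch C++ API (not the test file itself):

    #include <torch/torch.h>
    #include <cassert>

    int main() {
      const auto scalar = torch::tensor(3.14);           // single value
      const auto vec = torch::tensor({1.0, 2.0, 3.0});   // shape [3]
      assert(scalar.numel() == 1);
      assert(vec.numel() == 3);
      assert(torch::zeros({2, 3, 4}).numel() == 2 * 3 * 4);  // product of sizes
      return 0;
    }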
/aosp_15_r20/external/pytorch/torch/_inductor/codegen/
simd.py
87 numel: sympy.Expr,
99 self.numel = numel
114 numel: sympy.Expr,
132 numel=numel,
155 return f"IterationRangesRoot({self.name!r}, {self.numel}, ...)"
168 if V.graph.sizevars.statically_known_equals(divisor * length, self.numel):
223 if not V.graph.sizevars.statically_known_equals(self.numel, divisor):
225 add(self.lookup(divisor, FloorDiv(self.numel, divisor)))
241 numel=parent.numel / length,
454 sympy.Integer(1), tree_node.root.numel
[all …]
/aosp_15_r20/external/executorch/runtime/core/exec_aten/testing_util/
tensor_util.cpp
41 size_t numel, in data_is_close() argument
45 numel == 0 || (a != nullptr && b != nullptr), in data_is_close()
46 "Pointers must not be null when numel > 0: numel %zu, a 0x%p, b 0x%p", in data_is_close()
47 numel, in data_is_close()
53 for (size_t i = 0; i < numel; i++) { in data_is_close()
120 a.numel(), in tensors_are_close()
127 a.numel(), in tensors_are_close()
134 a.numel(), in tensors_are_close()
141 a.numel(), in tensors_are_close()
152 * underlying data elements and same numel. Note that this function is mainly
[all …]
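The data_is_close() matches above suggest the shape of the helper: null pointers are rejected whenever numel > 0, then numel elements are compared within a tolerance. A hedged stand-alone sketch of that logic (the tolerance formula and names here are assumptions, not the ExecuTorch source):

    #include <cmath>
    #include <cstddef>

    bool data_is_close_sketch(const float* a, const float* b, size_t numel,
                              double rtol, double atol) {
      if (numel > 0 && (a == nullptr || b == nullptr)) {
        return false;  // pointers must not be null when there are elements to compare
      }
      for (size_t i = 0; i < numel; i++) {
        if (std::fabs(a[i] - b[i]) > atol + rtol * std::fabs(b[i])) {
          return false;
        }
      }
      return true;
    }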
/aosp_15_r20/external/pytorch/c10/xpu/test/impl/
XPUGuardTest.cpp
61 constexpr int numel = 1024; in TEST() local
62 int hostData1[numel]; in TEST()
63 initHostData(hostData1, numel); in TEST()
64 int hostData2[numel]; in TEST()
65 clearHostData(hostData2, numel); in TEST()
68 int* deviceData1 = sycl::malloc_device<int>(numel, xpu_stream1); in TEST()
72 xpu_stream1.queue().memcpy(deviceData1, hostData1, sizeof(int) * numel); in TEST()
77 xpu_stream2.queue().memcpy(hostData2, deviceData1, sizeof(int) * numel); in TEST()
81 validateHostData(hostData2, numel); in TEST()
86 clearHostData(hostData2, numel); in TEST()
[all …]
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/
ScatterGatherKernel.cu
24 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
25 (void)numel; // suppress unused warning in operator ()()
34 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
35 fastAtomicAdd(self_data_start, index, numel, *src_data, true); in operator ()()
43 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
44 fastAtomicAdd(self_data_start, index, numel, *src_data, true); in operator ()()
52 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
53 (void)numel; // suppress unused warning in operator ()()
62 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
63 (void)numel; // suppress unused warning in operator ()()
[all …]
SortStable.cu
82 int numel, in C10_LAUNCH_BOUNDS_1()
84 CUDA_KERNEL_LOOP(idx, numel) { in C10_LAUNCH_BOUNDS_1()
95 int numel, in C10_LAUNCH_BOUNDS_1()
97 CUDA_KERNEL_LOOP(idx, numel) { in C10_LAUNCH_BOUNDS_1()
143 const auto numel = nsort * nsegments; in segmented_sort_pairs_by_full_sort() local
145 auto indices_and_segment = cuda_allocator->allocate(numel * sizeof(int2)); in segmented_sort_pairs_by_full_sort()
150 dim3 grid = GET_BLOCKS(numel); in segmented_sort_pairs_by_full_sort()
154 i_s_ptr, numel, nsort_divider); in segmented_sort_pairs_by_full_sort()
191 const auto numel = nsort * nsegments; in segmented_sort_pairs() local
193 auto reverse_indices = cuda_allocator->allocate(numel * sizeof(int64_t)); in segmented_sort_pairs()
[all …]
Indexing.cu
52 int64_t numel, int64_t stride, int64_t stride_before, int64_t outer_dim, bool accumulate) { in indexing_backward_kernel() argument
53 //numel is total number of flattened indices, not expanded to dimensions that are not indexed. in indexing_backward_kernel()
74 if (idx < numel in indexing_backward_kernel()
79 if (!accumulate && (idx < numel - 1) && sorted_indices[idx] == sorted_indices[idx + 1]) { in indexing_backward_kernel()
84 const int64_t grad_row = ((int64_t) indices[idx]) * stride + z * numel * stride; in indexing_backward_kernel()
122 } while (idx < numel && sorted_indices[idx] == sorted_indices[idx - 1]); in indexing_backward_kernel()
130 int64_t numel, int64_t stride, int64_t stride_before, int64_t outer_dim, bool accumulate) { in indexing_backward_kernel_stride_1() argument
138 if ((idx < numel) && in indexing_backward_kernel_stride_1()
143 …while (((idx + num_duplicates) < numel) && (sorted_indices[idx + num_duplicates] == crnt_sorted_id… in indexing_backward_kernel_stride_1()
153 grad_row = ((int64_t)indices[idx + num_duplicates - 1]) * stride + z * numel * stride; in indexing_backward_kernel_stride_1()
[all …]
MultiTensorApply.cuh
142 if (tensor_lists[0][t].numel() == 0) { in multi_tensor_apply()
147 tensor_lists[0][t].numel(); in multi_tensor_apply()
160 const auto numel = tensor_lists[0][t].numel(); in multi_tensor_apply() local
161 const auto chunks = numel / kChunkSize + (numel % kChunkSize != 0); in multi_tensor_apply()
233 if (tensor_lists[0][t].numel() == 0) { in multi_tensor_apply()
237 tensor_lists[0][t].numel(); in multi_tensor_apply()
245 const auto numel = tensor_lists[0][t].numel(); in multi_tensor_apply() local
246 const auto chunks = numel / kChunkSize + (numel % kChunkSize != 0); in multi_tensor_apply()
313 if (tensor_lists[0][tensor_index].numel() == 0) { in multi_tensor_apply_for_fused_optimizer()
319 tensor_lists[0][tensor_index].numel(); in multi_tensor_apply_for_fused_optimizer()
[all …]
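The chunks expression shown above in MultiTensorApply.cuh is integer ceiling division: numel / kChunkSize plus one extra chunk when there is a remainder. A worked example with an illustrative chunk size (the real constant lives in the header):

    #include <cassert>
    #include <cstdint>

    constexpr int64_t kChunkSize = 65536;  // illustrative; not the actual value in the .cuh

    constexpr int64_t chunk_count(int64_t numel) {
      return numel / kChunkSize + (numel % kChunkSize != 0);
    }

    int main() {
      assert(chunk_count(0) == 0);               // empty tensors are skipped earlier anyway
      assert(chunk_count(kChunkSize) == 1);      // exact multiple: no extra chunk
      assert(chunk_count(kChunkSize + 1) == 2);  // a single leftover element adds a chunk
      return 0;
    }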
RNN.cu
57 void getLaunchConfig(dim3* block, dim3* grid, int64_t numel) { in getLaunchConfig() argument
61 TORCH_INTERNAL_ASSERT(cuda::getApplyGrid(numel, *grid, curDevice), in getLaunchConfig()
377 int64_t numel = cx.numel(); in lstm_forward_impl() local
378 if (numel == 0) return; in lstm_forward_impl()
379 getLaunchConfig(&block, &grid, numel); in lstm_forward_impl()
396 …t_gatesI, hidden_gatesI, input_biasI, hidden_biasI, cxI, hyI, cyI, workspaceI, hidden_size, numel); in lstm_forward_impl()
401 …t_gatesI, hidden_gatesI, input_biasI, hidden_biasI, cxI, hyI, cyI, workspaceI, hidden_size, numel); in lstm_forward_impl()
414 int64_t numel = cx.numel(); in lstm_backward_impl() local
415 getLaunchConfig(&block, &grid, numel); in lstm_backward_impl()
416 if (numel == 0) return; in lstm_backward_impl()
[all …]
IndexKernel.cu
60 if (iter.numel() == 0) { in gpu_index_kernel()
84 launch_kernel<launch_size_nd, launch_bound2>(iter.numel(), [=]__device__(int idx) { in gpu_index_kernel()
115 if (0 == iter.numel()) { in index_fill_kernel_impl()
143 launch_kernel<launch_size_nd, launch_bound2>(iter.numel(), loop); in index_fill_kernel_impl()
152 if (iter.numel() == 0) { in index_copy_kernel_impl()
179 launch_kernel<launch_size_nd, launch_bound2>(iter.numel(), loop); in index_copy_kernel_impl()
287 const auto numel = indexed.numel(); in cuda_take_put_kernel() local
309 … CUDA_KERNEL_ASSERT(idx < numel && idx >= -numel && "cuda_take_put_kernel() index out of bounds"); in cuda_take_put_kernel()
312 offset += numel; in cuda_take_put_kernel()
320 launch_kernel<launch_size_nd, launch_bound2>(iter.numel(), loop); in cuda_take_put_kernel()
[all …]
/aosp_15_r20/external/executorch/extension/android/src/main/java/org/pytorch/executorch/
Tensor.java
133 final ByteBuffer byteBuffer = allocateByteBuffer((int) numel(shape)); in fromBlobUnsigned()
150 final ByteBuffer byteBuffer = allocateByteBuffer((int) numel(shape)); in fromBlob()
167 final IntBuffer intBuffer = allocateIntBuffer((int) numel(shape)); in fromBlob()
184 final FloatBuffer floatBuffer = allocateFloatBuffer((int) numel(shape)); in fromBlob()
201 final LongBuffer longBuffer = allocateLongBuffer((int) numel(shape)); in fromBlob()
218 final DoubleBuffer doubleBuffer = allocateDoubleBuffer((int) numel(shape)); in fromBlob()
226 * @param data Direct buffer with native byte order that contains {@code Tensor.numel(shape)}
246 * @param data Direct buffer with native byte order that contains {@code Tensor.numel(shape)}
266 * @param data Direct buffer with native byte order that contains {@code Tensor.numel(shape)}
286 * @param data Direct buffer with native byte order that contains {@code Tensor.numel(shape)}
[all …]
/aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/
QTensor.cpp
164 if (self.numel() > 0) { in make_per_channel_quantized_tensor_cpu()
260 auto data_size = self.numel() * self.element_size(); in equal_quantized_cpu()
296 int numel, in calculate_quant_loss() argument
319 for (; i < numel; i++) { in calculate_quant_loss()
336 int64_t numel, in choose_qparams_optimized() argument
341 if (numel < 0 || numel > input_tensor.numel()) { in choose_qparams_optimized()
342 TORCH_CHECK(false, "numel is out of the bound of input tensor"); in choose_qparams_optimized()
345 TORCH_CHECK(numel <= input_tensor.numel(), "numel ", numel, in choose_qparams_optimized()
346 " greater than input_tensor.numel() ", input_tensor.numel()); in choose_qparams_optimized()
348 float xmin = *std::min_element(input_row, input_row + numel); in choose_qparams_optimized()
[all …]
/aosp_15_r20/external/pytorch/aten/src/ATen/test/
quantized_test.cpp
107 int numel = 10; in TEST() local
109 {numel}, at::device(at::kCPU).dtype(kQUInt8), scale, zero_point); in TEST()
112 for (const auto i : c10::irange(numel)) { in TEST()
119 for (const auto i : c10::irange(numel)) { in TEST()
125 int numel = 10; in TEST() local
126 auto scales = rand({numel}).toType(kDouble); in TEST()
131 {numel}, in TEST()
138 for (const auto i : c10::irange(numel)) { in TEST()
145 for (const auto i : c10::irange(numel)) { in TEST()
218 auto numel = c10::multiply_integers(shape); in TEST() local
[all …]
scalar_tensor_test.cpp
59 const auto numel = c10::multiply_integers(s->begin(), s->end()); in test() local
60 ASSERT_EQ(t.numel(), numel); in test()
128 if (t.numel() != 0) { in test()
135 if (t.numel() != 0) { in test()
145 if (t.dim() > 0 && t.numel() != 0) { in test()
219 ASSERT_NE(lhs.numel(), rhs.numel()), in test()
220 ASSERT_EQ(lhs.numel(), rhs.numel()); in test()
229 ASSERT_EQ(lhs.numel(), 0); ASSERT_NE(rhs.numel(), 0), in test()
239 ASSERT_EQ(lhs.numel(), 0); ASSERT_NE(rhs1.numel(), 0), in test()
249 (lhs.numel() == 0 || rhs.numel() == 0 || in test()
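scalar_tensor_test.cpp above derives the expected element count with c10::multiply_integers over the shape; an empty size list (a 0-dim tensor) multiplies out to 1. A short sketch of that invariant (the helper name is illustrative):

    #include <ATen/ATen.h>
    #include <c10/util/accumulate.h>

    bool numel_matches_shape_product(const at::Tensor& t) {
      const auto sizes = t.sizes();
      const int64_t expected = c10::multiply_integers(sizes.begin(), sizes.end());
      return t.numel() == expected;  // holds for 0-dim tensors too: empty product == 1
    }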
/aosp_15_r20/external/pytorch/aten/src/ATen/
TensorUtils.cpp
126 void checkNumel(CheckedFrom c, const TensorGeometryArg& t, int64_t numel) { in checkNumel() argument
128 t->numel() == numel, in checkNumel()
129 "Expected tensor for ", t, " to have ", numel, in checkNumel()
130 " elements; but it actually has ", t->numel(), " elements", in checkNumel()
136 t1->numel() == t2->numel(), in checkSameNumel()
139 t1->numel(), " does not equal ", t2->numel(), in checkSameNumel()
323 // ``numel'', i.e., number of subspaces, as the corresponding chunk of
329 template <typename ResultVec, typename NewShapeVec, typename Numel>
340 // NOTE: stride is arbitrary in the numel() == 0 case; in computeStride_impl()
345 const Numel numel = c10::multiply_integers(oldshape); in computeStride_impl() local
[all …]
/aosp_15_r20/external/pytorch/test/inductor/
test_triton_kernels.py
59 grid = (x.numel(),)
80 n_elements = output.numel()
132 {"n_elements": output.numel(), "BLOCK_SIZE": 16}
134 grid=[(x.numel(),)],
204 {"n_elements": x_func.numel(), "BLOCK_SIZE": 16}
206 grid=[(x_func.numel(),)],
225 {"n_elements": x_func.numel(), "BLOCK_SIZE": 16}
227 grid=[(x_func.numel(),)],
243 n_elements = output.numel()
250 n_elements = output.numel()
[all …]
/aosp_15_r20/external/pytorch/test/distributed/fsdp/
test_fsdp_ignored_modules.py
179 total_numel = sum(p.numel() for p in nonwrapped_model.parameters())
181 p.numel() for p in nonwrapped_model.transformer.parameters()
188 flat_param_numel = flat_param.numel()
190 # Subtract the numel contributed from alignment padding
192 numel
193 for (numel, is_padding) in zip(
253 total_numel = sum(p.numel() for p in nonwrapped_model.parameters())
254 ignored_numel = sum(p.numel() for p in nonwrapped_model.layer1.parameters())
262 flat_param_numel = flat_param.numel()
264 # Subtract the numel contributed from alignment padding
[all …]
test_fsdp_flatten_params.py
94 numel = sum(p.numel() for p in module.parameters())
100 numel_to_flatten = sum(p.numel() for p in params_to_flatten)
108 self.assertEqual(sum(fp.numel() for fp in flat_params), numel_to_flatten)
109 self.assertEqual(sum(p.numel() for p in module.parameters()), numel)
193 Tests that numel is preserved after flattening when there are no shared
209 Tests that numel is preserved after flattening when there are shared
224 ref_numel = sum(p.numel() for p in module.parameters())
231 self.assertEqual(ref_numel, flat_param_handle.flat_param.numel())
456 # For 32-bit full precision, FSDP pads up to 3 numel after each
457 # original parameter to achieve 0 mod 4 numel (i.e. 0 mod 16 bytes).
[all …]
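The comment quoted above from test_fsdp_flatten_params.py says FSDP pads each original parameter with up to 3 elements so its numel becomes 0 mod 4, i.e. 0 mod 16 bytes for 4-byte dtypes. A hedged illustration of that rounding arithmetic (not the FSDP implementation):

    #include <cassert>
    #include <cstdint>

    constexpr int64_t pad_to_multiple_of_4(int64_t numel) {
      return (numel + 3) / 4 * 4;  // round up; adds at most 3 padding elements
    }

    int main() {
      assert(pad_to_multiple_of_4(10) == 12);  // 2 padding elements
      assert(pad_to_multiple_of_4(12) == 12);  // already aligned: no padding
      assert(pad_to_multiple_of_4(13) == 16);  // 3 padding elements (the maximum)
      return 0;
    }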
/aosp_15_r20/external/executorch/runtime/core/exec_aten/util/
tensor_util.h
65 const size_t a_numel__ = (a__).numel(); \
66 const size_t b_numel__ = (b__).numel(); \
72 ET_TENSOR_CHECK_PREFIX__ ": numel={%zu, %zu}, dim={%zu, %zu}", \
91 const size_t a_numel__ = (a__).numel(); \
92 const size_t b_numel__ = (b__).numel(); \
93 const size_t c_numel__ = (c__).numel(); \
102 ": numel={%zu, %zu, %zu}, dim={%zu, %zu, %zu}", \
158 const size_t a_numel__ = (a__).numel(); \
159 const size_t b_numel__ = (b__).numel(); \
170 ": numel={%zu, %zu}, dim={%zu, %zu}, dtype={%" PRId8 ", %" PRId8 "}", \
[all …]
/aosp_15_r20/external/pytorch/torch/csrc/cuda/
nccl.cpp
318 if (input.numel() != ref_numel) { in check_tensor()
331 if (output->numel() * output_multiplier != ref_numel * input_multiplier) { in check_tensor()
358 int64_t numel = inputs[0].numel(); in check_inputs() local
366 input, output, input_multiplier, output_multiplier, numel, dtype); in check_inputs()
390 int64_t numel = inputs[0].numel(); in check_inputs() local
403 numel, in check_inputs()
582 int64_t numel = tensors[0].numel(); in broadcast() local
597 static_cast<uint64_t>(numel) <= static_cast<uint64_t>(count_max), in broadcast()
599 numel, in broadcast()
607 numel, in broadcast()
[all …]
/aosp_15_r20/external/pytorch/test/quantization/core/
test_quantized_tensor.py
271 self.assertEqual(int_repr.numel(), 1)
300 self.assertEqual(int_repr.numel(), len(expected_packed_vals))
446 numel = 10
448 scales = torch.rand(numel, device=device)
449 zero_points_int = torch.randint(0, 10, size=(numel,), device=device)
450 zero_points_float = torch.randn(numel, device=device)
453 … [numel], scales=scales, zero_points=zero_points, axis=ch_axis, dtype=dtype, device=device)
460 int_tensor = torch.randint(0, 100, size=(numel,), dtype=torch.uint8, device=device)
470 numel = 10
472 q = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point,
[all …]
/aosp_15_r20/external/executorch/backends/vulkan/test/utils/
test_utils.h
141 int numel = -1) {
142 if (numel < 0) {
143 numel = staging.numel();
145 std::vector<float> data(numel);
147 staging.copy_from(data.data(), sizeof(float) * numel);
155 const size_t numel,
160 const size_t numel,
181 int numel = -1) {
182 if (numel < 0) {
183 numel = staging.numel();
[all …]
