/aosp_15_r20/external/pytorch/aten/src/ATen/native/

EmbeddingBag.cpp
    120  TORCH_CHECK(select_indices.numel() == add_indices.numel());  in index_select_add()
    129  auto numel = add_indices.numel();  in index_select_add() local
    137  for (const auto i : c10::irange(numel)) {  in index_select_add()
    208  int64_t output_size = offsets.numel() - 1;  in index_select_add()
    213  output_size = offsets.numel() - 1;  in index_select_add()
    215  output_size = offsets.numel();  in index_select_add()
    216  offsets_include_last.resize(offsets.numel() + 1);  in index_select_add()
    217  if (offsets.numel() > 0) {  in index_select_add()
    221  sizeof(index_t) * offsets.numel());  in index_select_add()
    223  offsets_include_last[offsets.numel()] = select_indices.numel();  in index_select_add()
    [all …]

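The hits at lines 208-223 derive the number of bags from `offsets`: when the offsets tensor already carries the final boundary, the output size is `offsets.numel() - 1`; otherwise the code copies the offsets and appends `select_indices.numel()` as the last entry. The sketch below mirrors that bookkeeping in Python; the helper name and example data are illustrative, not taken from the ATen implementation.

```python
import torch

def bag_boundaries(indices: torch.Tensor, offsets: torch.Tensor,
                   include_last_offset: bool) -> torch.Tensor:
    # Hypothetical helper mirroring the offsets handling above: with
    # include_last_offset=True, offsets already ends with indices.numel()
    # and there are offsets.numel() - 1 bags; otherwise append the boundary.
    if include_last_offset:
        return offsets
    full = torch.empty(offsets.numel() + 1, dtype=offsets.dtype)
    full[:offsets.numel()] = offsets
    full[offsets.numel()] = indices.numel()
    return full

indices = torch.tensor([3, 1, 4, 1, 5, 9])
offsets = torch.tensor([0, 2, 5])          # three bags: [0,2), [2,5), [5,6)
print(bag_boundaries(indices, offsets, include_last_offset=False))
# tensor([0, 2, 5, 6]) -> output_size == offsets.numel() == 3 bags
```
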
TensorAdvancedIndexing.cpp
    176  auto is_index_empty = index.numel() == 0;  in TORCH_META_FUNC()
    284  int64_t numIndices = index.numel();  in TORCH_PRECOMPUTE_META_FUNC()
    330  auto numel = index.numel();  in index_func_meta_impl() local
    342  TORCH_CHECK(numel == (source.dim() == 0 ? 1 : source.size(dim)),  in index_func_meta_impl()
    343  func, "_(): Number of indices (", numel, ") should be equal to source.size(dim): (",  in index_func_meta_impl()
    686  if (self.numel() == 0) {  in _unsafe_masked_index()
    712  if (self.numel() == 0) {  in _unsafe_masked_index_put_accumulate()
    751  …numel() == index.numel(), "put_(): Expected source and index to have the same number of elements, …  in put_()
    752  …TORCH_CHECK_INDEX(!(self.numel() == 0 && index.numel() != 0), "put_(): Tried to put elements into …  in put_()
    759  if (index.numel() == 0) {  in put_()
    [all …]

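Lines 751-759 spell out the `put_()` contract: source and index must have the same numel, a non-empty index cannot target an empty tensor, and an empty index is a no-op. A small sketch of that contract through the public `Tensor.put_` API (the caught exception type and printed message are approximations, not the exact ATen error strings):

```python
import torch

self_t = torch.zeros(2, 3)
index = torch.tensor([0, 4])        # flat indices into self_t's storage
source = torch.tensor([1.0, 2.0])   # must have the same numel as index

self_t.put_(index, source)          # ok: index.numel() == source.numel() == 2

try:
    self_t.put_(torch.tensor([0, 1, 2]), source)  # 3 indices vs. 2 source values
except (RuntimeError, IndexError) as e:
    print("rejected:", e)

# An empty index is a no-op, matching the early return at line 759.
self_t.put_(torch.empty(0, dtype=torch.long), torch.empty(0))
```
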
Unique.cpp
     42  int64_t numel = input.numel();  in unique_cpu_bool_template() local
     47  if (numel == 0) {  in unique_cpu_bool_template()
     58  at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {  in unique_cpu_bool_template()
     69  int64_t num_false = numel - num_true;  in unique_cpu_bool_template()
     99  at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {  in unique_cpu_bool_template()
    165  int64_t numel = input.numel();  in unique_cpu_sorted_template() local
    170  if (numel == 0) {  in unique_cpu_sorted_template()
    197  at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {  in unique_cpu_sorted_template()
    231  unique_index_data[unique_count] = numel;  in unique_cpu_sorted_template()
    234  at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {  in unique_cpu_sorted_template()
    [all …]

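`unique_cpu_bool_template` never sorts: it counts the `true` entries over all `numel` elements in parallel and derives `num_false = numel - num_true`. A conceptual Python version of the same counting, not the ATen kernel itself:

```python
import torch

x = torch.tensor([True, False, True, True, False])

numel = x.numel()
num_true = int(x.sum())        # the parallel_for count in the C++ version
num_false = numel - num_true

unique_vals = torch.unique(x)                  # what the template ultimately produces
counts = torch.tensor([num_false, num_true])   # counts for [False, True]
print(unique_vals, counts)
```
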
/aosp_15_r20/external/pytorch/test/cpp/api/

tensor.cpp
    158  ASSERT_EQ(tensor.numel(), 1);  in TEST()
    163  ASSERT_EQ(tensor.numel(), 1);  in TEST()
    168  ASSERT_EQ(tensor.numel(), 1);  in TEST()
    173  ASSERT_EQ(tensor.numel(), 1);  in TEST()
    178  ASSERT_EQ(tensor.numel(), 1);  in TEST()
    185  ASSERT_EQ(tensor.numel(), 1);  in TEST()
    190  ASSERT_EQ(tensor.numel(), 1);  in TEST()
    197  ASSERT_EQ(tensor.numel(), 1);  in TEST()
    204  ASSERT_EQ(tensor.numel(), 3);  in TEST()
    211  ASSERT_EQ(tensor.numel(), 3);  in TEST()
    [all …]

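These C++ API tests repeatedly assert that 0-dimensional (scalar) tensors report `numel() == 1` while a length-3 tensor reports 3. The same invariants checked from Python:

```python
import torch

scalar = torch.tensor(123)      # 0-dim tensor
assert scalar.dim() == 0 and scalar.numel() == 1

vec = torch.tensor([1, 2, 3])   # 1-dim tensor of length 3
assert vec.dim() == 1 and vec.numel() == 3
```
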
/aosp_15_r20/external/pytorch/torch/_inductor/codegen/

simd.py
     87  numel: sympy.Expr,
     99  self.numel = numel
    114  numel: sympy.Expr,
    132  numel=numel,
    155  return f"IterationRangesRoot({self.name!r}, {self.numel}, ...)"
    168  if V.graph.sizevars.statically_known_equals(divisor * length, self.numel):
    223  if not V.graph.sizevars.statically_known_equals(self.numel, divisor):
    225  add(self.lookup(divisor, FloorDiv(self.numel, divisor)))
    241  numel=parent.numel / length,
    454  sympy.Integer(1), tree_node.root.numel
    [all …]

/aosp_15_r20/external/executorch/runtime/core/exec_aten/testing_util/

tensor_util.cpp
     41  size_t numel,  in data_is_close() argument
     45  numel == 0 || (a != nullptr && b != nullptr),  in data_is_close()
     46  "Pointers must not be null when numel > 0: numel %zu, a 0x%p, b 0x%p",  in data_is_close()
     47  numel,  in data_is_close()
     53  for (size_t i = 0; i < numel; i++) {  in data_is_close()
    120  a.numel(),  in tensors_are_close()
    127  a.numel(),  in tensors_are_close()
    134  a.numel(),  in tensors_are_close()
    141  a.numel(),  in tensors_are_close()
    152  * underlying data elements and same numel. Note that this function is mainly
    [all …]

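`data_is_close` first requires non-null pointers whenever `numel > 0`, then compares the two buffers element by element over `numel` entries. A rough Python analogue of that loop (hypothetical helper; the real function also takes the tolerance arguments declared elsewhere in the file):

```python
import torch

def data_is_close(a: torch.Tensor, b: torch.Tensor,
                  rtol: float = 1e-5, atol: float = 1e-8) -> bool:
    # Comparing zero elements is trivially true, mirroring the numel == 0 case.
    if a.numel() != b.numel():
        return False
    if a.numel() == 0:
        return True
    return bool(torch.isclose(a.flatten(), b.flatten(), rtol=rtol, atol=atol).all())

print(data_is_close(torch.tensor([1.0, 2.0]), torch.tensor([1.0, 2.0 + 1e-9])))
```
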
/aosp_15_r20/external/pytorch/c10/xpu/test/impl/

XPUGuardTest.cpp
     61  constexpr int numel = 1024;  in TEST() local
     62  int hostData1[numel];  in TEST()
     63  initHostData(hostData1, numel);  in TEST()
     64  int hostData2[numel];  in TEST()
     65  clearHostData(hostData2, numel);  in TEST()
     68  int* deviceData1 = sycl::malloc_device<int>(numel, xpu_stream1);  in TEST()
     72  xpu_stream1.queue().memcpy(deviceData1, hostData1, sizeof(int) * numel);  in TEST()
     77  xpu_stream2.queue().memcpy(hostData2, deviceData1, sizeof(int) * numel);  in TEST()
     81  validateHostData(hostData2, numel);  in TEST()
     86  clearHostData(hostData2, numel);  in TEST()
    [all …]

/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/

ScatterGatherKernel.cu
     24  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…  in operator ()() argument
     25  (void)numel; // suppress unused warning  in operator ()()
     34  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…  in operator ()() argument
     35  fastAtomicAdd(self_data_start, index, numel, *src_data, true);  in operator ()()
     43  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…  in operator ()() argument
     44  fastAtomicAdd(self_data_start, index, numel, *src_data, true);  in operator ()()
     52  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…  in operator ()() argument
     53  (void)numel; // suppress unused warning  in operator ()()
     62  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…  in operator ()() argument
     63  (void)numel; // suppress unused warning  in operator ()()
    [all …]

SortStable.cu
     82  int numel,  in C10_LAUNCH_BOUNDS_1()
     84  CUDA_KERNEL_LOOP(idx, numel) {  in C10_LAUNCH_BOUNDS_1()
     95  int numel,  in C10_LAUNCH_BOUNDS_1()
     97  CUDA_KERNEL_LOOP(idx, numel) {  in C10_LAUNCH_BOUNDS_1()
    143  const auto numel = nsort * nsegments;  in segmented_sort_pairs_by_full_sort() local
    145  auto indices_and_segment = cuda_allocator->allocate(numel * sizeof(int2));  in segmented_sort_pairs_by_full_sort()
    150  dim3 grid = GET_BLOCKS(numel);  in segmented_sort_pairs_by_full_sort()
    154  i_s_ptr, numel, nsort_divider);  in segmented_sort_pairs_by_full_sort()
    191  const auto numel = nsort * nsegments;  in segmented_sort_pairs() local
    193  auto reverse_indices = cuda_allocator->allocate(numel * sizeof(int64_t));  in segmented_sort_pairs()
    [all …]

Indexing.cu
     52  int64_t numel, int64_t stride, int64_t stride_before, int64_t outer_dim, bool accumulate) {  in indexing_backward_kernel() argument
     53  //numel is total number of flattened indices, not expanded to dimensions that are not indexed.  in indexing_backward_kernel()
     74  if (idx < numel  in indexing_backward_kernel()
     79  if (!accumulate && (idx < numel - 1) && sorted_indices[idx] == sorted_indices[idx + 1]) {  in indexing_backward_kernel()
     84  const int64_t grad_row = ((int64_t) indices[idx]) * stride + z * numel * stride;  in indexing_backward_kernel()
    122  } while (idx < numel && sorted_indices[idx] == sorted_indices[idx - 1]);  in indexing_backward_kernel()
    130  int64_t numel, int64_t stride, int64_t stride_before, int64_t outer_dim, bool accumulate) {  in indexing_backward_kernel_stride_1() argument
    138  if ((idx < numel) &&  in indexing_backward_kernel_stride_1()
    143  …while (((idx + num_duplicates) < numel) && (sorted_indices[idx + num_duplicates] == crnt_sorted_id…  in indexing_backward_kernel_stride_1()
    153  grad_row = ((int64_t)indices[idx + num_duplicates - 1]) * stride + z * numel * stride;  in indexing_backward_kernel_stride_1()
    [all …]

MultiTensorApply.cuh
    142  if (tensor_lists[0][t].numel() == 0) {  in multi_tensor_apply()
    147  tensor_lists[0][t].numel();  in multi_tensor_apply()
    160  const auto numel = tensor_lists[0][t].numel();  in multi_tensor_apply() local
    161  const auto chunks = numel / kChunkSize + (numel % kChunkSize != 0);  in multi_tensor_apply()
    233  if (tensor_lists[0][t].numel() == 0) {  in multi_tensor_apply()
    237  tensor_lists[0][t].numel();  in multi_tensor_apply()
    245  const auto numel = tensor_lists[0][t].numel();  in multi_tensor_apply() local
    246  const auto chunks = numel / kChunkSize + (numel % kChunkSize != 0);  in multi_tensor_apply()
    313  if (tensor_lists[0][tensor_index].numel() == 0) {  in multi_tensor_apply_for_fused_optimizer()
    319  tensor_lists[0][tensor_index].numel();  in multi_tensor_apply_for_fused_optimizer()
    [all …]

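Lines 161 and 246 compute the per-tensor chunk count as an integer ceiling division written without floating point: `numel / kChunkSize + (numel % kChunkSize != 0)`. The same arithmetic in Python (the chunk size used below is illustrative, not the kernel's actual `kChunkSize`):

```python
def num_chunks(numel: int, chunk_size: int) -> int:
    # Integer ceiling division, as in multi_tensor_apply:
    #   numel / kChunkSize + (numel % kChunkSize != 0)
    return numel // chunk_size + (numel % chunk_size != 0)

assert num_chunks(0, 65536) == 0       # empty tensors are skipped anyway
assert num_chunks(65536, 65536) == 1
assert num_chunks(65537, 65536) == 2   # a partial tail adds one more chunk
```
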
RNN.cu
     57  void getLaunchConfig(dim3* block, dim3* grid, int64_t numel) {  in getLaunchConfig() argument
     61  TORCH_INTERNAL_ASSERT(cuda::getApplyGrid(numel, *grid, curDevice),  in getLaunchConfig()
    377  int64_t numel = cx.numel();  in lstm_forward_impl() local
    378  if (numel == 0) return;  in lstm_forward_impl()
    379  getLaunchConfig(&block, &grid, numel);  in lstm_forward_impl()
    396  …t_gatesI, hidden_gatesI, input_biasI, hidden_biasI, cxI, hyI, cyI, workspaceI, hidden_size, numel);  in lstm_forward_impl()
    401  …t_gatesI, hidden_gatesI, input_biasI, hidden_biasI, cxI, hyI, cyI, workspaceI, hidden_size, numel);  in lstm_forward_impl()
    414  int64_t numel = cx.numel();  in lstm_backward_impl() local
    415  getLaunchConfig(&block, &grid, numel);  in lstm_backward_impl()
    416  if (numel == 0) return;  in lstm_backward_impl()
    [all …]

IndexKernel.cu
     60  if (iter.numel() == 0) {  in gpu_index_kernel()
     84  launch_kernel<launch_size_nd, launch_bound2>(iter.numel(), [=]__device__(int idx) {  in gpu_index_kernel()
    115  if (0 == iter.numel()) {  in index_fill_kernel_impl()
    143  launch_kernel<launch_size_nd, launch_bound2>(iter.numel(), loop);  in index_fill_kernel_impl()
    152  if (iter.numel() == 0) {  in index_copy_kernel_impl()
    179  launch_kernel<launch_size_nd, launch_bound2>(iter.numel(), loop);  in index_copy_kernel_impl()
    287  const auto numel = indexed.numel();  in cuda_take_put_kernel() local
    309  … CUDA_KERNEL_ASSERT(idx < numel && idx >= -numel && "cuda_take_put_kernel() index out of bounds");  in cuda_take_put_kernel()
    312  offset += numel;  in cuda_take_put_kernel()
    320  launch_kernel<launch_size_nd, launch_bound2>(iter.numel(), loop);  in cuda_take_put_kernel()
    [all …]

/aosp_15_r20/external/executorch/extension/android/src/main/java/org/pytorch/executorch/

Tensor.java
    133  final ByteBuffer byteBuffer = allocateByteBuffer((int) numel(shape));  in fromBlobUnsigned()
    150  final ByteBuffer byteBuffer = allocateByteBuffer((int) numel(shape));  in fromBlob()
    167  final IntBuffer intBuffer = allocateIntBuffer((int) numel(shape));  in fromBlob()
    184  final FloatBuffer floatBuffer = allocateFloatBuffer((int) numel(shape));  in fromBlob()
    201  final LongBuffer longBuffer = allocateLongBuffer((int) numel(shape));  in fromBlob()
    218  final DoubleBuffer doubleBuffer = allocateDoubleBuffer((int) numel(shape));  in fromBlob()
    226  * @param data Direct buffer with native byte order that contains {@code Tensor.numel(shape)}
    246  * @param data Direct buffer with native byte order that contains {@code Tensor.numel(shape)}
    266  * @param data Direct buffer with native byte order that contains {@code Tensor.numel(shape)}
    286  * @param data Direct buffer with native byte order that contains {@code Tensor.numel(shape)}
    [all …]

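`fromBlob` sizes every buffer with `numel(shape)`, the product of all dimensions, and the Javadoc requires callers to supply exactly that many values. A sketch of that capacity rule (assumed semantics of the Java helper, expressed in Python):

```python
from math import prod

def numel(shape) -> int:
    # Product of all dimension sizes; an empty shape (scalar) yields 1.
    return prod(int(d) for d in shape)

assert numel([2, 3, 4]) == 24
assert numel([]) == 1          # scalar
assert numel([5, 0, 7]) == 0   # any zero-sized dimension empties the tensor
```
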
/aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/

QTensor.cpp
    164  if (self.numel() > 0) {  in make_per_channel_quantized_tensor_cpu()
    260  auto data_size = self.numel() * self.element_size();  in equal_quantized_cpu()
    296  int numel,  in calculate_quant_loss() argument
    319  for (; i < numel; i++) {  in calculate_quant_loss()
    336  int64_t numel,  in choose_qparams_optimized() argument
    341  if (numel < 0 || numel > input_tensor.numel()) {  in choose_qparams_optimized()
    342  TORCH_CHECK(false, "numel is out of the bound of input tensor");  in choose_qparams_optimized()
    345  TORCH_CHECK(numel <= input_tensor.numel(), "numel ", numel,  in choose_qparams_optimized()
    346  " greater than input_tensor.numel() ", input_tensor.numel());  in choose_qparams_optimized()
    348  float xmin = *std::min_element(input_row, input_row + numel);  in choose_qparams_optimized()
    [all …]

/aosp_15_r20/external/pytorch/aten/src/ATen/test/

quantized_test.cpp
    107  int numel = 10;  in TEST() local
    109  {numel}, at::device(at::kCPU).dtype(kQUInt8), scale, zero_point);  in TEST()
    112  for (const auto i : c10::irange(numel)) {  in TEST()
    119  for (const auto i : c10::irange(numel)) {  in TEST()
    125  int numel = 10;  in TEST() local
    126  auto scales = rand({numel}).toType(kDouble);  in TEST()
    131  {numel},  in TEST()
    138  for (const auto i : c10::irange(numel)) {  in TEST()
    145  for (const auto i : c10::irange(numel)) {  in TEST()
    218  auto numel = c10::multiply_integers(shape);  in TEST() local
    [all …]

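The test at line 107 quantizes a `numel = 10` tensor with a single scale and zero point; the one at line 125 switches to per-channel parameters of length `numel`. A minimal Python sketch of the per-tensor case and its round-trip identity (the per-channel variant appears again under test_quantized_tensor.py below):

```python
import torch

numel = 10
scale, zero_point = 0.1, 10

x = torch.rand(numel)
q = torch.quantize_per_tensor(x, scale=scale, zero_point=zero_point, dtype=torch.quint8)

assert q.numel() == numel
# Every element round-trips through (int_repr - zero_point) * scale.
assert torch.allclose(q.dequantize(),
                      (q.int_repr().to(torch.float) - zero_point) * scale)
```
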
scalar_tensor_test.cpp
     59  const auto numel = c10::multiply_integers(s->begin(), s->end());  in test() local
     60  ASSERT_EQ(t.numel(), numel);  in test()
    128  if (t.numel() != 0) {  in test()
    135  if (t.numel() != 0) {  in test()
    145  if (t.dim() > 0 && t.numel() != 0) {  in test()
    219  ASSERT_NE(lhs.numel(), rhs.numel()),  in test()
    220  ASSERT_EQ(lhs.numel(), rhs.numel());  in test()
    229  ASSERT_EQ(lhs.numel(), 0); ASSERT_NE(rhs.numel(), 0),  in test()
    239  ASSERT_EQ(lhs.numel(), 0); ASSERT_NE(rhs1.numel(), 0),  in test()
    249  (lhs.numel() == 0 || rhs.numel() == 0 ||  in test()

/aosp_15_r20/external/pytorch/aten/src/ATen/

TensorUtils.cpp
    126  void checkNumel(CheckedFrom c, const TensorGeometryArg& t, int64_t numel) {  in checkNumel() argument
    128  t->numel() == numel,  in checkNumel()
    129  "Expected tensor for ", t, " to have ", numel,  in checkNumel()
    130  " elements; but it actually has ", t->numel(), " elements",  in checkNumel()
    136  t1->numel() == t2->numel(),  in checkSameNumel()
    139  t1->numel(), " does not equal ", t2->numel(),  in checkSameNumel()
    323  // ``numel'', i.e., number of subspaces, as the corresponding chunk of
    329  template <typename ResultVec, typename NewShapeVec, typename Numel>
    340  // NOTE: stride is arbitrary in the numel() == 0 case;  in computeStride_impl()
    345  const Numel numel = c10::multiply_integers(oldshape);  in computeStride_impl() local
    [all …]

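`checkNumel`/`checkSameNumel` report a mismatch between the actual and expected element counts, and `computeStride_impl` starts from `numel = multiply_integers(oldshape)` because a view is only legal when the new shape multiplies out to the same count. A compact illustration of that invariant from the Python side:

```python
import torch
from math import prod

t = torch.arange(24).reshape(2, 3, 4)
assert t.numel() == prod(t.shape) == 24

# A view must preserve numel ...
v = t.view(4, 6)
assert v.numel() == t.numel()

# ... and a mismatched numel is rejected, which is what checkNumel-style
# guards report with an "Expected tensor ... to have N elements" message.
try:
    t.view(5, 5)
except RuntimeError as e:
    print("rejected:", e)
```
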
/aosp_15_r20/external/pytorch/test/inductor/

test_triton_kernels.py
     59  grid = (x.numel(),)
     80  n_elements = output.numel()
    132  {"n_elements": output.numel(), "BLOCK_SIZE": 16}
    134  grid=[(x.numel(),)],
    204  {"n_elements": x_func.numel(), "BLOCK_SIZE": 16}
    206  grid=[(x_func.numel(),)],
    225  {"n_elements": x_func.numel(), "BLOCK_SIZE": 16}
    227  grid=[(x_func.numel(),)],
    243  n_elements = output.numel()
    250  n_elements = output.numel()
    [all …]

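These tests pass `n_elements = output.numel()` to user-defined Triton kernels and build the launch grid from `x.numel()`. Below is a minimal standalone version of that pattern, a standard masked vector-add kernel; it needs `triton` and a CUDA device, and the `BLOCK_SIZE` of 16 simply matches the kwargs seen in the hits. The tests' `grid=[(x.numel(),)]` (one program per element) also works because the mask guards out-of-range offsets.

```python
import torch
import triton
import triton.language as tl

@triton.jit
def add_kernel(x_ptr, y_ptr, out_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements            # guard the tail block
    x = tl.load(x_ptr + offsets, mask=mask)
    y = tl.load(y_ptr + offsets, mask=mask)
    tl.store(out_ptr + offsets, x + y, mask=mask)

x = torch.randn(1000, device="cuda")
y = torch.randn(1000, device="cuda")
out = torch.empty_like(x)

n_elements = out.numel()
grid = (triton.cdiv(n_elements, 16),)      # one program per BLOCK_SIZE chunk
add_kernel[grid](x, y, out, n_elements, BLOCK_SIZE=16)
assert torch.allclose(out, x + y)
```
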
/aosp_15_r20/external/pytorch/test/distributed/fsdp/

test_fsdp_ignored_modules.py
    179  total_numel = sum(p.numel() for p in nonwrapped_model.parameters())
    181  p.numel() for p in nonwrapped_model.transformer.parameters()
    188  flat_param_numel = flat_param.numel()
    190  # Subtract the numel contributed from alignment padding
    192  numel
    193  for (numel, is_padding) in zip(
    253  total_numel = sum(p.numel() for p in nonwrapped_model.parameters())
    254  ignored_numel = sum(p.numel() for p in nonwrapped_model.layer1.parameters())
    262  flat_param_numel = flat_param.numel()
    264  # Subtract the numel contributed from alignment padding
    [all …]

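The test compares the model's total parameter numel against the ignored submodule's contribution and the flat parameter's numel. The counting part in isolation, with a plain `nn.Sequential` standing in for the wrapped model (no FSDP required):

```python
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 2))
ignored = model[0]   # pretend this submodule were passed via ignored_modules

total_numel = sum(p.numel() for p in model.parameters())
ignored_numel = sum(p.numel() for p in ignored.parameters())
managed_numel = total_numel - ignored_numel

print(total_numel, ignored_numel, managed_numel)   # 58, 40, 18
```
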
test_fsdp_flatten_params.py
     94  numel = sum(p.numel() for p in module.parameters())
    100  numel_to_flatten = sum(p.numel() for p in params_to_flatten)
    108  self.assertEqual(sum(fp.numel() for fp in flat_params), numel_to_flatten)
    109  self.assertEqual(sum(p.numel() for p in module.parameters()), numel)
    193  Tests that numel is preserved after flattening when there are no shared
    209  Tests that numel is preserved after flattening when there are shared
    224  ref_numel = sum(p.numel() for p in module.parameters())
    231  self.assertEqual(ref_numel, flat_param_handle.flat_param.numel())
    456  # For 32-bit full precision, FSDP pads up to 3 numel after each
    457  # original parameter to achieve 0 mod 4 numel (i.e. 0 mod 16 bytes).
    [all …]

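The comment at lines 456-457 says that, for float32, FSDP pads each flattened parameter up to a multiple of 4 elements (16 bytes). A sketch of that padding arithmetic; the exact placement of padding inside FSDP may differ, this only reproduces the 0 mod 4 rule stated in the comment:

```python
def padded_numel(param_numels, align_numel=4):
    # Pad each parameter up to a multiple of `align_numel` elements
    # (4 elements == 16 bytes for float32), per the test comment above.
    total = 0
    for n in param_numels:
        pad = (-n) % align_numel          # 0..align_numel-1 extra elements
        total += n + pad
    return total

assert padded_numel([10, 7, 4]) == 12 + 8 + 4   # up to 3 extra numel each
assert padded_numel([8, 8]) == 16               # already aligned, no padding
```
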
/aosp_15_r20/external/executorch/runtime/core/exec_aten/util/

tensor_util.h
     65  const size_t a_numel__ = (a__).numel(); \
     66  const size_t b_numel__ = (b__).numel(); \
     72  ET_TENSOR_CHECK_PREFIX__ ": numel={%zu, %zu}, dim={%zu, %zu}", \
     91  const size_t a_numel__ = (a__).numel(); \
     92  const size_t b_numel__ = (b__).numel(); \
     93  const size_t c_numel__ = (c__).numel(); \
    102  ": numel={%zu, %zu, %zu}, dim={%zu, %zu, %zu}", \
    158  const size_t a_numel__ = (a__).numel(); \
    159  const size_t b_numel__ = (b__).numel(); \
    170  ": numel={%zu, %zu}, dim={%zu, %zu}, dtype={%" PRId8 ", %" PRId8 "}", \
    [all …]

/aosp_15_r20/external/pytorch/torch/csrc/cuda/

nccl.cpp
    318  if (input.numel() != ref_numel) {  in check_tensor()
    331  if (output->numel() * output_multiplier != ref_numel * input_multiplier) {  in check_tensor()
    358  int64_t numel = inputs[0].numel();  in check_inputs() local
    366  input, output, input_multiplier, output_multiplier, numel, dtype);  in check_inputs()
    390  int64_t numel = inputs[0].numel();  in check_inputs() local
    403  numel,  in check_inputs()
    582  int64_t numel = tensors[0].numel();  in broadcast() local
    597  static_cast<uint64_t>(numel) <= static_cast<uint64_t>(count_max),  in broadcast()
    599  numel,  in broadcast()
    607  numel,  in broadcast()
    [all …]

/aosp_15_r20/external/pytorch/test/quantization/core/

test_quantized_tensor.py
    271  self.assertEqual(int_repr.numel(), 1)
    300  self.assertEqual(int_repr.numel(), len(expected_packed_vals))
    446  numel = 10
    448  scales = torch.rand(numel, device=device)
    449  zero_points_int = torch.randint(0, 10, size=(numel,), device=device)
    450  zero_points_float = torch.randn(numel, device=device)
    453  … [numel], scales=scales, zero_points=zero_points, axis=ch_axis, dtype=dtype, device=device)
    460  int_tensor = torch.randint(0, 100, size=(numel,), dtype=torch.uint8, device=device)
    470  numel = 10
    472  q = torch._empty_affine_quantized([numel], scale=scale, zero_point=zero_point,
    [all …]

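Around line 446 the test builds a per-channel quantized tensor whose `scales` and `zero_points` both have `numel` entries along `ch_axis`. The same construction through the public `torch.quantize_per_channel` API rather than the internal `_empty_affine_quantized` helper used at line 472:

```python
import torch

numel = 10
x = torch.randn(numel)
scales = torch.rand(numel, dtype=torch.double) + 0.01   # keep scales strictly positive
zero_points = torch.randint(0, 10, (numel,))

# axis=0: one (scale, zero_point) pair per element of this 1-D tensor.
q = torch.quantize_per_channel(x, scales, zero_points, axis=0, dtype=torch.quint8)

assert q.numel() == numel
assert q.q_per_channel_scales().numel() == numel
assert q.q_per_channel_zero_points().numel() == numel
```
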
/aosp_15_r20/external/executorch/backends/vulkan/test/utils/

test_utils.h
    141  int numel = -1) {
    142  if (numel < 0) {
    143  numel = staging.numel();
    145  std::vector<float> data(numel);
    147  staging.copy_from(data.data(), sizeof(float) * numel);
    155  const size_t numel,
    160  const size_t numel,
    181  int numel = -1) {
    182  if (numel < 0) {
    183  numel = staging.numel();
    [all …]