Searched refs:input_calc (Results 1 – 3 of 3) sorted by relevance
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/ |
H A D | Reduce.cuh | 295 InputCalculator input_calc; member 312 InputCalculator input_calc, in ReduceJitOp() 325 input_calc(input_calc), in ReduceJitOp() 357 InputCalculator input_calc; member 375 InputCalculator input_calc, in ReduceOp() 389 input_calc(input_calc), in ReduceOp() 490 index_t element_stride = input_calc.strides_[0][0] / sizeof(scalar_t); in thread_reduce() 491 bool is_contiguous = (input_calc.dims == 1 && element_stride == 1); in thread_reduce() 494 } else if (input_calc.dims == 1) { in thread_reduce() 497 …return thread_reduce_impl<output_vec_size>(data, [&](index_t idx) { return input_calc.get(idx)[0] … in thread_reduce() [all …]
|
H A D | CUDALoops.cuh | 64 auto input_calc = TrivialOffsetCalculator<traits::arity>(); in C10_LAUNCH_BOUNDS_1() local 70 decltype(input_calc), in C10_LAUNCH_BOUNDS_1() 74 data, remaining, input_calc, output_calc, loader, storer); in C10_LAUNCH_BOUNDS_1() 130 auto input_calc = TrivialOffsetCalculator<traits::arity>(); in launch_vectorized_kernel() local 136 N, f, data, input_calc, output_calc, loader, storer); in launch_vectorized_kernel()
|
H A D | Loops.cuh | 293 auto input_calc = TrivialOffsetCalculator<num_inputs>(); in gpu_kernel_multiple_outputs_impl() local 295 launch_unrolled_kernel_for_multi_outputs<num_outputs>(numel, f, data, input_calc, output_calc); in gpu_kernel_multiple_outputs_impl() 297 auto input_calc = make_input_offset_calculator<num_inputs>(iter); in gpu_kernel_multiple_outputs_impl() local 299 launch_unrolled_kernel_for_multi_outputs<num_outputs>(numel, f, data, input_calc, output_calc); in gpu_kernel_multiple_outputs_impl()
|