Home
last modified time | relevance | path

Searched refs:input_calc (Results 1 – 3 of 3) sorted by relevance

/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/
H A DReduce.cuh295 InputCalculator input_calc; member
312 InputCalculator input_calc, in ReduceJitOp()
325 input_calc(input_calc), in ReduceJitOp()
357 InputCalculator input_calc; member
375 InputCalculator input_calc, in ReduceOp()
389 input_calc(input_calc), in ReduceOp()
490 index_t element_stride = input_calc.strides_[0][0] / sizeof(scalar_t); in thread_reduce()
491 bool is_contiguous = (input_calc.dims == 1 && element_stride == 1); in thread_reduce()
494 } else if (input_calc.dims == 1) { in thread_reduce()
497 …return thread_reduce_impl<output_vec_size>(data, [&](index_t idx) { return input_calc.get(idx)[0] … in thread_reduce()
[all …]
H A DCUDALoops.cuh64 auto input_calc = TrivialOffsetCalculator<traits::arity>(); in C10_LAUNCH_BOUNDS_1() local
70 decltype(input_calc), in C10_LAUNCH_BOUNDS_1()
74 data, remaining, input_calc, output_calc, loader, storer); in C10_LAUNCH_BOUNDS_1()
130 auto input_calc = TrivialOffsetCalculator<traits::arity>(); in launch_vectorized_kernel() local
136 N, f, data, input_calc, output_calc, loader, storer); in launch_vectorized_kernel()
H A DLoops.cuh293 auto input_calc = TrivialOffsetCalculator<num_inputs>(); in gpu_kernel_multiple_outputs_impl() local
295 launch_unrolled_kernel_for_multi_outputs<num_outputs>(numel, f, data, input_calc, output_calc); in gpu_kernel_multiple_outputs_impl()
297 auto input_calc = make_input_offset_calculator<num_inputs>(iter); in gpu_kernel_multiple_outputs_impl() local
299 launch_unrolled_kernel_for_multi_outputs<num_outputs>(numel, f, data, input_calc, output_calc); in gpu_kernel_multiple_outputs_impl()