Home
last modified time | relevance | path

Searched refs:kCUDABlockReduceNumThreads (Results 1 – 3 of 3) sorted by relevance

/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/
H A D group_norm_kernel.cu
580 const int64_t num_threads = D * HxW < cuda_utils::kCUDABlockReduceNumThreads in GroupNormKernelImplInternal()
582 : cuda_utils::kCUDABlockReduceNumThreads; in GroupNormKernelImplInternal()
701 const int64_t num_threads = (C / G) < cuda_utils::kCUDABlockReduceNumThreads in GroupNorm1dBackward()
703 : cuda_utils::kCUDABlockReduceNumThreads; in GroupNorm1dBackward()
850 int64_t num_threads = HxW < cuda_utils::kCUDABlockReduceNumThreads in GroupNormBackwardKernelImplInternal()
852 : cuda_utils::kCUDABlockReduceNumThreads; in GroupNormBackwardKernelImplInternal()
876 num_threads = (C / G) < cuda_utils::kCUDABlockReduceNumThreads in GroupNormBackwardKernelImplInternal()
878 : cuda_utils::kCUDABlockReduceNumThreads; in GroupNormBackwardKernelImplInternal()
H A D block_reduce.cuh
12 constexpr int kCUDABlockReduceNumThreads = 512; variable
H A D layer_norm_kernel.cu
794 <<<M, cuda_utils::kCUDABlockReduceNumThreads, 0, cuda_stream>>>( in LayerNormKernelImplInternal()