Searched refs:kCUDABlockReduceNumThreads (Results 1 – 3 of 3) sorted by relevance
580 const int64_t num_threads = D * HxW < cuda_utils::kCUDABlockReduceNumThreads in GroupNormKernelImplInternal()582 : cuda_utils::kCUDABlockReduceNumThreads; in GroupNormKernelImplInternal()701 const int64_t num_threads = (C / G) < cuda_utils::kCUDABlockReduceNumThreads in GroupNorm1dBackward()703 : cuda_utils::kCUDABlockReduceNumThreads; in GroupNorm1dBackward()850 int64_t num_threads = HxW < cuda_utils::kCUDABlockReduceNumThreads in GroupNormBackwardKernelImplInternal()852 : cuda_utils::kCUDABlockReduceNumThreads; in GroupNormBackwardKernelImplInternal()876 num_threads = (C / G) < cuda_utils::kCUDABlockReduceNumThreads in GroupNormBackwardKernelImplInternal()878 : cuda_utils::kCUDABlockReduceNumThreads; in GroupNormBackwardKernelImplInternal()
12 constexpr int kCUDABlockReduceNumThreads = 512; variable
794 <<<M, cuda_utils::kCUDABlockReduceNumThreads, 0, cuda_stream>>>( in LayerNormKernelImplInternal()