xref: /aosp_15_r20/external/llvm-libc/src/__support/GPU/utils.h (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===---------------- Implementation of GPU utils ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
10 #define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
11 
12 #include "src/__support/macros/config.h"
13 #include "src/__support/macros/properties/architectures.h"
14 
15 #if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
16 #include "amdgpu/utils.h"
17 #elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
18 #include "nvptx/utils.h"
19 #else
20 #include "generic/utils.h"
21 #endif
22 
23 namespace LIBC_NAMESPACE_DECL {
24 namespace gpu {
/// Returns the index of the lowest set bit of \p lane_mask, i.e. the
/// lowest-numbered lane that the mask marks as active.
///
/// \param lane_mask Bitmask with one bit per lane.
/// \returns The zero-based position of the least significant set bit. For an
///          empty mask __builtin_ffsll reports zero, so the subtraction below
///          wraps around to UINT64_MAX after conversion to the return type.
LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
  // __builtin_ffsll counts bit positions starting from one; shift the result
  // down to get a zero-based lane index.
  const int one_based_position = __builtin_ffsll(lane_mask);
  return static_cast<uint64_t>(one_based_position - 1);
}
29 
30 /// Conditional that is only true for a single thread in a lane.
is_first_lane(uint64_t lane_mask)31 LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
32   return gpu::get_lane_id() == get_first_lane_id(lane_mask);
33 }
34 
35 /// Gets the sum of all lanes inside the warp or wavefront.
reduce(uint64_t lane_mask,uint32_t x)36 LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) {
37   for (uint32_t step = gpu::get_lane_size() / 2; step > 0; step /= 2) {
38     uint32_t index = step + gpu::get_lane_id();
39     x += gpu::shuffle(lane_mask, index, x);
40   }
41   return gpu::broadcast_value(lane_mask, x);
42 }
43 
44 /// Gets the accumulator scan of the threads in the warp or wavefront.
scan(uint64_t lane_mask,uint32_t x)45 LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) {
46   for (uint32_t step = 1; step < gpu::get_lane_size(); step *= 2) {
47     uint32_t index = gpu::get_lane_id() - step;
48     uint32_t bitmask = gpu::get_lane_id() >= step;
49     x += -bitmask & gpu::shuffle(lane_mask, index, x);
50   }
51   return x;
52 }
53 
54 } // namespace gpu
55 } // namespace LIBC_NAMESPACE_DECL
56 
57 #endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
58