xref: /aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/CopysignKernel.cu (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #define TORCH_ASSERT_NO_OPERATORS
2 #include <ATen/Dispatch.h>
3 #include <ATen/native/DispatchStub.h>
4 #include <ATen/native/cuda/Loops.cuh>
5 #include <ATen/native/TensorIterator.h>
6 #include <ATen/native/BinaryOps.h>
7 
8 #if defined(__CUDACC__)
9 #include <cuda.h>
10 #include <cuda_fp16.h>
11 #include <c10/cuda/CUDAMathCompat.h>
12 #elif defined(__HIPCC__)
13 #include <hip/hip_runtime.h>
14 #include <hip/hip_fp16.h>
15 #include <c10/hip/HIPMathCompat.h>
16 #endif
17 
// NOTE: CUDA on Windows requires that the enclosing function
// of a __device__ lambda not have internal linkage.
20 
namespace at::native {

// Elementwise copysign for CUDA: writes copysign(a, b) — the magnitude of
// `a` with the sign of `b` — for each element pair produced by `iter`.
//
// Dispatches over float, double, Half, and BFloat16 (the two extra types
// named in AT_DISPATCH_FLOATING_TYPES_AND2) based on the iterator's common
// dtype. The actual per-element work runs in a __device__ lambda handed to
// gpu_kernel_with_scalars, which (per its name) also covers the case where
// one operand is a scalar rather than a full tensor — see Loops.cuh.
//
// This function is deliberately a plain non-static function: CUDA on
// Windows requires that the enclosing function of a __device__ lambda not
// have internal linkage.
void copysign_kernel_cuda(TensorIteratorBase& iter) {
  AT_DISPATCH_FLOATING_TYPES_AND2(kBFloat16, kHalf, iter.common_dtype(), "copysign_cuda", [&]() {
    gpu_kernel_with_scalars(iter, []GPU_LAMBDA(scalar_t a, scalar_t b) -> scalar_t {
      // compat::copysign selects the right backend overload (CUDA vs HIP,
      // including the half-precision types) per the headers included above.
      return c10::cuda::compat::copysign(a, b);
    });
  });
}

// Register this kernel as the CUDA implementation behind the copysign stub.
REGISTER_DISPATCH(copysign_stub, &copysign_kernel_cuda);

} // namespace at::native
34