xref: /aosp_15_r20/external/clang/test/CodeGenCUDA/fp-contract.cu (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li // REQUIRES: x86-registered-target
2*67e74705SXin Li // REQUIRES: nvptx-registered-target
3*67e74705SXin Li 
4*67e74705SXin Li // By default we should fuse multiply/add into fma instruction.
5*67e74705SXin Li // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
6*67e74705SXin Li // RUN:   -disable-llvm-passes -o - %s | FileCheck -check-prefix ENABLED %s
7*67e74705SXin Li 
8*67e74705SXin Li // Explicit -ffp-contract=fast
9*67e74705SXin Li // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
10*67e74705SXin Li // RUN:   -ffp-contract=fast -disable-llvm-passes -o - %s \
11*67e74705SXin Li // RUN:   | FileCheck -check-prefix ENABLED %s
12*67e74705SXin Li 
13*67e74705SXin Li // Explicit -ffp-contract=on -- fusing by front-end (disabled).
14*67e74705SXin Li // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
15*67e74705SXin Li // RUN:   -ffp-contract=on -disable-llvm-passes -o - %s \
16*67e74705SXin Li // RUN:   | FileCheck -check-prefix DISABLED %s
17*67e74705SXin Li 
18*67e74705SXin Li // Explicit -ffp-contract=off should disable instruction fusing.
19*67e74705SXin Li // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
20*67e74705SXin Li // RUN:   -ffp-contract=off -disable-llvm-passes -o - %s \
21*67e74705SXin Li // RUN:   | FileCheck -check-prefix DISABLED %s
22*67e74705SXin Li 
23*67e74705SXin Li 
24*67e74705SXin Li #include "Inputs/cuda.h"
25*67e74705SXin Li 
// Test subject: returns a + b * c, i.e. a multiply whose result feeds an add
// within a single expression.
// Declared __host__ __device__; the RUN lines above compile this file with
// -fcuda-is-device for the nvptx-nvidia-cuda triple.
// The check lines that follow expect fma.rn.f32 under the ENABLED prefix and
// mul.rn.f32 followed by add.rn.f32 under the DISABLED prefix, so the
// expression should be kept exactly as written.
// NOTE(review): the text before `__host__` on the next line matches the
// per-line annotation pattern seen throughout this listing and is presumably
// listing residue rather than part of the test -- confirm against upstream.
func(float a,float b,float c)26*67e74705SXin Li __host__ __device__ float func(float a, float b, float c) { return a + b * c; }
27*67e74705SXin Li // ENABLED:       fma.rn.f32
28*67e74705SXin Li // ENABLED-NEXT:  st.param.f32
29*67e74705SXin Li 
30*67e74705SXin Li // DISABLED:      mul.rn.f32
31*67e74705SXin Li // DISABLED-NEXT: add.rn.f32
32*67e74705SXin Li // DISABLED-NEXT: st.param.f32
33