// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target

// By default we should fuse multiply/add into fma instruction.
// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
// RUN:   -disable-llvm-passes -o - %s | FileCheck -check-prefix ENABLED %s

// Explicit -ffp-contract=fast
// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
// RUN:   -ffp-contract=fast -disable-llvm-passes -o - %s \
// RUN:   | FileCheck -check-prefix ENABLED %s

// Explicit -ffp-contract=on -- fusing by front-end (disabled).
// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
// RUN:   -ffp-contract=on -disable-llvm-passes -o - %s \
// RUN:   | FileCheck -check-prefix DISABLED %s

// Explicit -ffp-contract=off should disable instruction fusing.
// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
// RUN:   -ffp-contract=off -disable-llvm-passes -o - %s \
// RUN:   | FileCheck -check-prefix DISABLED %s

// Test-local header; presumably supplies the __host__ / __device__ attribute
// macros used below (its contents are not visible from this file).
#include "Inputs/cuda.h"

// Function under test. `a + b * c` is a multiply whose result feeds an add,
// which is the shape that floating-point contraction can turn into one fused
// multiply-add. The directives after the definition expect:
//   - a single fma.rn.f32 followed by the result store when the first two
//     invocations above (default and =fast) are checked, and
//   - separate mul.rn.f32 and add.rn.f32 instructions followed by the result
//     store when the =on and =off invocations are checked.
// The -NEXT forms require those instructions to be on consecutive output
// lines, so nothing else may be emitted between them.
__host__ __device__ float func(float a, float b, float c) { return a + b * c; }
// ENABLED:       fma.rn.f32
// ENABLED-NEXT:  st.param.f32

// DISABLED:      mul.rn.f32
// DISABLED-NEXT: add.rn.f32
// DISABLED-NEXT: st.param.f32