1*67e74705SXin Li // Tests CUDA compilation pipeline construction in Driver. 2*67e74705SXin Li // REQUIRES: clang-driver 3*67e74705SXin Li // REQUIRES: x86-registered-target 4*67e74705SXin Li // REQUIRES: nvptx-registered-target 5*67e74705SXin Li 6*67e74705SXin Li // Simple compilation case. Compile device-side to PTX assembly and make sure 7*67e74705SXin Li // we use it on the host side. 8*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \ 9*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 10*67e74705SXin Li // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ 11*67e74705SXin Li // RUN: -check-prefix NOLINK %s 12*67e74705SXin Li 13*67e74705SXin Li // Typical compilation + link case. 14*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu %s 2>&1 \ 15*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 16*67e74705SXin Li // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ 17*67e74705SXin Li // RUN: -check-prefix LINK %s 18*67e74705SXin Li 19*67e74705SXin Li // Verify that --cuda-host-only disables device-side compilation, but doesn't 20*67e74705SXin Li // disable host-side compilation/linking. 21*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only %s 2>&1 \ 22*67e74705SXin Li // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ 23*67e74705SXin Li // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s 24*67e74705SXin Li 25*67e74705SXin Li // Verify that --cuda-device-only disables host-side compilation and linking. 
26*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only %s 2>&1 \ 27*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 28*67e74705SXin Li // RUN: -check-prefix NOHOST -check-prefix NOLINK %s 29*67e74705SXin Li 30*67e74705SXin Li // Check that the last of --cuda-compile-host-device, --cuda-host-only, and 31*67e74705SXin Li // --cuda-device-only wins. 32*67e74705SXin Li 33*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ 34*67e74705SXin Li // RUN: --cuda-host-only %s 2>&1 \ 35*67e74705SXin Li // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ 36*67e74705SXin Li // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s 37*67e74705SXin Li 38*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \ 39*67e74705SXin Li // RUN: --cuda-host-only %s 2>&1 \ 40*67e74705SXin Li // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ 41*67e74705SXin Li // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s 42*67e74705SXin Li 43*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \ 44*67e74705SXin Li // RUN: --cuda-device-only %s 2>&1 \ 45*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 46*67e74705SXin Li // RUN: -check-prefix NOHOST -check-prefix NOLINK %s 47*67e74705SXin Li 48*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \ 49*67e74705SXin Li // RUN: --cuda-device-only %s 2>&1 \ 50*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 51*67e74705SXin Li // RUN: -check-prefix NOHOST -check-prefix NOLINK %s 52*67e74705SXin Li 53*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \ 54*67e74705SXin Li // RUN: --cuda-compile-host-device %s 2>&1 \ 55*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 56*67e74705SXin Li // 
RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ 57*67e74705SXin Li // RUN: -check-prefix LINK %s 58*67e74705SXin Li 59*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ 60*67e74705SXin Li // RUN: --cuda-compile-host-device %s 2>&1 \ 61*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 62*67e74705SXin Li // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ 63*67e74705SXin Li // RUN: -check-prefix LINK %s 64*67e74705SXin Li 65*67e74705SXin Li // Verify that --cuda-gpu-arch option passes the correct GPU architecture to 66*67e74705SXin Li // device compilation. 67*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \ 68*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 69*67e74705SXin Li // RUN: -check-prefix DEVICE-SM35 -check-prefix HOST \ 70*67e74705SXin Li // RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s 71*67e74705SXin Li 72*67e74705SXin Li // Verify that there is one device-side compilation per --cuda-gpu-arch arg 73*67e74705SXin Li // and that all results are included on the host side. 74*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu \ 75*67e74705SXin Li // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \ 76*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 77*67e74705SXin Li // RUN: -check-prefix DEVICE2 -check-prefix DEVICE-SM35 \ 78*67e74705SXin Li // RUN: -check-prefix DEVICE2-SM30 -check-prefix HOST \ 79*67e74705SXin Li // RUN: -check-prefix HOST-NOSAVE -check-prefix INCLUDES-DEVICE \ 80*67e74705SXin Li // RUN: -check-prefix NOLINK %s 81*67e74705SXin Li 82*67e74705SXin Li // Verify that device-side results are passed to the correct tool when 83*67e74705SXin Li // -save-temps is used. 
84*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu -save-temps -c %s 2>&1 \ 85*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \ 86*67e74705SXin Li // RUN: -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s 87*67e74705SXin Li 88*67e74705SXin Li // Verify that device-side results are passed to the correct tool when 89*67e74705SXin Li // -fno-integrated-as is used. 90*67e74705SXin Li // RUN: %clang -### -target x86_64-linux-gnu -fno-integrated-as -c %s 2>&1 \ 91*67e74705SXin Li // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ 92*67e74705SXin Li // RUN: -check-prefix HOST -check-prefix HOST-NOSAVE \ 93*67e74705SXin Li // RUN: -check-prefix HOST-AS -check-prefix NOLINK %s 94*67e74705SXin Li 95*67e74705SXin Li // Match device-side preprocessor and compiler phases with -save-temps. 96*67e74705SXin Li // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda" 97*67e74705SXin Li // DEVICE-SAVE-SAME: "-aux-triple" "x86_64--linux-gnu" 98*67e74705SXin Li // DEVICE-SAVE-SAME: "-fcuda-is-device" 99*67e74705SXin Li // DEVICE-SAVE-SAME: "-x" "cuda" 100*67e74705SXin Li 101*67e74705SXin Li // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda" 102*67e74705SXin Li // DEVICE-SAVE-SAME: "-aux-triple" "x86_64--linux-gnu" 103*67e74705SXin Li // DEVICE-SAVE-SAME: "-fcuda-is-device" 104*67e74705SXin Li // DEVICE-SAVE-SAME: "-x" "cuda-cpp-output" 105*67e74705SXin Li 106*67e74705SXin Li // Match the job that produces PTX assembly. 
107*67e74705SXin Li // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda" 108*67e74705SXin Li // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64--linux-gnu" 109*67e74705SXin Li // DEVICE-SAME: "-fcuda-is-device" 110*67e74705SXin Li // DEVICE-SM35-SAME: "-target-cpu" "sm_35" 111*67e74705SXin Li // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]" 112*67e74705SXin Li // DEVICE-NOSAVE-SAME: "-x" "cuda" 113*67e74705SXin Li // DEVICE-SAVE-SAME: "-x" "ir" 114*67e74705SXin Li 115*67e74705SXin Li // Match the call to ptxas (which assembles PTX to SASS). 116*67e74705SXin Li // DEVICE:ptxas 117*67e74705SXin Li // DEVICE-SM35-DAG: "--gpu-name" "sm_35" 118*67e74705SXin Li // DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]" 119*67e74705SXin Li // DEVICE-DAG: "[[PTXFILE]]" 120*67e74705SXin Li 121*67e74705SXin Li // Match another device-side compilation. 122*67e74705SXin Li // DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda" 123*67e74705SXin Li // DEVICE2-SAME: "-aux-triple" "x86_64--linux-gnu" 124*67e74705SXin Li // DEVICE2-SAME: "-fcuda-is-device" 125*67e74705SXin Li // DEVICE2-SM30-SAME: "-target-cpu" "sm_30" 126*67e74705SXin Li // DEVICE2-SAME: "-o" "[[GPUBINARY2:[^"]*]]" 127*67e74705SXin Li // DEVICE2-SAME: "-x" "cuda" 128*67e74705SXin Li 129*67e74705SXin Li // Match no device-side compilation. 130*67e74705SXin Li // NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda" 131*67e74705SXin Li // NODEVICE-NOT: "-fcuda-is-device" 132*67e74705SXin Li 133*67e74705SXin Li // INCLUDES-DEVICE:fatbinary 134*67e74705SXin Li // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]" 135*67e74705SXin Li // INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]" 136*67e74705SXin Li // INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]" 137*67e74705SXin Li 138*67e74705SXin Li // Match host-side preprocessor job with -save-temps. 
139*67e74705SXin Li // HOST-SAVE: "-cc1" "-triple" "x86_64--linux-gnu" 140*67e74705SXin Li // HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda" 141*67e74705SXin Li // HOST-SAVE-NOT: "-fcuda-is-device" 142*67e74705SXin Li // HOST-SAVE-SAME: "-x" "cuda" 143*67e74705SXin Li 144*67e74705SXin Li // Match host-side compilation. 145*67e74705SXin Li // HOST: "-cc1" "-triple" "x86_64--linux-gnu" 146*67e74705SXin Li // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda" 147*67e74705SXin Li // HOST-NOT: "-fcuda-is-device" 148*67e74705SXin Li // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]" 149*67e74705SXin Li // HOST-NOSAVE-SAME: "-x" "cuda" 150*67e74705SXin Li // HOST-SAVE-SAME: "-x" "cuda-cpp-output" 151*67e74705SXin Li // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]" 152*67e74705SXin Li 153*67e74705SXin Li // Match external assembler that uses compilation output. 154*67e74705SXin Li // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]" 155*67e74705SXin Li 156*67e74705SXin Li // Match no GPU code inclusion. 157*67e74705SXin Li // NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary" 158*67e74705SXin Li 159*67e74705SXin Li // Match no host compilation. 160*67e74705SXin Li // NOHOST-NOT: "-cc1" "-triple" 161*67e74705SXin Li // NOHOST-NOT: "-x" "cuda" 162*67e74705SXin Li 163*67e74705SXin Li // Match linker. 164*67e74705SXin Li // LINK: "{{.*}}{{ld|link}}{{(.exe)?}}" 165*67e74705SXin Li // LINK-SAME: "[[HOSTOUTPUT]]" 166*67e74705SXin Li 167*67e74705SXin Li // Match no linker. 168*67e74705SXin Li // NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}" 169