#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <c10/cuda/CUDADeviceAssertion.h>
#include <c10/cuda/CUDAException.h>
#include <c10/cuda/CUDAFunctions.h>
#include <c10/cuda/CUDAStream.h>

#include <chrono>
#include <iostream>
#include <string>
#include <thread>

using ::testing::HasSubstr;

/**
 * Device kernel that takes multiple integer parameters as arguments and
 * will always trigger a device-side assertion.
 */
__global__ void cuda_multiple_vars_always_fail_assertion_kernel(
    const int a,
    const int b,
    const int c,
    const int d,
    TORCH_DSA_KERNEL_ARGS) {
  int i = a + b + c + d;
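  // Neither branch below can succeed: `i == -i` holds only when i is zero,
  // and `i == i + 1` never holds, so one of the two assertions always fires.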
  if (i != 0) {
    CUDA_KERNEL_ASSERT2(i == -i);
  } else {
    CUDA_KERNEL_ASSERT2(i == i + 1);
  }
}

/**
 * Device kernel that takes a single integer parameter as argument and
 * will always trigger a device-side assertion.
 */
__global__ void cuda_always_fail_assertion_kernel(
    const int a,
    TORCH_DSA_KERNEL_ARGS) {
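  // `a != a` is false for every value of `a`, so this assertion always fires.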
  CUDA_KERNEL_ASSERT2(a != a);
}

/**
 * TEST: Triggering a device-side assertion on a simple <<<1,1>>> config.
 * The kernel used takes multiple variables as parameters to the function.
 */
void cuda_device_assertions_catches_stream() {
  const auto stream = c10::cuda::getStreamFromPool();
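  // Launching through TORCH_DSA_KERNEL_LAUNCH records the kernel name, file,
  // function, and stream, all of which are checked against the error message
  // produced below.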
  TORCH_DSA_KERNEL_LAUNCH(
      cuda_multiple_vars_always_fail_assertion_kernel,
      1, /* Blocks */
      1, /* Threads */
      0, /* Shared mem */
      stream, /* Stream */
      1, /* const int a */
      2, /* const int b */
      3, /* const int c */
      4 /* const int d */
  );

  try {
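    // The device-side assertion only becomes visible to the host once the
    // device is synchronized, so device_synchronize() is expected to throw a
    // c10::Error here; reaching the throw below means the test failed.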
    c10::cuda::device_synchronize();
    throw std::runtime_error("Test didn't fail, but should have.");
  } catch (const c10::Error& err) {
    const auto err_str = std::string(err.what());
    ASSERT_THAT(
        err_str, HasSubstr("# of GPUs this process interacted with = 1"));
    ASSERT_THAT(
        err_str,
        HasSubstr("CUDA device-side assertion failures were found on GPU #0!"));
    ASSERT_THAT(
        err_str, HasSubstr("Thread ID that failed assertion = [0,0,0]"));
    ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [0,0,0]"));
    ASSERT_THAT(err_str, HasSubstr("Device that launched kernel = 0"));
    ASSERT_THAT(
        err_str,
        HasSubstr(
            "Name of kernel launched that led to failure = cuda_multiple_vars_always_fail_assertion_kernel"));
    ASSERT_THAT(
        err_str, HasSubstr("File containing kernel launch = " __FILE__));
    ASSERT_THAT(
        err_str,
        HasSubstr(
            "Function containing kernel launch = " +
            std::string(__FUNCTION__)));
    ASSERT_THAT(
        err_str,
        HasSubstr(
            "Stream kernel was launched on = " + std::to_string(stream.id())));
  }
}

TEST(CUDATest, cuda_device_assertions_catches_stream) {
#ifdef TORCH_USE_CUDA_DSA
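  // The compile-time flag only makes DSA available; the feature also has to
  // be switched on at runtime through the kernel launch registry.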
  c10::cuda::CUDAKernelLaunchRegistry::get_singleton_ref().enabled_at_runtime =
      true;
  cuda_device_assertions_catches_stream();
#else
  GTEST_SKIP()
      << "CUDA device-side assertions (DSA) were not enabled at compile time.";
#endif
}