1 #pragma once 2 #include <c10/macros/Export.h> 3 #include <cstdint> 4 5 namespace at::cuda { 6 7 // enqueues a kernel that spins for the specified number of cycles 8 TORCH_CUDA_CU_API void sleep(int64_t cycles); 9 10 // flushes instruction cache for ROCm; no-op for CUDA 11 TORCH_CUDA_CU_API void flush_icache(); 12 13 } // namespace at::cuda 14