1 #pragma once 2 3 #include <c10/core/impl/PyInterpreter.h> 4 5 namespace c10::impl { 6 7 struct C10_API GPUTrace { 8 // On the x86 architecture the atomic operations are lock-less. 9 static std::atomic<const PyInterpreter*> gpuTraceState; 10 11 // When PyTorch migrates to C++20, this should be changed to an atomic flag. 12 // Currently, the access to this variable is not synchronized, on the basis 13 // that it will only be flipped once and by the first interpreter that 14 // accesses it. 15 static bool haveState; 16 17 // This function will only register the first interpreter that tries to invoke 18 // it. For all of the next ones it will be a no-op. 19 static void set_trace(const PyInterpreter*); 20 get_traceGPUTrace21 static const PyInterpreter* get_trace() { 22 if (!haveState) 23 return nullptr; 24 return gpuTraceState.load(std::memory_order_acquire); 25 } 26 }; 27 28 } // namespace c10::impl 29