xref: /aosp_15_r20/external/pytorch/c10/core/impl/GPUTrace.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 
3 #include <c10/core/impl/PyInterpreter.h>
4 
5 namespace c10::impl {
6 
7 struct C10_API GPUTrace {
8   // On the x86 architecture the atomic operations are lock-less.
9   static std::atomic<const PyInterpreter*> gpuTraceState;
10 
11   // When PyTorch migrates to C++20, this should be changed to an atomic flag.
12   // Currently, the access to this variable is not synchronized, on the basis
13   // that it will only be flipped once and by the first interpreter that
14   // accesses it.
15   static bool haveState;
16 
17   // This function will only register the first interpreter that tries to invoke
18   // it. For all of the next ones it will be a no-op.
19   static void set_trace(const PyInterpreter*);
20 
get_traceGPUTrace21   static const PyInterpreter* get_trace() {
22     if (!haveState)
23       return nullptr;
24     return gpuTraceState.load(std::memory_order_acquire);
25   }
26 };
27 
28 } // namespace c10::impl
29