/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"

#include <cmath>
#include <cstddef>
#include <vector>

#include "tensorflow/core/common_runtime/device/device_id_utils.h"
#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/stream_executor.h"

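// Each debug allocation is bracketed by two guard regions ("masks") of
// MASK_BYTES each: a header written immediately before the client buffer and
// a footer written immediately after it. If either region no longer holds its
// original bit pattern, some kernel wrote out of bounds.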
#define MASK_WORDS 2
#define MASK_BYTES (MASK_WORDS * sizeof(int64_t))

namespace tensorflow {
namespace {

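// Returns a heap-allocated array of MASK_WORDS copies of `word`, used as the
// expected contents of a guard region. The masks are never freed; they live
// for the lifetime of the process.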
int64_t* NewMask(int64_t word) {
  int64_t* m = new int64_t[MASK_WORDS];
  for (int i = 0; i < MASK_WORDS; ++i) {
    m[i] = word;
  }
  return m;
}

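// Distinct sentinel patterns for the header and footer, so that when a guard
// value turns up somewhere unexpected it is clear which region it came from.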
int64_t* before_mask = NewMask(0xabababababababab);
int64_t* after_mask = NewMask(0xcdcdcdcdcdcdcdcd);

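// Copies the guard region at `ptr` back to the host and compares it against
// `mask`. Returns true iff every word still matches; mismatches are logged.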
bool CheckMask(se::StreamExecutor* exec, void* ptr, int64_t* mask) {
  se::DeviceMemory<int64_t> gpu_ptr{se::DeviceMemoryBase{ptr, MASK_BYTES}};
  int64_t tmp[MASK_WORDS];

  Status result = exec->SynchronousMemcpyD2H(gpu_ptr, MASK_BYTES, tmp);
  if (!result.ok()) {
    LOG(FATAL) << "Could not copy debug mask, " << result;
  }

  bool ok = true;
  for (int i = 0; i < MASK_WORDS; ++i) {
    ok &= (mask[i] == tmp[i]);
    if (!ok) {
      LOG(ERROR) << "i=" << i
                 << " mask=" << reinterpret_cast<const void*>(mask[i])
                 << " field=" << reinterpret_cast<const void*>(tmp[i]);
    }
  }

  return ok;
}

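// Writes the `mask` pattern into device memory at `ptr`.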
void InitMask(se::StreamExecutor* exec, void* ptr, int64_t* mask) {
  se::DeviceMemory<int64_t> gpu_ptr{se::DeviceMemoryBase{ptr, MASK_BYTES}};
  Status result = exec->SynchronousMemcpyH2D(mask, MASK_BYTES, &gpu_ptr);
  if (!result.ok()) {
    LOG(FATAL) << "Could not copy debug mask, " << result;
  }
}

}  // namespace

// -----------------------------------------------------------------------------
// GPUDebugAllocator
// -----------------------------------------------------------------------------
GPUDebugAllocator::GPUDebugAllocator(Allocator* allocator,
                                     PlatformDeviceId platform_device_id)
    : base_allocator_(allocator) {
  stream_exec_ = DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(),
                                                           platform_device_id)
                     .ValueOrDie();
}

GPUDebugAllocator::~GPUDebugAllocator() { delete base_allocator_; }

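// Allocates MASK_BYTES extra on each side of the request and lays the block
// out as:
//
//   [ before_mask | client buffer | after_mask ]
//
// The returned pointer addresses the client buffer, so callers never see the
// guards. Note the client pointer is offset from the base allocation by
// MASK_BYTES, so alignments larger than MASK_BYTES are not preserved.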
void* GPUDebugAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
  num_bytes += (2 * MASK_BYTES);
  void* allocated_ptr = base_allocator_->AllocateRaw(alignment, num_bytes);
  if (allocated_ptr == nullptr) return allocated_ptr;

  // Return the pointer after the header.
  void* rv = static_cast<char*>(allocated_ptr) + MASK_BYTES;

  // Write the header at allocated_ptr.
  InitMask(stream_exec_, allocated_ptr, before_mask);

  // Write the footer at the end.
  size_t req_size = base_allocator_->RequestedSize(allocated_ptr);
  InitMask(stream_exec_,
           static_cast<char*>(allocated_ptr) + req_size - MASK_BYTES,
           after_mask);
  return rv;
}
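
// Verifies both guard regions before returning the block to the underlying
// allocator; a corrupted mask means some kernel wrote out of bounds.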
void GPUDebugAllocator::DeallocateRaw(void* ptr) {
  if (ptr != nullptr) {
    CHECK(CheckHeader(ptr)) << "before_mask has been overwritten";
    CHECK(CheckFooter(ptr)) << "after_mask has been overwritten";

    // Backtrack to the beginning of the header.
    ptr = static_cast<void*>(static_cast<char*>(ptr) - MASK_BYTES);
  }
  // Deallocate the memory.
  base_allocator_->DeallocateRaw(ptr);
}

bool GPUDebugAllocator::TracksAllocationSizes() const { return true; }

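// Size and ID queries translate the client pointer back to the base
// allocation; reported sizes exclude the two guard regions, so the debug
// allocator is transparent to callers.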
size_t GPUDebugAllocator::RequestedSize(const void* ptr) const {
  auto req_size = base_allocator_->RequestedSize(static_cast<const char*>(ptr) -
                                                 MASK_BYTES);
  return req_size - 2 * MASK_BYTES;
}

size_t GPUDebugAllocator::AllocatedSize(const void* ptr) const {
  return base_allocator_->AllocatedSize(static_cast<const char*>(ptr) -
                                        MASK_BYTES);
}

int64_t GPUDebugAllocator::AllocationId(const void* ptr) const {
  return base_allocator_->AllocationId(static_cast<const char*>(ptr) -
                                       MASK_BYTES);
}

absl::optional<AllocatorStats> GPUDebugAllocator::GetStats() {
  return base_allocator_->GetStats();
}

bool GPUDebugAllocator::ClearStats() { return base_allocator_->ClearStats(); }

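// The header sits MASK_BYTES before the client pointer; the footer sits at
// the end of the underlying allocation.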
bool GPUDebugAllocator::CheckHeader(void* ptr) {
  return CheckMask(stream_exec_, static_cast<char*>(ptr) - MASK_BYTES,
                   before_mask);
}

bool GPUDebugAllocator::CheckFooter(void* ptr) {
  char* original_ptr = static_cast<char*>(ptr) - MASK_BYTES;
  size_t req_size = base_allocator_->RequestedSize(original_ptr);
  return CheckMask(stream_exec_, original_ptr + req_size - MASK_BYTES,
                   after_mask);
}

// -----------------------------------------------------------------------------
// GPUNanResetAllocator
// -----------------------------------------------------------------------------
GPUNanResetAllocator::GPUNanResetAllocator(Allocator* allocator,
                                           PlatformDeviceId platform_device_id)
    : base_allocator_(allocator) {
  stream_exec_ = DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(),
                                                           platform_device_id)
                     .ValueOrDie();
}

GPUNanResetAllocator::~GPUNanResetAllocator() { delete base_allocator_; }

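// Fills new buffers with quiet NaNs so that any read of memory the client
// never wrote produces NaNs, which propagate conspicuously through
// floating-point computations.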
void* GPUNanResetAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
  void* allocated_ptr = base_allocator_->AllocateRaw(alignment, num_bytes);
  if (allocated_ptr == nullptr) return allocated_ptr;

  // Initialize the buffer to NaNs.
  size_t req_size = base_allocator_->RequestedSize(allocated_ptr);
  std::vector<float> nans((req_size + sizeof(float) - 1) / sizeof(float),
                          std::nanf(""));
  se::DeviceMemory<float> nan_ptr{
      se::DeviceMemoryBase{static_cast<float*>(allocated_ptr), req_size}};

  Status result =
      stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, &nan_ptr);
  if (!result.ok()) {
    LOG(ERROR) << "Could not initialize to NaNs, " << result;
  }

  return allocated_ptr;
}
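
// Refills the buffer with NaNs on free so that use-after-free reads are
// similarly conspicuous.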
void GPUNanResetAllocator::DeallocateRaw(void* ptr) {
  if (ptr != nullptr) {
    // Reset the buffer to NaNs.
    size_t req_size = base_allocator_->RequestedSize(ptr);
    std::vector<float> nans((req_size + sizeof(float) - 1) / sizeof(float),
                            std::nanf(""));
    se::DeviceMemory<float> nan_ptr{
        se::DeviceMemoryBase{static_cast<float*>(ptr), req_size}};
    Status result =
        stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, &nan_ptr);
    if (!result.ok()) {
      LOG(ERROR) << "Could not reset to NaNs, " << result;
    }
  }

  // Deallocate the memory.
  base_allocator_->DeallocateRaw(ptr);
}

size_t GPUNanResetAllocator::RequestedSize(const void* ptr) const {
  return base_allocator_->RequestedSize(ptr);
}

size_t GPUNanResetAllocator::AllocatedSize(const void* ptr) const {
  return base_allocator_->AllocatedSize(ptr);
}

absl::optional<AllocatorStats> GPUNanResetAllocator::GetStats() {
  return base_allocator_->GetStats();
}

bool GPUNanResetAllocator::ClearStats() {
  return base_allocator_->ClearStats();
}

}  // namespace tensorflow