xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/allocation_tracker.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_ALLOCATION_TRACKER_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_ALLOCATION_TRACKER_H_
18 
19 #include <map>
20 #include <memory>
21 #include <set>
22 #include <string>
23 #include <vector>
24 
25 #include "absl/container/flat_hash_map.h"
26 #include "tensorflow/compiler/xla/service/backend.h"
27 #include "tensorflow/compiler/xla/statusor.h"
28 #include "tensorflow/compiler/xla/types.h"
29 #include "tensorflow/compiler/xla/xla_data.pb.h"
30 
31 namespace xla {
32 
33 // Tracks allocations for the XLA service; allocations can be registered
34 // with shape/device/tag and resolved from a handle for later use.
35 class AllocationTracker {
36  public:
37   // The allocator is used for deallocating memory when allocations are
38   // deregistered. All registered allocations must have the same platform as the
39   // allocator.
AllocationTracker(Backend * backend)40   AllocationTracker(Backend* backend) : backend_(backend), next_handle_(1) {}
41 
42   // Registers a shaped buffer of device memory, and returns a corresponding
43   // handle that can be used for talking to XLA clients. The given shaped buffer
44   // will be treated as the buffer corresponding to the only replica.
45   StatusOr<GlobalDataHandle> Register(ScopedShapedBuffer shaped_buffer,
46                                       const std::string& tag);
47 
48   // Registers a vector of shaped buffers of device memory, one per replica, and
49   // returns a corresponding handle that can be used for talking to XLA clients.
50   StatusOr<GlobalDataHandle> RegisterReplicatedBuffers(
51       std::vector<ScopedShapedBuffer> replicated_buffers,
52       const std::string& tag);
53 
54   // Unregister the allocation for the given data handle.
55   Status Unregister(const GlobalDataHandle& data);
56 
57   // Returns a vector of global data handles that point to the tuple elements.
58   StatusOr<std::vector<GlobalDataHandle>> DeconstructTuple(
59       const GlobalDataHandle& Data);
60 
61   // Resolve a handle from an XLA client to a vector of shaped buffers, one per
62   // replica, or provide an error status to say whether any of those buffers
63   // were not found (or found, but found deallocated).
64   StatusOr<std::vector<const ShapedBuffer*>> Resolve(
65       const GlobalDataHandle& data) const;
66 
67   // Resolves a handle from an XLA client and replica id to a shaped buffer, or
68   // provide an error status to say whether it was not found (or found, but
69   // found deallocated).
70   StatusOr<const ShapedBuffer*> ResolveForReplica(const GlobalDataHandle& data,
71                                                   int replica_id) const;
72 
73  private:
74   // Data structure encapsulating single memory allocation on the device.
75   struct Allocation {
76     // The pointer to this allocation.
77     se::OwningDeviceMemory device_memory;
78 
79     // This is the number of times this memory allocation is referred to by
80     // registered data handles.
81     int ref_count;
82   };
83 
84   // Internal helper which resolves the given GlobalDataHandle to a
85   // list of ScopedShapedBuffers.
86   StatusOr<std::vector<const ShapedBuffer*>> ResolveInternal(
87       const GlobalDataHandle& data) const ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
88 
89   // Internal helper which registers a vector of shaped buffers, one per
90   // replica.  ShapedBufferTy is either ScopedShapedBuffer or ShapedBuffer.  If
91   // it's ShapedBuffer, all of the given buffers must already be tracked by this
92   // object -- presumably this is a call from DeconstructTuple.
93   template <typename ShapedBufferTy>
94   StatusOr<GlobalDataHandle> RegisterInternal(
95       std::vector<ShapedBufferTy> replicated_buffers, const std::string& tag)
96       ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
97 
98   // Adds the given device address to the allocation tracker, or if it already
99   // exists, then increment its reference count.
100   void AddAllocationOrIncrementRefCount(se::DeviceMemoryBase device_memory,
101                                         int device_ordinal)
102       ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
103 
104   // Decrements the reference count of the given device memory. Then, if it is
105   // zero, deallocate the memory.
106   Status DecrementRefCount(se::DeviceMemoryBase device_memory,
107                            int device_ordinal)
108       ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
109 
110   // A map from device memory opaque value to allocation. One such map is
111   // maintained per device ordinal.
112   using AllocationMap = absl::flat_hash_map<const void*, Allocation>;
113 
114   mutable absl::Mutex mutex_;
115 
116   // Backend to use with this tracker. The backend supplies the memory allocator
117   // to use when deallocating memory.
118   Backend* backend_;
119 
120   // The next handle to assign to an allocation, guarded by the same mutex as
121   // the mapping as they'll be mutated at the same time.
122   int64_t next_handle_ ABSL_GUARDED_BY(mutex_);
123 
124   // A map from device ordinal to AllocationMap.
125   absl::flat_hash_map<int, AllocationMap> opaque_to_allocation_map_
126       ABSL_GUARDED_BY(mutex_);
127 
128   // A map from data handle to a vector of shaped buffers that represent the
129   // buffers for different replicas.
130   //
131   // The ShapedBuffers in this map's vectors need to be unique_ptrs, because our
132   // public API returns pointers to them.  We expect the concrete class to be
133   // ShapedBuffer and never ScopedShapedBuffer; deallocation of buffers is
134   // handled by opaque_to_allocation_map_.
135   //
136   // The elements of the vectors need to be unique_ptrs because we return
137   // pointers to them.  (In theory we could use std::list or something instead,
138   // but we also want to be able to null out these elements.)
139   //
140   // The reason that the elements can't be unique_ptr<ScopedShapedBuffer>s is
141   // the existence of DeconstructTuple().  This function allows us to create a
142   // non-owning "view" into a tuple's sub-buffers.  The sub-buffers are then
143   // free'd when both the view *and* the original tuple are Unregistered.  This
144   // refcounting is managed in opaque_to_allocation_map_.
145   absl::flat_hash_map<int64_t, std::vector<std::unique_ptr<ShapedBuffer>>>
146       handle_to_shaped_buffers_ ABSL_GUARDED_BY(mutex_);
147 
148   AllocationTracker(const AllocationTracker&) = delete;
149   AllocationTracker& operator=(const AllocationTracker&) = delete;
150 };
151 
152 }  // namespace xla
153 
154 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_ALLOCATION_TRACKER_H_
155