xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/tpu/tpu_executor_c_api.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_
17 #define TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_
18 
19 #include <stddef.h>
20 #include <stdint.h>
21 
22 #include "tensorflow/c/tf_attrtype.h"
23 #include "tensorflow/compiler/xla/stream_executor/tpu/c_api_decl.h"
24 #include "tensorflow/core/tpu/libtftpu.h"
25 
26 extern "C" {
27 
// Platform-level entry points. TpuPlatform_New returns an SE_Platform* and
// every other function in this group takes that handle as its first argument.
// TpuPlatform_Initialize and TpuPlatform_GetExecutor additionally take a
// TF_Status* out-parameter (presumably how failures are reported -- confirm
// against the implementation). `options_key` / `options_value` are presumably
// parallel arrays of `options_size` entries -- TODO confirm.
28 SE_Platform* TpuPlatform_New();
29 void TpuPlatform_Free(SE_Platform* platform);
30 void TpuPlatform_Initialize(SE_Platform* platform, size_t options_size,
31                             const char** options_key,
32                             const char** options_value, TF_Status* status);
33 bool TpuPlatform_Initialized(SE_Platform* platform);
34 SE_StreamExecutor* TpuPlatform_GetExecutor(SE_Platform* platform,
35                                            SE_StreamExecutorConfig* config,
36                                            TF_Status* status);
37 SE_PlatformId TpuPlatform_Id(SE_Platform* platform);
38 int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform);
39 int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform);
40 bool TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy(SE_Platform* platform);
41 SE_TpuTopology* TpuPlatform_GetTopologyPtr(SE_Platform* platform);
42 SE_TpuTopology_Host* TpuPlatform_GetHostLocation(SE_Platform* platform);
43 TpuRuntimeVersion TpuPlatform_GetRuntimeVersion(SE_Platform* platform);
44 
// Executor set-up, tear-down and device-count query. All three operate on an
// existing SE_StreamExecutor handle; TpuExecutor_Init also takes the device
// ordinal, an SE_DeviceOptions handle and a TF_Status* out-parameter.
45 void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal,
46                       SE_DeviceOptions* device_options, TF_Status* status);
47 void TpuExecutor_Free(SE_StreamExecutor* executor);
48 
49 int TpuExecutor_PlatformDeviceCount(SE_StreamExecutor* executor);
50 
// Device-memory entry points. TpuExecutor_Allocate returns an
// SE_DeviceMemoryBase by value, while TpuExecutor_Deallocate takes a pointer
// to one. The two bool-returning queries write their results through the
// `stats` / `free` / `total` out-parameters (the bool presumably signals
// whether the values were filled in -- TODO confirm).
51 SE_DeviceMemoryBase TpuExecutor_Allocate(SE_StreamExecutor* executor,
52                                          uint64_t size, int64_t memory_space);
53 void TpuExecutor_Deallocate(SE_StreamExecutor* executor,
54                             SE_DeviceMemoryBase* memory);
55 bool TpuExecutor_GetAllocatorStats(SE_StreamExecutor* executor,
56                                    SE_AllocatorStats* stats);
57 bool TpuExecutor_DeviceMemoryUsage(SE_StreamExecutor* executor, int64_t* free,
58                                    int64_t* total);
59 
// Stream management on an executor. Each function takes the executor plus the
// SE_Stream handle(s) it concerns; TpuExecutor_GetStatus writes into the
// supplied TF_Status* rather than returning a value.
60 bool TpuExecutor_AllocateStream(SE_StreamExecutor* executor, SE_Stream* stream);
61 void TpuExecutor_DeallocateStream(SE_StreamExecutor* executor,
62                                   SE_Stream* stream);
63 bool TpuExecutor_CreateStreamDependency(SE_StreamExecutor* executor,
64                                         SE_Stream* dependent, SE_Stream* other);
65 void TpuExecutor_GetStatus(SE_StreamExecutor* executor, SE_Stream* stream,
66                            TF_Status* status);
67 
// Returns an SE_TpuTopology_Core* for the given executor. Ownership of the
// returned pointer is not stated in this header -- TODO confirm.
68 SE_TpuTopology_Core* TpuExecutor_GetCoreLocation(SE_StreamExecutor* executor);
69 
// Event entry points on an executor. All take the executor and an SE_Event
// handle; RecordEvent and WaitForEvent also take the SE_Stream involved.
// TpuExecutor_PollForEventStatus is the only one returning a value (an int
// whose encoding is not defined in this header -- TODO confirm); the others
// take a TF_Status* out-parameter.
70 void TpuExecutor_AllocateEvent(SE_StreamExecutor* executor, SE_Event* event,
71                                TF_Status* status);
72 void TpuExecutor_DeallocateEvent(SE_StreamExecutor* executor, SE_Event* event,
73                                  TF_Status* status);
74 int TpuExecutor_PollForEventStatus(SE_StreamExecutor* executor,
75                                    SE_Event* event);
76 void TpuExecutor_RecordEvent(SE_StreamExecutor* executor, SE_Stream* stream,
77                              SE_Event* event, TF_Status* status);
78 void TpuExecutor_WaitForEvent(SE_StreamExecutor* executor, SE_Stream* stream,
79                               SE_Event* event, TF_Status* status);
80 
// Timer entry points on an executor. Start/Stop take the SE_Stream the timer
// is associated with. These functions have no TF_Status* parameter; three of
// them return bool (presumably success -- TODO confirm).
81 bool TpuExecutor_AllocateTimer(SE_StreamExecutor* executor, SE_Timer* timer);
82 void TpuExecutor_DeallocateTimer(SE_StreamExecutor* executor, SE_Timer* timer);
83 bool TpuExecutor_StartTimer(SE_StreamExecutor* executor, SE_Stream* stream,
84                             SE_Timer* timer);
85 bool TpuExecutor_StopTimer(SE_StreamExecutor* executor, SE_Stream* stream,
86                            SE_Timer* timer);
87 
// Host <-> device copies of `size` bytes. The two "Synchronous" variants take
// no stream and have a TF_Status* out-parameter; the other two take an
// SE_Stream* and return bool instead. Source buffers are const-qualified
// (`device_src`, `host_src`); destinations are not.
88 void TpuExecutor_SynchronousMemcpyToHost(SE_StreamExecutor* executor,
89                                          void* host_dst,
90                                          const SE_DeviceMemoryBase* device_src,
91                                          uint64_t size, TF_Status* status);
92 void TpuExecutor_SynchronousMemcpyFromHost(SE_StreamExecutor* executor,
93                                            SE_DeviceMemoryBase* device_dst,
94                                            const void* host_src, uint64_t size,
95                                            TF_Status* status);
96 bool TpuExecutor_MemcpyToHost(SE_StreamExecutor* executor, SE_Stream* stream,
97                               void* host_dst,
98                               const SE_DeviceMemoryBase* device_src,
99                               uint64_t size);
100 
101 bool TpuExecutor_MemcpyFromHost(SE_StreamExecutor* executor, SE_Stream* stream,
102                                 SE_DeviceMemoryBase* device_dst,
103                                 const void* host_src, uint64_t size);
104 
// Infeed / outfeed entry points, each addressed by a 32-bit queue index.
// EnqueueInfeed reads from a const byte buffer of `size` bytes; DequeueOutfeed
// takes a writable byte buffer and a `size`. All four take a TF_Status*
// out-parameter.
105 void TpuExecutor_EnqueueInfeed(SE_StreamExecutor* executor,
106                                int32_t infeed_queue_index, const uint8_t* data,
107                                int64_t size, TF_Status* status);
108 void TpuExecutor_DequeueOutfeed(SE_StreamExecutor* executor,
109                                 int32_t outfeed_queue_index, uint8_t* data,
110                                 int64_t size, TF_Status* status);
111 void TpuExecutor_WaitForInfeedReady(SE_StreamExecutor* executor,
112                                     int32_t infeed_queue_index,
113                                     TF_Status* status);
114 void TpuExecutor_WaitForOutfeedReady(SE_StreamExecutor* executor,
115                                      int32_t outfeed_queue_index,
116                                      TF_Status* status);
117 
// Blocking / synchronization entry points. Only TpuExecutor_BlockHostUntilDone
// takes a specific SE_Stream*; the others operate on the executor as a whole.
// Exact blocking semantics are not specified in this header -- see the
// implementation.
118 void TpuExecutor_BlockHostUntilDone(SE_StreamExecutor* executor,
119                                     SE_Stream* stream, TF_Status* status);
120 void TpuExecutor_BlockUntilDoneOrFailed(SE_StreamExecutor* executor,
121                                         TF_Status* status);
122 void TpuExecutor_SyncAndForgetFailedStreams(SE_StreamExecutor* executor);
123 bool TpuExecutor_SynchronizeAllActivity(SE_StreamExecutor* executor);
124 
// Program unloading and HBM compaction requests. Both take a TF_Status*
// out-parameter; the compaction call also names the SE_Stream to enqueue on.
125 void TpuExecutor_UnloadAllPrograms(SE_StreamExecutor* executor,
126                                    TF_Status* status);
127 void TpuExecutor_EnqueueCompactionOnStreamForHbm(SE_StreamExecutor* executor,
128                                                  SE_Stream* compaction_stream,
129                                                  TF_Status* status);
130 
// SE_Stream object functions. TpuStream_New creates a stream from its parent
// executor and TpuStream_Free takes one back. Unlike the TpuExecutor_Memcpy*
// functions, the transfer functions here take SE_DeviceMemoryBase by value.
// NOTE(review): `host_src` in TpuStream_EnqueueTransferHostToDevice is a
// non-const `void*` even though it names a source buffer -- confirm whether
// that is intentional.
131 SE_Stream* TpuStream_New(SE_StreamExecutor* parent);
132 void TpuStream_Free(SE_Stream*);
133 void* TpuStream_Stream(SE_Stream*);
134 bool TpuStream_Status(SE_Stream*);
135 bool TpuStream_IsSameSharedMemoryLocation(SE_Stream*, SE_Stream*);
136 void TpuStream_EnqueueTransferHostToDevice(SE_Stream* stream,
137                                            SE_DeviceMemoryBase device_dst,
138                                            void* host_src, uint64_t size,
139                                            TF_Status* status);
140 void TpuStream_EnqueueTransferDeviceToHost(SE_Stream* stream,
141                                            SE_DeviceMemoryBase device_src,
142                                            void* host_dst, uint64_t size,
143                                            TF_Status* status);
144 void TpuStream_TpuEnqueueOnDeviceSendRecvLocal(SE_Stream* stream,
145                                                SE_DeviceMemoryBase send_buffer,
146                                                SE_DeviceMemoryBase recv_buffer,
147                                                TF_Status* status);
148 
// SE_Event object creation (from a parent executor) and release.
149 SE_Event* TpuEvent_New(SE_StreamExecutor* parent);
150 void TpuEvent_Free(SE_Event*);
151 
// SE_Timer object creation (from a parent executor), release, and two int64_t
// readouts named for nanoseconds and microseconds.
152 SE_Timer* TpuTimer_New(SE_StreamExecutor* parent);
153 void TpuTimer_Free(SE_Timer*);
154 int64_t TpuTimer_Nanoseconds(SE_Timer*);
155 int64_t TpuTimer_Microseconds(SE_Timer*);
156 
// TF_Status helpers. TpuStatus_New and TpuStatus_Create both return a
// TF_Status*; TpuStatus_Free takes one. TpuStatus_Set takes an explicit `len`
// alongside `msg`, whereas TpuStatus_Create takes only `msg` (presumably
// NUL-terminated -- TODO confirm). The meaning of the integer `code` values is
// not defined in this header.
157 TF_Status* TpuStatus_New();
158 TF_Status* TpuStatus_Create(int32_t code, const char* msg);
159 void TpuStatus_Set(TF_Status* status, int32_t code, const char* msg,
160                    int32_t len);
161 void TpuStatus_Free(TF_Status* status);
162 const char* TpuStatus_Message(TF_Status* status);
163 int TpuStatus_Code(TF_Status* status);
164 bool TpuStatus_Ok(TF_Status* status);
165 
// SE_StreamExecutorConfig helpers: obtain a default config, set its ordinal,
// and free it. The config type is what TpuPlatform_GetExecutor accepts.
166 SE_StreamExecutorConfig* TpuStreamExecutorConfig_Default();
167 void TpuStreamExecutorConfig_SetOrdinal(SE_StreamExecutorConfig*, int ordinal);
168 void TpuStreamExecutorConfig_Free(SE_StreamExecutorConfig*);
169 
// SE_DeviceDescription allocation and release, plus the executor call that
// takes a caller-supplied `description` (presumably filling it in -- TODO
// confirm) and a TF_Status* out-parameter.
170 SE_DeviceDescription* TpuDeviceDescription_New();
171 void TpuDeviceDescription_Free(SE_DeviceDescription* description);
172 void TpuExecutor_CreateDeviceDescription(SE_StreamExecutor* executor,
173                                          SE_DeviceDescription* description,
174                                          TF_Status* status);
175 
// SE_DeviceOptions creation from an unsigned `flags` value, and release. The
// flag bits are not defined in this header. The options type is what
// TpuExecutor_Init accepts.
176 SE_DeviceOptions* TpuExecutor_NewDeviceOptions(unsigned flags);
177 void TpuExecutor_FreeDeviceOptions(SE_DeviceOptions* options);
178 
// Takes a callback of type SE_StatusCallbackFn together with an opaque `ctx`
// pointer, associated with `stream` on `executor`. Presumably `ctx` is handed
// back to `callback_fn` when it runs -- TODO confirm against the
// SE_StatusCallbackFn declaration.
179 bool TpuExecutor_HostCallback(SE_StreamExecutor* executor, SE_Stream* stream,
180                               SE_StatusCallbackFn callback_fn, void* ctx);
181 
// XLA_TransferManager entry points. TpuTransferManager_New returns the manager
// handle that most functions below take as their first argument;
// TpuTransferManager_GetInfeedLayout, TpuTransferManager_FreeBuffers and
// TpuTransferManager_ReadDynamicShapes do not take a manager.
//
// TpuTransferManager_LinearizeToBuffers returns its results through the
// `buffers_array`, `buffers_size` and `buffers_array_size` out-parameters;
// TpuTransferManager_FreeBuffers accepts the same three values (presumably to
// release them -- TODO confirm).
//
// NOTE(review): TpuTransferManager_ReadDynamicShapes takes
// `const XLA_Shape& original_shape`, a C++ reference, inside this extern "C"
// block, so this header can only be compiled as C++.
181 XLA_TransferManager* TpuTransferManager_New();
182 void TpuTransferManager_Free(XLA_TransferManager* manager);
183 SE_PlatformId TpuTransferManager_PlatformId(XLA_TransferManager* manager);
184 void TpuTransferManager_HostShapeToDeviceShape(XLA_TransferManager* manager,
185                                                XLA_Shape* host_shape,
186                                                XLA_Shape* device_shape);
187 void TpuTransferManager_TransferLiteralToDeviceAsync(
188     XLA_TransferManager* manager, SE_Stream* stream, XLA_Literal* literal,
189     XLA_ShapedBuffer* device_buffer, TF_Status* status);
190 void TpuTransferManager_TransferLiteralFromDevice(
191     XLA_TransferManager* manager, SE_Stream* stream,
192     XLA_ShapedBuffer* device_buffer, XLA_Literal* literal,
193     XLA_StatusCallbackFn callback, void* ctx);
194 int64_t TpuTransferManager_GetByteSizeRequirement(XLA_TransferManager* manager,
195                                                   XLA_Shape* shape);
196 void TpuTransferManager_ChooseCompactLayoutForShape(
197     XLA_TransferManager* manager, XLA_Shape* host_shape, XLA_Shape* output,
198     TF_Status* status);
199 bool TpuTransferManager_CanShapedBufferBeAccessedNow(
200     XLA_TransferManager* manager, SE_StreamExecutor* executor,
201     XLA_ShapedBuffer* device_buffer);
202 bool TpuTransferManager_CanBufferBeAccessedNow(
203     XLA_TransferManager* manager, SE_StreamExecutor* executor,
204     SE_DeviceMemoryBase* device_buffer);
205 void TpuTransferManager_WriteSingleTupleIndexTable(
206     XLA_TransferManager* manager, SE_Stream* stream,
207     SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape,
208     SE_DeviceMemoryBase* region, TF_Status* status);
209 void TpuTransferManager_GetInfeedLayout(XLA_Shape* shape,
210                                         XLA_Shape* infeed_shape);
211 void TpuTransferManager_LinearizeToBuffers(
212     XLA_TransferManager* manager, XLA_Literal* c_literal, char*** buffers_array,
213     int64_t** buffers_size, int64_t* buffers_array_size, TF_Status* status);
214 void TpuTransferManager_FreeBuffers(char** buffers_array, int64_t* buffers_size,
215                                     int64_t buffers_array_size);
216 void TpuTransferManager_TransferLiteralToInfeed(XLA_TransferManager* manager,
217                                                 SE_StreamExecutor* executor,
218                                                 XLA_Literal* c_literal,
219                                                 TF_Status* status);
220 void TpuTransferManager_TransferBuffersToInfeed(XLA_TransferManager* manager,
221                                                 SE_StreamExecutor* executor,
222                                                 uint32_t** buffers_array,
223                                                 int64_t* buffers_size_in_uint32,
224                                                 int64_t buffers_array_size,
225                                                 TF_Status* status);
226 void TpuTransferManager_TransferLiteralFromOutfeed(
227     XLA_TransferManager* manager, SE_StreamExecutor* executor,
228     XLA_Shape* shape /*deprecated*/, XLA_Literal* c_literal, TF_Status* status);
229 void TpuTransferManager_ResetDevices(XLA_TransferManager* manager,
230                                      SE_StreamExecutor** executors,
231                                      int64_t num_executors, TF_Status* status);
232 void TpuTransferManager_ReadDynamicShapes(SE_Stream* stream,
233                                           XLA_ShapedBuffer* buffer,
234                                           const XLA_Shape& original_shape,
235                                           XLA_Shape* updated_shape,
236                                           TF_Status* status);
238 
// XLA_ComputationPlacer creation/release and device assignment. The two
// Assign* functions share the same `replica_count` / `computation_count` /
// `assignment` / `status` parameters; AssignDevices takes the placer, while
// AssignLocalDevices takes an SE_TpuTopology_Host* instead.
239 XLA_ComputationPlacer* TpuComputationPlacer_New();
240 void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer);
241 // `assignment` should be a preallocated array of size `replica_count` *
242 // `computation_count`. The assignment will be constructed as a 2D array where
243 // assignment[replica][computation] = device_id.
244 void TpuComputationPlacer_AssignDevices(XLA_ComputationPlacer* placer,
245                                         int replica_count,
246                                         int computation_count, int* assignment,
247                                         TF_Status* status);
248 void TpuComputationPlacer_AssignLocalDevices(SE_TpuTopology_Host* host,
249                                              int replica_count,
250                                              int computation_count,
251                                              int* assignment,
252                                              TF_Status* status);
253 
// Topology queries. The TpuTopology_* functions take an SE_TpuTopology*
// (the type returned by TpuPlatform_GetTopologyPtr); several are additionally
// parameterized by a TpuCoreTypeEnum. The TpuCoreLocation_* functions take an
// SE_TpuTopology_Core*, the type returned by TpuTopology_CoreForId and
// TpuTopology_Core. The *Coordinates functions write through the `x`, `y`
// and `z` out-pointers.
254 int TpuTopology_LogicalDevicesPerHost(SE_TpuTopology* tpu_topology,
255                                       TpuCoreTypeEnum tpu_core_type);
256 int TpuTopology_LogicalDevicesPerChip(SE_TpuTopology* tpu_topology,
257                                       TpuCoreTypeEnum tpu_core_type);
258 int TpuTopology_HostCount(SE_TpuTopology* tpu_topology);
259 int TpuTopology_ChipsPerHost(SE_TpuTopology* tpu_topology);
260 
261 int TpuTopology_ChipBounds_X(SE_TpuTopology* tpu_topology);
262 int TpuTopology_ChipBounds_Y(SE_TpuTopology* tpu_topology);
263 int TpuTopology_ChipBounds_Z(SE_TpuTopology* tpu_topology);
264 bool TpuTopology_HasChip(SE_TpuTopology* tpu_topology, int x, int y, int z);
265 SE_TpuTopology_Core* TpuTopology_CoreForId(SE_TpuTopology* tpu_topology,
266                                            TpuCoreTypeEnum tpu_core_type,
267                                            int id);
268 SE_TpuTopology_Core* TpuTopology_Core(SE_TpuTopology* tpu_topology,
269                                       TpuCoreTypeEnum tpu_core_type, int x,
270                                       int y, int z, int index);
271 int TpuTopology_NumCores(SE_TpuTopology* tpu_topology,
272                          TpuCoreTypeEnum tpu_core_type);
273 // 'cores' should be a preallocated array of size TpuTopology_NumCores.
274 void TpuTopology_Cores(SE_TpuTopology* tpu_topology,
275                        TpuCoreTypeEnum tpu_core_type,
276                        SE_TpuTopology_Core** cores);
277 int TpuTopology_IdForHost(SE_TpuTopology* tpu_topology, int x, int y, int z);
278 TpuVersionEnum TpuTopology_Version(SE_TpuTopology* tpu_topology);
279 void TpuCoreLocation_ChipCoordinates(SE_TpuTopology_Core* tpu_core_location,
280                                      int* x, int* y, int* z);
281 void TpuCoreLocation_HostCoordinates(SE_TpuTopology_Core* tpu_core_location,
282                                      int* x, int* y, int* z);
283 int TpuCoreLocation_Index(SE_TpuTopology_Core* tpu_core_location);
284 int TpuCoreLocation_Id(SE_TpuTopology_Core* tpu_core_location);
285 
// Queries on an SE_TpuTopology_Host* (the type returned by
// TpuPlatform_GetHostLocation). NumCores and Cores are parameterized by a
// TpuCoreTypeEnum.
286 int TpuHostLocation_Id(SE_TpuTopology_Host* tpu_host_location);
287 int TpuHostLocation_NumCores(SE_TpuTopology_Host* tpu_host_location,
288                              TpuCoreTypeEnum tpu_core_type);
289 // 'cores' should be a preallocated array of size TpuHostLocation_NumCores.
290 void TpuHostLocation_Cores(SE_TpuTopology_Host* tpu_host_location,
291                            TpuCoreTypeEnum tpu_core_type,
292                            SE_TpuTopology_Core** cores);
293 
294 // Async collective offloading. Both functions take no arguments and return
// nothing; what state they set up or tear down is not visible in this header.
295 void TpuAsyncCollectiveOffloadHelper_Init();
296 void TpuAsyncCollectiveOffloadHelper_Shutdown();
297 
298 // C API for XLA::Compiler interface
// From this point on, declarations carry TFTPU_CAPI_EXPORT (defined outside
// this header; presumably a symbol-visibility macro -- TODO confirm).
// TpuCompiler_New returns the Tpu_Compiler handle that the other TpuCompiler_*
// functions take as their first argument. RunHloPasses, RunBackend and Compile
// deliver their output through the `result` / `executables` out-parameters and
// take a TF_Status* out-parameter.
299 
300 TFTPU_CAPI_EXPORT Tpu_Compiler* TpuCompiler_New();
301 TFTPU_CAPI_EXPORT void TpuCompiler_Free(Tpu_Compiler* compiler);
302 
303 TFTPU_CAPI_EXPORT void TpuCompiler_RunHloPasses(
304     Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module,
305     SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator,
306     XLA_HloModule* result, TF_Status* status);
307 
308 TFTPU_CAPI_EXPORT void TpuCompiler_RunBackend(
309     Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module,
310     SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator,
311     SE_Executable** result, TF_Status* status);
312 
313 TFTPU_CAPI_EXPORT void TpuCompiler_Compile(
314     Tpu_Compiler* compiler, XLA_HloModuleGroup* se_hlo_module_group,
315     SE_StreamExecutorList* stream_exec_lists, int num_lists,
316     SE_DeviceMemoryAllocator* allocator, SE_Executable** executables,
317     TF_Status* status);
318 
319 TFTPU_CAPI_EXPORT int64_t TpuCompiler_ShapeSize(Tpu_Compiler* compiler,
320                                                 XLA_Shape* c_shape);
321 
322 TFTPU_CAPI_EXPORT void TpuCompiler_DefaultDeviceShapeRepresentation(
323     Tpu_Compiler* compiler, XLA_Shape* host_shape, XLA_Shape* device_shape);
324 
// Runs `executable` with `se_arguments_size` entries from `se_arguments` and
// writes the result into `se_output`. Arrays allocated as part of `se_output`
// are released with the two Free*Array functions declared below.
325 TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream(
326     SE_Executable* executable, SE_ExecutableRunOptions* se_options,
327     SE_ExecutionInput** se_arguments, int se_arguments_size,
328     SE_HloExecutionProfile* hlo_execution_profile,
329     SE_ExecutionOutput* se_output, TF_Status* status);
330 
331 // This frees the XLA_ShapeIndex* array allocated when se_output is returned by
332 // TpuExecutable_ExecuteAsyncOnStream.
333 TFTPU_CAPI_EXPORT void TpuExecutable_FreeXlaShapeIndexArray(
334     XLA_ShapeIndex* array);
335 
336 // This frees the SE_MaybeOwningDeviceMemory* array allocated when se_output is
337 // returned by TpuExecutable_ExecuteAsyncOnStream.
338 // Note that this only frees the heap-allocated array itself, and does not
339 // free any of the underlying device memory.
340 TFTPU_CAPI_EXPORT void TpuExecutable_FreeMaybeOwningDeviceMemoryArray(
341     SE_MaybeOwningDeviceMemory* array);
342 
// Returns the executable's fingerprint through the `fingerprint` and `size`
// out-parameters. Ownership of the returned bytes is not stated here -- TODO
// confirm.
343 TFTPU_CAPI_EXPORT void TpuExecutable_Fingerprint(SE_Executable* executable,
344                                                  const char** fingerprint,
345                                                  size_t* size);
346 
347 // The serialization format is not guaranteed to be stable over time and has no
348 // compatibility guarantees (i.e. this is not a suitable long-term storage
349 // format). TpuExecutableSerialize_FreeHandle should be called after 'handle' is
350 // no longer needed. 'handle' is set to nullptr on error.
351 TFTPU_CAPI_EXPORT void TpuExecutable_Serialize(
352     SE_Executable* executable, SE_ExecutableSerializationHandle** handle,
353     TF_Status* status);
354 
355 // Returns the size of the serialized executable in bytes, i.e. the size of the
356 // array that should be passed to TpuExecutableSerialize_WriteToArray. `handle`
357 // must be non-null.
358 TFTPU_CAPI_EXPORT size_t
359 TpuExecutableSerialize_GetByteSize(SE_ExecutableSerializationHandle* handle);
360 
361 // Writes the serialized executable to `serialized`, which must be of size
362 // `serialized_size`. `serialized_size` must be at least
363 // `TpuExecutableSerialize_GetByteSize(handle)`. `handle` must be non-null.
// NOTE(review): `serialized_size` is an `int` here, while
// TpuExecutableSerialize_GetByteSize returns `size_t`; callers have to narrow
// the value when passing it through.
364 TFTPU_CAPI_EXPORT void TpuExecutableSerialize_WriteToArray(
365     SE_ExecutableSerializationHandle* handle, int serialized_size,
366     uint8_t* serialized, TF_Status* status);
367 
368 // Safe to call if 'handle' is null.
369 TFTPU_CAPI_EXPORT void TpuExecutableSerialize_FreeHandle(
370     SE_ExecutableSerializationHandle* handle);
371 
// Takes `serialized_size` bytes at `serialized` and returns an executable
// through the `executable` out-parameter; takes a TF_Status* out-parameter.
372 TFTPU_CAPI_EXPORT void TpuExecutable_Deserialize(int serialized_size,
373                                                  const uint8_t* serialized,
374                                                  SE_Executable** executable,
375                                                  TF_Status* status);
376 
377 // Caller is responsible for freeing the returned module's proto and its
378 // config's proto. The XLA_HloModule is returned by value.
379 TFTPU_CAPI_EXPORT XLA_HloModule
380 TpuExecutable_HloModule(SE_Executable* executable);
381 
// Releases an SE_Executable, the type produced by TpuCompiler_RunBackend,
// TpuCompiler_Compile and TpuExecutable_Deserialize.
382 TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*);
383 
384 // Converts an XLA `Shape` into its equivalent TPU `Shape` representation.
// The result is written to `serialized_tpu_shape`. The encoding of
// `data_type` (a plain int) is not defined in this header -- TODO confirm.
385 TFTPU_CAPI_EXPORT void XlaShapeToTpuShapeRepresentation(
386     XLA_Shape* serialized_xla_shape, int data_type, bool use_fast_memory,
387     XLA_Shape* serialized_tpu_shape, TF_Status* status);
388 
// Takes an input shape and a `padded_shape` out-parameter (presumably filled
// with the padded form of the input -- TODO confirm), plus a TF_Status*.
389 TFTPU_CAPI_EXPORT void XlaShapeToTpuPaddedShape(XLA_Shape* serialized_xla_shape,
390                                                 XLA_Shape* padded_shape,
391                                                 TF_Status* status);
392 
// Table of the API functions in this header: each TFTPU_ADD_FN_IN_STRUCT entry
// names a function declared earlier in this file. TFTPU_ADD_FN_IN_STRUCT is
// defined outside this header (presumably it declares a function-pointer
// member for the named function -- TODO confirm in libtftpu.h).
// NOTE(review): if the macro expands to data members, the entry order below
// determines the struct layout, so entries should not be reordered without
// checking every consumer of this struct.
393 struct TfTpu_ExecutorApiFn {
394   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_New);
395   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Free);
396   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialize);
397   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialized);
398   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetExecutor);
399   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Id);
400   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_VisibleDeviceCount);
401   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_TpuMemoryLimit);
402   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy);
403   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetTopologyPtr);
404   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetHostLocation);
405   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetRuntimeVersion);
406 
407   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Init);
408   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Free);
409   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PlatformDeviceCount);
410   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Allocate);
411   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Deallocate);
412   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetAllocatorStats);
413   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeviceMemoryUsage);
414   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateStream);
415   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateStream);
416   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateStreamDependency);
417   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetStatus);
418   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetCoreLocation);
419   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateEvent);
420   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateEvent);
421   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PollForEventStatus);
422   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_RecordEvent);
423   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForEvent);
424   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateTimer);
425   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateTimer);
426   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StartTimer);
427   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StopTimer);
428   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyToHost);
429   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyFromHost);
430   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyToHost);
431   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyFromHost);
432   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueInfeed);
433   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DequeueOutfeed);
434   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForInfeedReady);
435   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForOutfeedReady);
436   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockHostUntilDone);
437   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockUntilDoneOrFailed);
438   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SyncAndForgetFailedStreams);
439   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronizeAllActivity);
440   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_UnloadAllPrograms);
441   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueCompactionOnStreamForHbm);
442 
443   TFTPU_ADD_FN_IN_STRUCT(TpuStream_New);
444   TFTPU_ADD_FN_IN_STRUCT(TpuStream_Free);
445   TFTPU_ADD_FN_IN_STRUCT(TpuStream_Stream);
446   TFTPU_ADD_FN_IN_STRUCT(TpuStream_Status);
447   TFTPU_ADD_FN_IN_STRUCT(TpuStream_IsSameSharedMemoryLocation);
448   TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferHostToDevice);
449   TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferDeviceToHost);
450   TFTPU_ADD_FN_IN_STRUCT(TpuStream_TpuEnqueueOnDeviceSendRecvLocal);
451 
452   TFTPU_ADD_FN_IN_STRUCT(TpuEvent_New);
453   TFTPU_ADD_FN_IN_STRUCT(TpuEvent_Free);
454 
455   TFTPU_ADD_FN_IN_STRUCT(TpuTimer_New);
456   TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Free);
457   TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Nanoseconds);
458   TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Microseconds);
459 
460   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_New);
461   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Create);
462   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Set);
463   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Free);
464   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Message);
465   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Code);
466   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Ok);
467 
468   TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Default);
469   TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_SetOrdinal);
470   TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Free);
471 
472   TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_New);
473   TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_Free);
474 
475   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateDeviceDescription);
476   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_NewDeviceOptions);
477   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_FreeDeviceOptions);
478   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_HostCallback);
479 
480   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_New);
481   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_Free);
482   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_PlatformId);
483   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_HostShapeToDeviceShape);
484   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToDeviceAsync);
485   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromDevice);
486   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement);
487   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ChooseCompactLayoutForShape);
488   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanShapedBufferBeAccessedNow);
489   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanBufferBeAccessedNow);
490   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable);
491   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetInfeedLayout);
492   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_LinearizeToBuffers);
493   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_FreeBuffers);
494   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToInfeed);
495   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferBuffersToInfeed);
496   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromOutfeed);
497   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ResetDevices);
498   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ReadDynamicShapes);
499 
500   TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New);
501   TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free);
502   TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignDevices);
503   TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignLocalDevices);
504 
505   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerHost);
506   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerChip);
507   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HostCount);
508   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipsPerHost);
509 
510   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_X);
511   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Y);
512   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Z);
513   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HasChip);
514   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_CoreForId);
515   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Core);
516   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_NumCores);
517   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Cores);
518   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_IdForHost);
519   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Version);
520 
521   TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates);
522   TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_HostCoordinates);
523   TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index);
524   TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id);
525 
526   TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Id);
527   TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_NumCores);
528   TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Cores);
529 
530   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_New);
531   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Free);
532   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses);
533   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunBackend);
534   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile);
535   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize);
536   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_DefaultDeviceShapeRepresentation);
537 
538   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream);
539   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeXlaShapeIndexArray);
540   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeMaybeOwningDeviceMemoryArray);
541   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Fingerprint);
542   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Serialize);
543   TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_GetByteSize);
544   TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_WriteToArray);
545   TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_FreeHandle);
546   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Deserialize);
547   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_HloModule);
548   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free);
549 
550   TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuShapeRepresentation);
551   TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuPaddedShape);
552 
553   TFTPU_ADD_FN_IN_STRUCT(TpuAsyncCollectiveOffloadHelper_Init);
554   TFTPU_ADD_FN_IN_STRUCT(TpuAsyncCollectiveOffloadHelper_Shutdown);
555 };
556 }
557 
558 // extern "C"
559 
560 #endif  // TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_
561