1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ 17 #define TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ 18 19 #include <stddef.h> 20 #include <stdint.h> 21 22 #include "tensorflow/c/tf_attrtype.h" 23 #include "tensorflow/compiler/xla/stream_executor/tpu/c_api_decl.h" 24 #include "tensorflow/core/tpu/libtftpu.h" 25 26 extern "C" { 27 28 SE_Platform* TpuPlatform_New(); 29 void TpuPlatform_Free(SE_Platform* platform); 30 void TpuPlatform_Initialize(SE_Platform* platform, size_t options_size, 31 const char** options_key, 32 const char** options_value, TF_Status* status); 33 bool TpuPlatform_Initialized(SE_Platform* platform); 34 SE_StreamExecutor* TpuPlatform_GetExecutor(SE_Platform* platform, 35 SE_StreamExecutorConfig* config, 36 TF_Status* status); 37 SE_PlatformId TpuPlatform_Id(SE_Platform* platform); 38 int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform); 39 int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform); 40 bool TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy(SE_Platform* platform); 41 SE_TpuTopology* TpuPlatform_GetTopologyPtr(SE_Platform* platform); 42 SE_TpuTopology_Host* TpuPlatform_GetHostLocation(SE_Platform* platform); 43 TpuRuntimeVersion TpuPlatform_GetRuntimeVersion(SE_Platform* platform); 44 45 void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal, 46 SE_DeviceOptions* device_options, TF_Status* status); 47 void TpuExecutor_Free(SE_StreamExecutor* executor); 48 49 int TpuExecutor_PlatformDeviceCount(SE_StreamExecutor* executor); 50 51 SE_DeviceMemoryBase TpuExecutor_Allocate(SE_StreamExecutor* executor, 52 uint64_t size, int64_t memory_space); 53 void TpuExecutor_Deallocate(SE_StreamExecutor* executor, 54 SE_DeviceMemoryBase* memory); 55 bool TpuExecutor_GetAllocatorStats(SE_StreamExecutor* executor, 56 SE_AllocatorStats* stats); 57 bool TpuExecutor_DeviceMemoryUsage(SE_StreamExecutor* executor, int64_t* free, 58 int64_t* total); 59 60 bool TpuExecutor_AllocateStream(SE_StreamExecutor* executor, SE_Stream* stream); 61 void TpuExecutor_DeallocateStream(SE_StreamExecutor* executor, 62 SE_Stream* stream); 63 bool TpuExecutor_CreateStreamDependency(SE_StreamExecutor* executor, 64 SE_Stream* dependent, SE_Stream* other); 65 void TpuExecutor_GetStatus(SE_StreamExecutor* executor, SE_Stream* stream, 66 TF_Status* status); 67 68 SE_TpuTopology_Core* TpuExecutor_GetCoreLocation(SE_StreamExecutor* executor); 69 70 void TpuExecutor_AllocateEvent(SE_StreamExecutor* executor, SE_Event* event, 71 TF_Status* status); 72 void TpuExecutor_DeallocateEvent(SE_StreamExecutor* executor, SE_Event* event, 73 TF_Status* status); 74 int TpuExecutor_PollForEventStatus(SE_StreamExecutor* executor, 75 SE_Event* event); 76 void TpuExecutor_RecordEvent(SE_StreamExecutor* executor, SE_Stream* stream, 77 SE_Event* event, TF_Status* status); 78 void TpuExecutor_WaitForEvent(SE_StreamExecutor* executor, SE_Stream* stream, 79 SE_Event* event, TF_Status* status); 80 81 bool TpuExecutor_AllocateTimer(SE_StreamExecutor* executor, SE_Timer* timer); 82 void TpuExecutor_DeallocateTimer(SE_StreamExecutor* executor, SE_Timer* timer); 83 bool TpuExecutor_StartTimer(SE_StreamExecutor* executor, SE_Stream* stream, 84 SE_Timer* timer); 85 bool TpuExecutor_StopTimer(SE_StreamExecutor* executor, SE_Stream* stream, 86 SE_Timer* timer); 87 88 void TpuExecutor_SynchronousMemcpyToHost(SE_StreamExecutor* executor, 89 void* host_dst, 90 const SE_DeviceMemoryBase* device_src, 91 uint64_t size, TF_Status* status); 92 void TpuExecutor_SynchronousMemcpyFromHost(SE_StreamExecutor* executor, 93 SE_DeviceMemoryBase* device_dst, 94 const void* host_src, uint64_t size, 95 TF_Status* status); 96 bool TpuExecutor_MemcpyToHost(SE_StreamExecutor* executor, SE_Stream* stream, 97 void* host_dst, 98 const SE_DeviceMemoryBase* device_src, 99 uint64_t size); 100 101 bool TpuExecutor_MemcpyFromHost(SE_StreamExecutor* executor, SE_Stream* stream, 102 SE_DeviceMemoryBase* device_dst, 103 const void* host_src, uint64_t size); 104 105 void TpuExecutor_EnqueueInfeed(SE_StreamExecutor* executor, 106 int32_t infeed_queue_index, const uint8_t* data, 107 int64_t size, TF_Status* status); 108 void TpuExecutor_DequeueOutfeed(SE_StreamExecutor* executor, 109 int32_t outfeed_queue_index, uint8_t* data, 110 int64_t size, TF_Status* status); 111 void TpuExecutor_WaitForInfeedReady(SE_StreamExecutor* executor, 112 int32_t infeed_queue_index, 113 TF_Status* status); 114 void TpuExecutor_WaitForOutfeedReady(SE_StreamExecutor* executor, 115 int32_t outfeed_queue_index, 116 TF_Status* status); 117 118 void TpuExecutor_BlockHostUntilDone(SE_StreamExecutor* executor, 119 SE_Stream* stream, TF_Status* status); 120 void TpuExecutor_BlockUntilDoneOrFailed(SE_StreamExecutor* executor, 121 TF_Status* status); 122 void TpuExecutor_SyncAndForgetFailedStreams(SE_StreamExecutor* executor); 123 bool TpuExecutor_SynchronizeAllActivity(SE_StreamExecutor* executor); 124 125 void TpuExecutor_UnloadAllPrograms(SE_StreamExecutor* executor, 126 TF_Status* status); 127 void TpuExecutor_EnqueueCompactionOnStreamForHbm(SE_StreamExecutor* executor, 128 SE_Stream* compaction_stream, 129 TF_Status* status); 130 131 SE_Stream* TpuStream_New(SE_StreamExecutor* parent); 132 void TpuStream_Free(SE_Stream*); 133 void* TpuStream_Stream(SE_Stream*); 134 bool TpuStream_Status(SE_Stream*); 135 bool TpuStream_IsSameSharedMemoryLocation(SE_Stream*, SE_Stream*); 136 void TpuStream_EnqueueTransferHostToDevice(SE_Stream* stream, 137 SE_DeviceMemoryBase device_dst, 138 void* host_src, uint64_t size, 139 TF_Status* status); 140 void TpuStream_EnqueueTransferDeviceToHost(SE_Stream* stream, 141 SE_DeviceMemoryBase device_src, 142 void* host_dst, uint64_t size, 143 TF_Status* status); 144 void TpuStream_TpuEnqueueOnDeviceSendRecvLocal(SE_Stream* stream, 145 SE_DeviceMemoryBase send_buffer, 146 SE_DeviceMemoryBase recv_buffer, 147 TF_Status* status); 148 149 SE_Event* TpuEvent_New(SE_StreamExecutor* parent); 150 void TpuEvent_Free(SE_Event*); 151 152 SE_Timer* TpuTimer_New(SE_StreamExecutor* parent); 153 void TpuTimer_Free(SE_Timer*); 154 int64_t TpuTimer_Nanoseconds(SE_Timer*); 155 int64_t TpuTimer_Microseconds(SE_Timer*); 156 157 TF_Status* TpuStatus_New(); 158 TF_Status* TpuStatus_Create(int32_t code, const char* msg); 159 void TpuStatus_Set(TF_Status* status, int32_t code, const char* msg, 160 int32_t len); 161 void TpuStatus_Free(TF_Status* status); 162 const char* TpuStatus_Message(TF_Status* status); 163 int TpuStatus_Code(TF_Status* status); 164 bool TpuStatus_Ok(TF_Status* status); 165 166 SE_StreamExecutorConfig* TpuStreamExecutorConfig_Default(); 167 void TpuStreamExecutorConfig_SetOrdinal(SE_StreamExecutorConfig*, int ordinal); 168 void TpuStreamExecutorConfig_Free(SE_StreamExecutorConfig*); 169 170 SE_DeviceDescription* TpuDeviceDescription_New(); 171 void TpuDeviceDescription_Free(SE_DeviceDescription* description); 172 void TpuExecutor_CreateDeviceDescription(SE_StreamExecutor* executor, 173 SE_DeviceDescription* description, 174 TF_Status* status); 175 176 SE_DeviceOptions* TpuExecutor_NewDeviceOptions(unsigned flags); 177 void TpuExecutor_FreeDeviceOptions(SE_DeviceOptions* options); 178 179 bool TpuExecutor_HostCallback(SE_StreamExecutor* executor, SE_Stream* stream, 180 SE_StatusCallbackFn callback_fn, void* ctx); 181 182 XLA_TransferManager* TpuTransferManager_New(); 183 void TpuTransferManager_Free(XLA_TransferManager* manager); 184 SE_PlatformId TpuTransferManager_PlatformId(XLA_TransferManager* manager); 185 void TpuTransferManager_HostShapeToDeviceShape(XLA_TransferManager* manager, 186 XLA_Shape* host_shape, 187 XLA_Shape* device_shape); 188 void TpuTransferManager_TransferLiteralToDeviceAsync( 189 XLA_TransferManager* manager, SE_Stream* stream, XLA_Literal* literal, 190 XLA_ShapedBuffer* device_buffer, TF_Status* status); 191 void TpuTransferManager_TransferLiteralFromDevice( 192 XLA_TransferManager* manager, SE_Stream* stream, 193 XLA_ShapedBuffer* device_buffer, XLA_Literal* literal, 194 XLA_StatusCallbackFn callback, void* ctx); 195 int64_t TpuTransferManager_GetByteSizeRequirement(XLA_TransferManager* manager, 196 XLA_Shape* shape); 197 void TpuTransferManager_ChooseCompactLayoutForShape( 198 XLA_TransferManager* manager, XLA_Shape* host_shape, XLA_Shape* output, 199 TF_Status* status); 200 bool TpuTransferManager_CanShapedBufferBeAccessedNow( 201 XLA_TransferManager* manager, SE_StreamExecutor* executor, 202 XLA_ShapedBuffer* device_buffer); 203 bool TpuTransferManager_CanBufferBeAccessedNow( 204 XLA_TransferManager* manager, SE_StreamExecutor* executor, 205 SE_DeviceMemoryBase* device_buffer); 206 void TpuTransferManager_WriteSingleTupleIndexTable( 207 XLA_TransferManager* manager, SE_Stream* stream, 208 SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape, 209 SE_DeviceMemoryBase* region, TF_Status* status); 210 void TpuTransferManager_GetInfeedLayout(XLA_Shape* shape, 211 XLA_Shape* infeed_shape); 212 void TpuTransferManager_LinearizeToBuffers( 213 XLA_TransferManager* manager, XLA_Literal* c_literal, char*** buffers_array, 214 int64_t** buffers_size, int64_t* buffers_array_size, TF_Status* status); 215 void TpuTransferManager_FreeBuffers(char** buffers_array, int64_t* buffers_size, 216 int64_t buffers_array_size); 217 void TpuTransferManager_TransferLiteralToInfeed(XLA_TransferManager* manager, 218 SE_StreamExecutor* executor, 219 XLA_Literal* c_literal, 220 TF_Status* status); 221 void TpuTransferManager_TransferBuffersToInfeed(XLA_TransferManager* manager, 222 SE_StreamExecutor* executor, 223 uint32_t** buffers_array, 224 int64_t* buffers_size_in_uint32, 225 int64_t buffers_array_size, 226 TF_Status* status); 227 void TpuTransferManager_TransferLiteralFromOutfeed( 228 XLA_TransferManager* manager, SE_StreamExecutor* executor, 229 XLA_Shape* shape /*deprecated*/, XLA_Literal* c_literal, TF_Status* status); 230 void TpuTransferManager_ResetDevices(XLA_TransferManager* manager, 231 SE_StreamExecutor** executors, 232 int64_t num_executors, TF_Status* status); 233 void TpuTransferManager_ReadDynamicShapes(SE_Stream* stream, 234 XLA_ShapedBuffer* buffer, 235 const XLA_Shape& original_shape, 236 XLA_Shape* updated_shape, 237 TF_Status* status); 238 239 XLA_ComputationPlacer* TpuComputationPlacer_New(); 240 void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer); 241 // `assignment` should be a preallocated array of size `replicate_count` * 242 // `computation_count`. The assignment will be constructed as a 2D array where 243 // assignment[replica][computation] = device_id. 244 void TpuComputationPlacer_AssignDevices(XLA_ComputationPlacer* placer, 245 int replica_count, 246 int computation_count, int* assignment, 247 TF_Status* status); 248 void TpuComputationPlacer_AssignLocalDevices(SE_TpuTopology_Host* host, 249 int replica_count, 250 int computation_count, 251 int* assignment, 252 TF_Status* status); 253 254 int TpuTopology_LogicalDevicesPerHost(SE_TpuTopology* tpu_topology, 255 TpuCoreTypeEnum tpu_core_type); 256 int TpuTopology_LogicalDevicesPerChip(SE_TpuTopology* tpu_topology, 257 TpuCoreTypeEnum tpu_core_type); 258 int TpuTopology_HostCount(SE_TpuTopology* tpu_topology); 259 int TpuTopology_ChipsPerHost(SE_TpuTopology* tpu_topology); 260 261 int TpuTopology_ChipBounds_X(SE_TpuTopology* tpu_topology); 262 int TpuTopology_ChipBounds_Y(SE_TpuTopology* tpu_topology); 263 int TpuTopology_ChipBounds_Z(SE_TpuTopology* tpu_topology); 264 bool TpuTopology_HasChip(SE_TpuTopology* tpu_topology, int x, int y, int z); 265 SE_TpuTopology_Core* TpuTopology_CoreForId(SE_TpuTopology* tpu_topology, 266 TpuCoreTypeEnum tpu_core_type, 267 int id); 268 SE_TpuTopology_Core* TpuTopology_Core(SE_TpuTopology* tpu_topology, 269 TpuCoreTypeEnum tpu_core_type, int x, 270 int y, int z, int index); 271 int TpuTopology_NumCores(SE_TpuTopology* tpu_topology, 272 TpuCoreTypeEnum tpu_core_type); 273 // 'cores' should be a preallocated array of size TpuTopology_NumCores. 274 void TpuTopology_Cores(SE_TpuTopology* tpu_topology, 275 TpuCoreTypeEnum tpu_core_type, 276 SE_TpuTopology_Core** cores); 277 int TpuTopology_IdForHost(SE_TpuTopology* tpu_topology, int x, int y, int z); 278 TpuVersionEnum TpuTopology_Version(SE_TpuTopology* tpu_topology); 279 void TpuCoreLocation_ChipCoordinates(SE_TpuTopology_Core* tpu_core_location, 280 int* x, int* y, int* z); 281 void TpuCoreLocation_HostCoordinates(SE_TpuTopology_Core* tpu_core_location, 282 int* x, int* y, int* z); 283 int TpuCoreLocation_Index(SE_TpuTopology_Core* tpu_core_location); 284 int TpuCoreLocation_Id(SE_TpuTopology_Core* tpu_core_location); 285 286 int TpuHostLocation_Id(SE_TpuTopology_Host* tpu_host_location); 287 int TpuHostLocation_NumCores(SE_TpuTopology_Host* tpu_host_location, 288 TpuCoreTypeEnum tpu_core_type); 289 // 'cores' should be a preallocated array of size TpuHostLocation_NumCores. 290 void TpuHostLocation_Cores(SE_TpuTopology_Host* tpu_host_location, 291 TpuCoreTypeEnum tpu_core_type, 292 SE_TpuTopology_Core** cores); 293 294 // Async collective offloading. 295 void TpuAsyncCollectiveOffloadHelper_Init(); 296 void TpuAsyncCollectiveOffloadHelper_Shutdown(); 297 298 // C API for XLA::Compiler interface 299 300 TFTPU_CAPI_EXPORT Tpu_Compiler* TpuCompiler_New(); 301 TFTPU_CAPI_EXPORT void TpuCompiler_Free(Tpu_Compiler* compiler); 302 303 TFTPU_CAPI_EXPORT void TpuCompiler_RunHloPasses( 304 Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module, 305 SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator, 306 XLA_HloModule* result, TF_Status* status); 307 308 TFTPU_CAPI_EXPORT void TpuCompiler_RunBackend( 309 Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module, 310 SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator, 311 SE_Executable** result, TF_Status* status); 312 313 TFTPU_CAPI_EXPORT void TpuCompiler_Compile( 314 Tpu_Compiler* compiler, XLA_HloModuleGroup* se_hlo_module_group, 315 SE_StreamExecutorList* stream_exec_lists, int num_lists, 316 SE_DeviceMemoryAllocator* allocator, SE_Executable** executables, 317 TF_Status* status); 318 319 TFTPU_CAPI_EXPORT int64_t TpuCompiler_ShapeSize(Tpu_Compiler* compiler, 320 XLA_Shape* c_shape); 321 322 TFTPU_CAPI_EXPORT void TpuCompiler_DefaultDeviceShapeRepresentation( 323 Tpu_Compiler* compiler, XLA_Shape* host_shape, XLA_Shape* device_shape); 324 325 TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream( 326 SE_Executable* executable, SE_ExecutableRunOptions* se_options, 327 SE_ExecutionInput** se_arguments, int se_arguments_size, 328 SE_HloExecutionProfile* hlo_execution_profile, 329 SE_ExecutionOutput* se_output, TF_Status* status); 330 331 // This frees the XLA_ShapeIndex* array allocated when se_output is returned by 332 // TpuExecutable_ExecuteAsyncOnStream. 333 TFTPU_CAPI_EXPORT void TpuExecutable_FreeXlaShapeIndexArray( 334 XLA_ShapeIndex* array); 335 336 // This frees the SE_MaybeOwningDeviceMemory* array allocated when se_output is 337 // returned by TpuExecutable_ExecuteAsyncOnStream. 338 // Note that this only frees the heap-allocated array itself, and does not 339 // free any of the underlying device memory. 340 TFTPU_CAPI_EXPORT void TpuExecutable_FreeMaybeOwningDeviceMemoryArray( 341 SE_MaybeOwningDeviceMemory* array); 342 343 TFTPU_CAPI_EXPORT void TpuExecutable_Fingerprint(SE_Executable* executable, 344 const char** fingerprint, 345 size_t* size); 346 347 // The serialization format is not guaranteed to be stable over time and has no 348 // compatibility guarantees (i.e. this is not a suitable long-term storage 349 // format). TpuExecutableSerialize_FreeHandle should be called after 'handle' is 350 // no longer needed. 'handle' is set to nullptr on error. 351 TFTPU_CAPI_EXPORT void TpuExecutable_Serialize( 352 SE_Executable* executable, SE_ExecutableSerializationHandle** handle, 353 TF_Status* status); 354 355 // Returns the size of the serialized executable in bytes, i.e. the size of the 356 // array that should be passed to TpuExecutableSerialize_WriteToArray. `handle` 357 // must be non-null. 358 TFTPU_CAPI_EXPORT size_t 359 TpuExecutableSerialize_GetByteSize(SE_ExecutableSerializationHandle* handle); 360 361 // Writes the serialized executable to `serialized`, which must be of size 362 // `serialized_size`. `serialized_size` should must be at least 363 // `TpuExecutableSerialize_GetByteSize(handle)`. `handle` must be non-null. 364 TFTPU_CAPI_EXPORT void TpuExecutableSerialize_WriteToArray( 365 SE_ExecutableSerializationHandle* handle, int serialized_size, 366 uint8_t* serialized, TF_Status* status); 367 368 // Safe to call if 'handle' is null. 369 TFTPU_CAPI_EXPORT void TpuExecutableSerialize_FreeHandle( 370 SE_ExecutableSerializationHandle* handle); 371 372 TFTPU_CAPI_EXPORT void TpuExecutable_Deserialize(int serialized_size, 373 const uint8_t* serialized, 374 SE_Executable** executable, 375 TF_Status* status); 376 377 // Caller is responsible for freeing the returned module's proto and its 378 // config's proto. 379 TFTPU_CAPI_EXPORT XLA_HloModule 380 TpuExecutable_HloModule(SE_Executable* executable); 381 382 TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*); 383 384 // Converts an XLA `Shape` into its equivalent TPU `Shape` representation. 385 TFTPU_CAPI_EXPORT void XlaShapeToTpuShapeRepresentation( 386 XLA_Shape* serialized_xla_shape, int data_type, bool use_fast_memory, 387 XLA_Shape* serialized_tpu_shape, TF_Status* status); 388 389 TFTPU_CAPI_EXPORT void XlaShapeToTpuPaddedShape(XLA_Shape* serialized_xla_shape, 390 XLA_Shape* padded_shape, 391 TF_Status* status); 392 393 struct TfTpu_ExecutorApiFn { 394 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_New); 395 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Free); 396 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialize); 397 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialized); 398 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetExecutor); 399 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Id); 400 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_VisibleDeviceCount); 401 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_TpuMemoryLimit); 402 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy); 403 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetTopologyPtr); 404 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetHostLocation); 405 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetRuntimeVersion); 406 407 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Init); 408 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Free); 409 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PlatformDeviceCount); 410 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Allocate); 411 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Deallocate); 412 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetAllocatorStats); 413 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeviceMemoryUsage); 414 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateStream); 415 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateStream); 416 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateStreamDependency); 417 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetStatus); 418 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetCoreLocation); 419 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateEvent); 420 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateEvent); 421 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PollForEventStatus); 422 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_RecordEvent); 423 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForEvent); 424 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateTimer); 425 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateTimer); 426 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StartTimer); 427 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StopTimer); 428 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyToHost); 429 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyFromHost); 430 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyToHost); 431 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyFromHost); 432 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueInfeed); 433 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DequeueOutfeed); 434 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForInfeedReady); 435 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForOutfeedReady); 436 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockHostUntilDone); 437 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockUntilDoneOrFailed); 438 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SyncAndForgetFailedStreams); 439 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronizeAllActivity); 440 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_UnloadAllPrograms); 441 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueCompactionOnStreamForHbm); 442 443 TFTPU_ADD_FN_IN_STRUCT(TpuStream_New); 444 TFTPU_ADD_FN_IN_STRUCT(TpuStream_Free); 445 TFTPU_ADD_FN_IN_STRUCT(TpuStream_Stream); 446 TFTPU_ADD_FN_IN_STRUCT(TpuStream_Status); 447 TFTPU_ADD_FN_IN_STRUCT(TpuStream_IsSameSharedMemoryLocation); 448 TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferHostToDevice); 449 TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferDeviceToHost); 450 TFTPU_ADD_FN_IN_STRUCT(TpuStream_TpuEnqueueOnDeviceSendRecvLocal); 451 452 TFTPU_ADD_FN_IN_STRUCT(TpuEvent_New); 453 TFTPU_ADD_FN_IN_STRUCT(TpuEvent_Free); 454 455 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_New); 456 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Free); 457 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Nanoseconds); 458 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Microseconds); 459 460 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_New); 461 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Create); 462 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Set); 463 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Free); 464 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Message); 465 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Code); 466 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Ok); 467 468 TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Default); 469 TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_SetOrdinal); 470 TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Free); 471 472 TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_New); 473 TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_Free); 474 475 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateDeviceDescription); 476 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_NewDeviceOptions); 477 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_FreeDeviceOptions); 478 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_HostCallback); 479 480 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_New); 481 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_Free); 482 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_PlatformId); 483 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_HostShapeToDeviceShape); 484 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToDeviceAsync); 485 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromDevice); 486 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement); 487 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ChooseCompactLayoutForShape); 488 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanShapedBufferBeAccessedNow); 489 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanBufferBeAccessedNow); 490 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable); 491 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetInfeedLayout); 492 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_LinearizeToBuffers); 493 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_FreeBuffers); 494 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToInfeed); 495 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferBuffersToInfeed); 496 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromOutfeed); 497 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ResetDevices); 498 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ReadDynamicShapes); 499 500 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New); 501 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free); 502 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignDevices); 503 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignLocalDevices); 504 505 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerHost); 506 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerChip); 507 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HostCount); 508 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipsPerHost); 509 510 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_X); 511 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Y); 512 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Z); 513 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HasChip); 514 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_CoreForId); 515 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Core); 516 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_NumCores); 517 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Cores); 518 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_IdForHost); 519 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Version); 520 521 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates); 522 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_HostCoordinates); 523 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index); 524 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id); 525 526 TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Id); 527 TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_NumCores); 528 TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Cores); 529 530 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_New); 531 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Free); 532 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses); 533 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunBackend); 534 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile); 535 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize); 536 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_DefaultDeviceShapeRepresentation); 537 538 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream); 539 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeXlaShapeIndexArray); 540 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeMaybeOwningDeviceMemoryArray); 541 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Fingerprint); 542 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Serialize); 543 TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_GetByteSize); 544 TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_WriteToArray); 545 TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_FreeHandle); 546 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Deserialize); 547 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_HloModule); 548 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free); 549 550 TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuShapeRepresentation); 551 TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuPaddedShape); 552 553 TFTPU_ADD_FN_IN_STRUCT(TpuAsyncCollectiveOffloadHelper_Init); 554 TFTPU_ADD_FN_IN_STRUCT(TpuAsyncCollectiveOffloadHelper_Shutdown); 555 }; 556 } 557 558 // extern "C" 559 560 #endif // TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ 561