1// Copyright 2023 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15syntax = "proto3"; 16 17package google.cloud.tpu.v2alpha1; 18 19import "google/api/annotations.proto"; 20import "google/api/client.proto"; 21import "google/api/field_behavior.proto"; 22import "google/api/resource.proto"; 23import "google/longrunning/operations.proto"; 24import "google/protobuf/duration.proto"; 25import "google/protobuf/empty.proto"; 26import "google/protobuf/field_mask.proto"; 27import "google/protobuf/timestamp.proto"; 28import "google/rpc/status.proto"; 29import "google/type/interval.proto"; 30 31option go_package = "cloud.google.com/go/tpu/apiv2alpha1/tpupb;tpupb"; 32option java_multiple_files = true; 33option java_outer_classname = "CloudTpuProto"; 34option java_package = "com.google.cloud.tpu.v2alpha1"; 35 36// Manages TPU nodes and other resources 37// 38// TPU API v2alpha1 39service Tpu { 40 option (google.api.default_host) = "tpu.googleapis.com"; 41 option (google.api.oauth_scopes) = 42 "https://www.googleapis.com/auth/cloud-platform"; 43 44 // Lists nodes. 45 rpc ListNodes(ListNodesRequest) returns (ListNodesResponse) { 46 option (google.api.http) = { 47 get: "/v2alpha1/{parent=projects/*/locations/*}/nodes" 48 }; 49 option (google.api.method_signature) = "parent"; 50 } 51 52 // Gets the details of a node. 53 rpc GetNode(GetNodeRequest) returns (Node) { 54 option (google.api.http) = { 55 get: "/v2alpha1/{name=projects/*/locations/*/nodes/*}" 56 }; 57 option (google.api.method_signature) = "name"; 58 } 59 60 // Creates a node. 61 rpc CreateNode(CreateNodeRequest) returns (google.longrunning.Operation) { 62 option (google.api.http) = { 63 post: "/v2alpha1/{parent=projects/*/locations/*}/nodes" 64 body: "node" 65 }; 66 option (google.api.method_signature) = "parent,node,node_id"; 67 option (google.longrunning.operation_info) = { 68 response_type: "Node" 69 metadata_type: "OperationMetadata" 70 }; 71 } 72 73 // Deletes a node. 74 rpc DeleteNode(DeleteNodeRequest) returns (google.longrunning.Operation) { 75 option (google.api.http) = { 76 delete: "/v2alpha1/{name=projects/*/locations/*/nodes/*}" 77 }; 78 option (google.api.method_signature) = "name"; 79 option (google.longrunning.operation_info) = { 80 response_type: "google.protobuf.Empty" 81 metadata_type: "OperationMetadata" 82 }; 83 } 84 85 // Stops a node. This operation is only available with single TPU nodes. 86 rpc StopNode(StopNodeRequest) returns (google.longrunning.Operation) { 87 option (google.api.http) = { 88 post: "/v2alpha1/{name=projects/*/locations/*/nodes/*}:stop" 89 body: "*" 90 }; 91 option (google.longrunning.operation_info) = { 92 response_type: "Node" 93 metadata_type: "OperationMetadata" 94 }; 95 } 96 97 // Starts a node. 98 rpc StartNode(StartNodeRequest) returns (google.longrunning.Operation) { 99 option (google.api.http) = { 100 post: "/v2alpha1/{name=projects/*/locations/*/nodes/*}:start" 101 body: "*" 102 }; 103 option (google.longrunning.operation_info) = { 104 response_type: "Node" 105 metadata_type: "OperationMetadata" 106 }; 107 } 108 109 // Updates the configurations of a node. 110 rpc UpdateNode(UpdateNodeRequest) returns (google.longrunning.Operation) { 111 option (google.api.http) = { 112 patch: "/v2alpha1/{node.name=projects/*/locations/*/nodes/*}" 113 body: "node" 114 }; 115 option (google.api.method_signature) = "node,update_mask"; 116 option (google.longrunning.operation_info) = { 117 response_type: "Node" 118 metadata_type: "OperationMetadata" 119 }; 120 } 121 122 // Lists queued resources. 123 rpc ListQueuedResources(ListQueuedResourcesRequest) 124 returns (ListQueuedResourcesResponse) { 125 option (google.api.http) = { 126 get: "/v2alpha1/{parent=projects/*/locations/*}/queuedResources" 127 }; 128 option (google.api.method_signature) = "parent"; 129 } 130 131 // Gets details of a queued resource. 132 rpc GetQueuedResource(GetQueuedResourceRequest) returns (QueuedResource) { 133 option (google.api.http) = { 134 get: "/v2alpha1/{name=projects/*/locations/*/queuedResources/*}" 135 }; 136 option (google.api.method_signature) = "name"; 137 } 138 139 // Creates a QueuedResource TPU instance. 140 rpc CreateQueuedResource(CreateQueuedResourceRequest) 141 returns (google.longrunning.Operation) { 142 option (google.api.http) = { 143 post: "/v2alpha1/{parent=projects/*/locations/*}/queuedResources" 144 body: "queued_resource" 145 }; 146 option (google.api.method_signature) = 147 "parent,queued_resource,queued_resource_id"; 148 option (google.longrunning.operation_info) = { 149 response_type: "QueuedResource" 150 metadata_type: "OperationMetadata" 151 }; 152 } 153 154 // Deletes a QueuedResource TPU instance. 155 rpc DeleteQueuedResource(DeleteQueuedResourceRequest) 156 returns (google.longrunning.Operation) { 157 option (google.api.http) = { 158 delete: "/v2alpha1/{name=projects/*/locations/*/queuedResources/*}" 159 }; 160 option (google.api.method_signature) = "name"; 161 option (google.longrunning.operation_info) = { 162 response_type: "QueuedResource" 163 metadata_type: "OperationMetadata" 164 }; 165 } 166 167 // Resets a QueuedResource TPU instance 168 rpc ResetQueuedResource(ResetQueuedResourceRequest) 169 returns (google.longrunning.Operation) { 170 option (google.api.http) = { 171 post: "/v2alpha1/{name=projects/*/locations/*/queuedResources/*}:reset" 172 body: "*" 173 }; 174 option (google.api.method_signature) = "name"; 175 option (google.longrunning.operation_info) = { 176 response_type: "QueuedResource" 177 metadata_type: "OperationMetadata" 178 }; 179 } 180 181 // Generates the Cloud TPU service identity for the project. 182 rpc GenerateServiceIdentity(GenerateServiceIdentityRequest) 183 returns (GenerateServiceIdentityResponse) { 184 option (google.api.http) = { 185 post: "/v2alpha1/{parent=projects/*/locations/*}:generateServiceIdentity" 186 body: "*" 187 }; 188 } 189 190 // Lists accelerator types supported by this API. 191 rpc ListAcceleratorTypes(ListAcceleratorTypesRequest) 192 returns (ListAcceleratorTypesResponse) { 193 option (google.api.http) = { 194 get: "/v2alpha1/{parent=projects/*/locations/*}/acceleratorTypes" 195 }; 196 option (google.api.method_signature) = "parent"; 197 } 198 199 // Gets AcceleratorType. 200 rpc GetAcceleratorType(GetAcceleratorTypeRequest) returns (AcceleratorType) { 201 option (google.api.http) = { 202 get: "/v2alpha1/{name=projects/*/locations/*/acceleratorTypes/*}" 203 }; 204 option (google.api.method_signature) = "name"; 205 } 206 207 // Lists runtime versions supported by this API. 208 rpc ListRuntimeVersions(ListRuntimeVersionsRequest) 209 returns (ListRuntimeVersionsResponse) { 210 option (google.api.http) = { 211 get: "/v2alpha1/{parent=projects/*/locations/*}/runtimeVersions" 212 }; 213 option (google.api.method_signature) = "parent"; 214 } 215 216 // Gets a runtime version. 217 rpc GetRuntimeVersion(GetRuntimeVersionRequest) returns (RuntimeVersion) { 218 option (google.api.http) = { 219 get: "/v2alpha1/{name=projects/*/locations/*/runtimeVersions/*}" 220 }; 221 option (google.api.method_signature) = "name"; 222 } 223 224 // Retrieves the guest attributes for the node. 225 rpc GetGuestAttributes(GetGuestAttributesRequest) 226 returns (GetGuestAttributesResponse) { 227 option (google.api.http) = { 228 post: "/v2alpha1/{name=projects/*/locations/*/nodes/*}:getGuestAttributes" 229 body: "*" 230 }; 231 } 232 233 // Simulates a maintenance event. 234 rpc SimulateMaintenanceEvent(SimulateMaintenanceEventRequest) 235 returns (google.longrunning.Operation) { 236 option (google.api.http) = { 237 post: "/v2alpha1/{name=projects/*/locations/*/nodes/*}:simulateMaintenanceEvent" 238 body: "*" 239 }; 240 option (google.longrunning.operation_info) = { 241 response_type: "Node" 242 metadata_type: "OperationMetadata" 243 }; 244 } 245} 246 247// A guest attributes. 248message GuestAttributes { 249 // The path to be queried. This can be the default namespace ('/') or a 250 // nested namespace ('/\<namespace\>/') or a specified key 251 // ('/\<namespace\>/\<key\>') 252 string query_path = 1; 253 254 // The value of the requested queried path. 255 GuestAttributesValue query_value = 2; 256} 257 258// Array of guest attribute namespace/key/value tuples. 259message GuestAttributesValue { 260 // The list of guest attributes entries. 261 repeated GuestAttributesEntry items = 1; 262} 263 264// A guest attributes namespace/key/value entry. 265message GuestAttributesEntry { 266 // Namespace for the guest attribute entry. 267 string namespace = 1; 268 269 // Key for the guest attribute entry. 270 string key = 2; 271 272 // Value for the guest attribute entry. 273 string value = 3; 274} 275 276// A node-attached disk resource. 277// Next ID: 8; 278message AttachedDisk { 279 // The different mode of the attached disk. 280 enum DiskMode { 281 // The disk mode is not known/set. 282 DISK_MODE_UNSPECIFIED = 0; 283 284 // Attaches the disk in read-write mode. Only one TPU node can attach a disk 285 // in read-write mode at a time. 286 READ_WRITE = 1; 287 288 // Attaches the disk in read-only mode. Multiple TPU nodes can attach 289 // a disk in read-only mode at a time. 290 READ_ONLY = 2; 291 } 292 293 // Specifies the full path to an existing disk. 294 // For example: "projects/my-project/zones/us-central1-c/disks/my-disk". 295 string source_disk = 3; 296 297 // The mode in which to attach this disk. 298 // If not specified, the default is READ_WRITE mode. 299 // Only applicable to data_disks. 300 DiskMode mode = 4; 301} 302 303// Sets the scheduling options for this node. 304message SchedulingConfig { 305 // Defines whether the node is preemptible. 306 bool preemptible = 1; 307 308 // Whether the node is created under a reservation. 309 bool reserved = 2; 310} 311 312// A network endpoint over which a TPU worker can be reached. 313message NetworkEndpoint { 314 // The internal IP address of this network endpoint. 315 string ip_address = 1; 316 317 // The port of this network endpoint. 318 int32 port = 2; 319 320 // The access config for the TPU worker. 321 AccessConfig access_config = 5; 322} 323 324// An access config attached to the TPU worker. 325message AccessConfig { 326 // Output only. An external IP address associated with the TPU worker. 327 string external_ip = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; 328} 329 330// Network related configurations. 331message NetworkConfig { 332 // The name of the network for the TPU node. It must be a preexisting Google 333 // Compute Engine network. If none is provided, "default" will be used. 334 string network = 1; 335 336 // The name of the subnetwork for the TPU node. It must be a preexisting 337 // Google Compute Engine subnetwork. If none is provided, "default" will be 338 // used. 339 string subnetwork = 2; 340 341 // Indicates that external IP addresses would be associated with the TPU 342 // workers. If set to false, the specified subnetwork or network should have 343 // Private Google Access enabled. 344 bool enable_external_ips = 3; 345 346 // Allows the TPU node to send and receive packets with non-matching 347 // destination or source IPs. This is required if you plan to use the TPU 348 // workers to forward routes. 349 bool can_ip_forward = 4; 350} 351 352// A service account. 353message ServiceAccount { 354 // Email address of the service account. If empty, default Compute service 355 // account will be used. 356 string email = 1; 357 358 // The list of scopes to be made available for this service account. If empty, 359 // access to all Cloud APIs will be allowed. 360 repeated string scope = 2; 361} 362 363// A TPU instance. 364message Node { 365 option (google.api.resource) = { 366 type: "tpu.googleapis.com/Node" 367 pattern: "projects/{project}/locations/{location}/nodes/{node}" 368 }; 369 370 // Represents the different states of a TPU node during its lifecycle. 371 enum State { 372 // TPU node state is not known/set. 373 STATE_UNSPECIFIED = 0; 374 375 // TPU node is being created. 376 CREATING = 1; 377 378 // TPU node has been created. 379 READY = 2; 380 381 // TPU node is restarting. 382 RESTARTING = 3; 383 384 // TPU node is undergoing reimaging. 385 REIMAGING = 4; 386 387 // TPU node is being deleted. 388 DELETING = 5; 389 390 // TPU node is being repaired and may be unusable. Details can be 391 // found in the 'help_description' field. 392 REPAIRING = 6; 393 394 // TPU node is stopped. 395 STOPPED = 8; 396 397 // TPU node is currently stopping. 398 STOPPING = 9; 399 400 // TPU node is currently starting. 401 STARTING = 10; 402 403 // TPU node has been preempted. Only applies to Preemptible TPU Nodes. 404 PREEMPTED = 11; 405 406 // TPU node has been terminated due to maintenance or has reached the end of 407 // its life cycle (for preemptible nodes). 408 TERMINATED = 12; 409 410 // TPU node is currently hiding. 411 HIDING = 13; 412 413 // TPU node has been hidden. 414 HIDDEN = 14; 415 416 // TPU node is currently unhiding. 417 UNHIDING = 15; 418 } 419 420 // Health defines the status of a TPU node as reported by 421 // Health Monitor. 422 enum Health { 423 // Health status is unknown: not initialized or failed to retrieve. 424 HEALTH_UNSPECIFIED = 0; 425 426 // The resource is healthy. 427 HEALTHY = 1; 428 429 // The resource is unresponsive. 430 TIMEOUT = 3; 431 432 // The in-guest ML stack is unhealthy. 433 UNHEALTHY_TENSORFLOW = 4; 434 435 // The node is under maintenance/priority boost caused rescheduling and 436 // will resume running once rescheduled. 437 UNHEALTHY_MAINTENANCE = 5; 438 } 439 440 // TPU API Version. 441 enum ApiVersion { 442 // API version is unknown. 443 API_VERSION_UNSPECIFIED = 0; 444 445 // TPU API V1Alpha1 version. 446 V1_ALPHA1 = 1; 447 448 // TPU API V1 version. 449 V1 = 2; 450 451 // TPU API V2Alpha1 version. 452 V2_ALPHA1 = 3; 453 } 454 455 // Output only. Immutable. The name of the TPU. 456 string name = 1 [ 457 (google.api.field_behavior) = OUTPUT_ONLY, 458 (google.api.field_behavior) = IMMUTABLE 459 ]; 460 461 // The user-supplied description of the TPU. Maximum of 512 characters. 462 string description = 3; 463 464 // The type of hardware accelerators associated with this node. 465 string accelerator_type = 5; 466 467 // Output only. The current state for the TPU Node. 468 State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY]; 469 470 // Output only. If this field is populated, it contains a description of why 471 // the TPU Node is unhealthy. 472 string health_description = 10 [(google.api.field_behavior) = OUTPUT_ONLY]; 473 474 // Required. The runtime version running in the Node. 475 string runtime_version = 11 [(google.api.field_behavior) = REQUIRED]; 476 477 // Network configurations for the TPU node. 478 NetworkConfig network_config = 36; 479 480 // The CIDR block that the TPU node will use when selecting an IP address. 481 // This CIDR block must be a /29 block; the Compute Engine networks API 482 // forbids a smaller block, and using a larger block would be wasteful (a 483 // node can only consume one IP address). Errors will occur if the CIDR block 484 // has already been used for a currently existing TPU node, the CIDR block 485 // conflicts with any subnetworks in the user's provided network, or the 486 // provided network is peered with another network that is using that CIDR 487 // block. 488 string cidr_block = 13; 489 490 // The Google Cloud Platform Service Account to be used by the TPU node VMs. 491 // If None is specified, the default compute service account will be used. 492 ServiceAccount service_account = 37; 493 494 // Output only. The time when the node was created. 495 google.protobuf.Timestamp create_time = 16 496 [(google.api.field_behavior) = OUTPUT_ONLY]; 497 498 // The scheduling options for this node. 499 SchedulingConfig scheduling_config = 17; 500 501 // Output only. The network endpoints where TPU workers can be accessed and 502 // sent work. It is recommended that runtime clients of the node reach out 503 // to the 0th entry in this map first. 504 repeated NetworkEndpoint network_endpoints = 21 505 [(google.api.field_behavior) = OUTPUT_ONLY]; 506 507 // The health status of the TPU node. 508 Health health = 22; 509 510 // Resource labels to represent user-provided metadata. 511 map<string, string> labels = 24; 512 513 // Custom metadata to apply to the TPU Node. 514 // Can set startup-script and shutdown-script 515 map<string, string> metadata = 34; 516 517 // Tags to apply to the TPU Node. Tags are used to identify valid sources or 518 // targets for network firewalls. 519 repeated string tags = 40; 520 521 // Output only. The unique identifier for the TPU Node. 522 int64 id = 33 [(google.api.field_behavior) = OUTPUT_ONLY]; 523 524 // The additional data disks for the Node. 525 repeated AttachedDisk data_disks = 41; 526 527 // Output only. The API version that created this Node. 528 ApiVersion api_version = 38 [(google.api.field_behavior) = OUTPUT_ONLY]; 529 530 // Output only. The Symptoms that have occurred to the TPU Node. 531 repeated Symptom symptoms = 39 [(google.api.field_behavior) = OUTPUT_ONLY]; 532 533 // Output only. The qualified name of the QueuedResource that requested this 534 // Node. 535 string queued_resource = 43 [(google.api.field_behavior) = OUTPUT_ONLY]; 536 537 // The AccleratorConfig for the TPU Node. 538 AcceleratorConfig accelerator_config = 44; 539 540 // Shielded Instance options. 541 ShieldedInstanceConfig shielded_instance_config = 45; 542 543 // Output only. Whether the Node belongs to a Multislice group. 544 bool multislice_node = 47 [(google.api.field_behavior) = OUTPUT_ONLY]; 545 546 // Optional. Boot disk configuration. 547 BootDiskConfig boot_disk_config = 49 [(google.api.field_behavior) = OPTIONAL]; 548} 549 550// A QueuedResource represents a request for resources that will be placed 551// in a queue and fulfilled when the necessary resources are available. 552message QueuedResource { 553 option (google.api.resource) = { 554 type: "tpu.googleapis.com/QueuedResource" 555 pattern: "projects/{project}/locations/{location}/queuedResources/{queued_resource}" 556 }; 557 558 // Details of the TPU resource(s) being requested. 559 message Tpu { 560 // Details of the TPU node(s) being requested. Users can request either a 561 // single node or multiple nodes. 562 // NodeSpec provides the specification for node(s) to be created. 563 message NodeSpec { 564 // Parameters to specify for multi-node QueuedResource requests. This 565 // field must be populated in case of multi-node requests instead of 566 // node_id. It's an error to specify both node_id and multi_node_params. 567 message MultiNodeParams { 568 // Required. Number of nodes with this spec. The system will attempt 569 // to provison "node_count" nodes as part of the request. 570 // This needs to be > 1. 571 int32 node_count = 1 [(google.api.field_behavior) = REQUIRED]; 572 573 // Prefix of node_ids in case of multi-node request 574 // Should follow the `^[A-Za-z0-9_.~+%-]+$` regex format. 575 // If node_count = 3 and node_id_prefix = "np", node ids of nodes 576 // created will be "np-0", "np-1", "np-2". If this field is not 577 // provided we use queued_resource_id as the node_id_prefix. 578 string node_id_prefix = 2; 579 } 580 581 // Required. The parent resource name. 582 string parent = 1 [ 583 (google.api.field_behavior) = REQUIRED, 584 (google.api.resource_reference) = { 585 type: "locations.googleapis.com/Location" 586 } 587 ]; 588 589 // The unqualified resource name. Should follow the `^[A-Za-z0-9_.~+%-]+$` 590 // regex format. This is only specified when requesting a single node. 591 // In case of multi-node requests, multi_node_params must be populated 592 // instead. It's an error to specify both node_id and multi_node_params. 593 string node_id = 2; 594 595 // Optional. Fields to specify in case of multi-node request. 596 MultiNodeParams multi_node_params = 6 597 [(google.api.field_behavior) = OPTIONAL]; 598 599 // Required. The node. 600 Node node = 3 [(google.api.field_behavior) = REQUIRED]; 601 } 602 603 // The TPU node(s) being requested. 604 repeated NodeSpec node_spec = 1; 605 } 606 607 // BestEffort tier definition. 608 message BestEffort {} 609 610 // Spot tier definition. 611 message Spot {} 612 613 // Guaranteed tier definition. 614 message Guaranteed { 615 // Optional. Defines the minimum duration of the guarantee. If specified, 616 // the requested resources will only be provisioned if they can be 617 // allocated for at least the given duration. 618 google.protobuf.Duration min_duration = 1 619 [(google.api.field_behavior) = OPTIONAL]; 620 621 // Optional. Specifies the request should be scheduled on reserved capacity. 622 bool reserved = 2 [(google.api.field_behavior) = OPTIONAL]; 623 } 624 625 // Defines the policy of the QueuedRequest. 626 message QueueingPolicy { 627 // Time flexibility specification. 628 oneof start_timing_constraints { 629 // A relative time after which resources should not be created. 630 // If the request cannot be fulfilled by this time the request will be 631 // failed. 632 google.protobuf.Duration valid_until_duration = 1; 633 634 // An absolute time after which resources should not be created. 635 // If the request cannot be fulfilled by this time the request will be 636 // failed. 637 google.protobuf.Timestamp valid_until_time = 2; 638 639 // A relative time after which resources may be created. 640 google.protobuf.Duration valid_after_duration = 3; 641 642 // An absolute time at which resources may be created. 643 google.protobuf.Timestamp valid_after_time = 4; 644 645 // An absolute time interval within which resources may be created. 646 google.type.Interval valid_interval = 5; 647 } 648 } 649 650 // Output only. Immutable. The name of the QueuedResource. 651 string name = 1 [ 652 (google.api.field_behavior) = OUTPUT_ONLY, 653 (google.api.field_behavior) = IMMUTABLE 654 ]; 655 656 // Resource specification. 657 oneof resource { 658 // Defines a TPU resource. 659 Tpu tpu = 2; 660 } 661 662 // Tier specifies the required tier. 663 oneof tier { 664 // The BestEffort tier. 665 BestEffort best_effort = 3; 666 667 // The Guaranteed tier. 668 Guaranteed guaranteed = 4; 669 670 // Optional. The Spot tier. 671 Spot spot = 9 [(google.api.field_behavior) = OPTIONAL]; 672 } 673 674 // The queueing policy of the QueuedRequest. 675 QueueingPolicy queueing_policy = 5; 676 677 // Output only. State of the QueuedResource request. 678 QueuedResourceState state = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; 679 680 // Name of the reservation in which the resource should be provisioned. 681 // Format: projects/{project}/locations/{zone}/reservations/{reservation} 682 string reservation_name = 8; 683} 684 685// QueuedResourceState defines the details of the QueuedResource request. 686message QueuedResourceState { 687 // Output only state of the request 688 enum State { 689 // State of the QueuedResource request is not known/set. 690 STATE_UNSPECIFIED = 0; 691 692 // The QueuedResource request has been received. We're still working on 693 // determining if we will be able to honor this request. 694 CREATING = 1; 695 696 // The QueuedResource request has passed initial validation/admission 697 // control and has been persisted in the queue. 698 ACCEPTED = 2; 699 700 // The QueuedResource request has been selected. The 701 // associated resources are currently being provisioned (or very soon 702 // will begin provisioning). 703 PROVISIONING = 3; 704 705 // The request could not be completed. This may be due to some 706 // late-discovered problem with the request itself, or due to 707 // unavailability of resources within the constraints of the request 708 // (e.g., the 'valid until' start timing constraint expired). 709 FAILED = 4; 710 711 // The QueuedResource is being deleted. 712 DELETING = 5; 713 714 // The resources specified in the QueuedResource request have been 715 // provisioned and are ready for use by the end-user/consumer. 716 ACTIVE = 6; 717 718 // The resources specified in the QueuedResource request are being 719 // deleted. This may have been initiated by the user, or 720 // the Cloud TPU service. Inspect the state data for more details. 721 SUSPENDING = 7; 722 723 // The resources specified in the QueuedResource request have been 724 // deleted. 725 SUSPENDED = 8; 726 727 // The QueuedResource request has passed initial validation and has been 728 // persisted in the queue. It will remain in this state until there are 729 // sufficient free resources to begin provisioning your request. Wait times 730 // will vary significantly depending on demand levels. When demand is high, 731 // not all requests can be immediately provisioned. If you 732 // need more reliable obtainability of TPUs consider purchasing a 733 // reservation. To put a limit on how long you are willing to wait, use 734 // [timing 735 // constraints](https://cloud.google.com/tpu/docs/queued-resources#request_a_queued_resource_before_a_specified_time). 736 WAITING_FOR_RESOURCES = 9; 737 } 738 739 // Further data for the creating state. 740 message CreatingData {} 741 742 // Further data for the accepted state. 743 message AcceptedData {} 744 745 // Further data for the provisioning state. 746 message ProvisioningData {} 747 748 // Further data for the failed state. 749 message FailedData { 750 // The error that caused the queued resource to enter the FAILED state. 751 google.rpc.Status error = 1; 752 } 753 754 // Further data for the deleting state. 755 message DeletingData {} 756 757 // Further data for the active state. 758 message ActiveData {} 759 760 // Further data for the suspending state. 761 message SuspendingData {} 762 763 // Further data for the suspended state. 764 message SuspendedData {} 765 766 // The initiator of the QueuedResource's SUSPENDING/SUSPENDED state. 767 enum StateInitiator { 768 // The state initiator is unspecified. 769 STATE_INITIATOR_UNSPECIFIED = 0; 770 771 // The current QueuedResource state was initiated by the user. 772 USER = 1; 773 774 // The current QueuedResource state was initiated by the service. 775 SERVICE = 2; 776 } 777 778 // State of the QueuedResource request. 779 State state = 1; 780 781 // Further data for the state. 782 oneof state_data { 783 // Further data for the creating state. 784 CreatingData creating_data = 2; 785 786 // Further data for the accepted state. 787 AcceptedData accepted_data = 3; 788 789 // Further data for the provisioning state. 790 ProvisioningData provisioning_data = 4; 791 792 // Further data for the failed state. 793 FailedData failed_data = 5; 794 795 // Further data for the deleting state. 796 DeletingData deleting_data = 6; 797 798 // Further data for the active state. 799 ActiveData active_data = 7; 800 801 // Further data for the suspending state. 802 SuspendingData suspending_data = 8; 803 804 // Further data for the suspended state. 805 SuspendedData suspended_data = 9; 806 } 807 808 // Output only. The initiator of the QueuedResources's current state. 809 StateInitiator state_initiator = 10 810 [(google.api.field_behavior) = OUTPUT_ONLY]; 811} 812 813// Request for [ListNodes][google.cloud.tpu.v2alpha1.Tpu.ListNodes]. 814message ListNodesRequest { 815 // Required. The parent resource name. 816 string parent = 1 [ 817 (google.api.field_behavior) = REQUIRED, 818 (google.api.resource_reference) = { child_type: "tpu.googleapis.com/Node" } 819 ]; 820 821 // The maximum number of items to return. 822 int32 page_size = 2; 823 824 // The next_page_token value returned from a previous List request, if any. 825 string page_token = 3; 826} 827 828// Response for [ListNodes][google.cloud.tpu.v2alpha1.Tpu.ListNodes]. 829message ListNodesResponse { 830 // The listed nodes. 831 repeated Node nodes = 1; 832 833 // The next page token or empty if none. 834 string next_page_token = 2; 835 836 // Locations that could not be reached. 837 repeated string unreachable = 3; 838} 839 840// Request for [GetNode][google.cloud.tpu.v2alpha1.Tpu.GetNode]. 841message GetNodeRequest { 842 // Required. The resource name. 843 string name = 1 [ 844 (google.api.field_behavior) = REQUIRED, 845 (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" } 846 ]; 847} 848 849// Request for [CreateNode][google.cloud.tpu.v2alpha1.Tpu.CreateNode]. 850message CreateNodeRequest { 851 // Required. The parent resource name. 852 string parent = 1 [ 853 (google.api.field_behavior) = REQUIRED, 854 (google.api.resource_reference) = { 855 type: "locations.googleapis.com/Location" 856 } 857 ]; 858 859 // The unqualified resource name. 860 string node_id = 2; 861 862 // Required. The node. 863 Node node = 3 [(google.api.field_behavior) = REQUIRED]; 864 865 // Idempotent request UUID. 866 string request_id = 6; 867} 868 869// Request for [DeleteNode][google.cloud.tpu.v2alpha1.Tpu.DeleteNode]. 870message DeleteNodeRequest { 871 // Required. The resource name. 872 string name = 1 [ 873 (google.api.field_behavior) = REQUIRED, 874 (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" } 875 ]; 876 877 // Idempotent request UUID. 878 string request_id = 3; 879} 880 881// Request for [StopNode][google.cloud.tpu.v2alpha1.Tpu.StopNode]. 882message StopNodeRequest { 883 // Required. The resource name. 884 string name = 1 [ 885 (google.api.field_behavior) = REQUIRED, 886 (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" } 887 ]; 888} 889 890// Request for [StartNode][google.cloud.tpu.v2alpha1.Tpu.StartNode]. 891message StartNodeRequest { 892 // Required. The resource name. 893 string name = 1 [ 894 (google.api.field_behavior) = REQUIRED, 895 (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" } 896 ]; 897} 898 899// Request for [UpdateNode][google.cloud.tpu.v2alpha1.Tpu.UpdateNode]. 900message UpdateNodeRequest { 901 // Required. Mask of fields from [Node][Tpu.Node] to update. 902 // Supported fields: [description, tags, labels, metadata, 903 // network_config.enable_external_ips]. 904 google.protobuf.FieldMask update_mask = 1 905 [(google.api.field_behavior) = REQUIRED]; 906 907 // Required. The node. Only fields specified in update_mask are updated. 908 Node node = 2 [(google.api.field_behavior) = REQUIRED]; 909} 910 911// Request for 912// [ListQueuedResources][google.cloud.tpu.v2alpha1.Tpu.ListQueuedResources]. 913message ListQueuedResourcesRequest { 914 // Required. The parent resource name. 915 string parent = 1 [ 916 (google.api.field_behavior) = REQUIRED, 917 (google.api.resource_reference) = { 918 child_type: "tpu.googleapis.com/QueuedResource" 919 } 920 ]; 921 922 // The maximum number of items to return. 923 int32 page_size = 2; 924 925 // The next_page_token value returned from a previous List request, if any. 926 string page_token = 3; 927} 928 929// Response for 930// [ListQueuedResources][google.cloud.tpu.v2alpha1.Tpu.ListQueuedResources]. 931message ListQueuedResourcesResponse { 932 // The listed queued resources. 933 repeated QueuedResource queued_resources = 1; 934 935 // The next page token or empty if none. 936 string next_page_token = 2; 937 938 // Locations that could not be reached. 939 repeated string unreachable = 3; 940} 941 942// Request for 943// [GetQueuedResource][google.cloud.tpu.v2alpha1.Tpu.GetQueuedResource] 944message GetQueuedResourceRequest { 945 // Required. The resource name. 946 string name = 1 [ 947 (google.api.field_behavior) = REQUIRED, 948 (google.api.resource_reference) = { 949 type: "tpu.googleapis.com/QueuedResource" 950 } 951 ]; 952} 953 954// Request for 955// [CreateQueuedResource][google.cloud.tpu.v2alpha1.Tpu.CreateQueuedResource]. 956message CreateQueuedResourceRequest { 957 // Required. The parent resource name. 958 string parent = 1 [ 959 (google.api.field_behavior) = REQUIRED, 960 (google.api.resource_reference) = { 961 type: "locations.googleapis.com/Location" 962 } 963 ]; 964 965 // The unqualified resource name. Should follow the `^[A-Za-z0-9_.~+%-]+$` 966 // regex format. 967 string queued_resource_id = 2; 968 969 // Required. The queued resource. 970 QueuedResource queued_resource = 3 [(google.api.field_behavior) = REQUIRED]; 971 972 // Idempotent request UUID. 973 string request_id = 4; 974} 975 976// Request for 977// [DeleteQueuedResource][google.cloud.tpu.v2alpha1.Tpu.DeleteQueuedResource]. 978message DeleteQueuedResourceRequest { 979 // Required. The resource name. 980 string name = 1 [ 981 (google.api.field_behavior) = REQUIRED, 982 (google.api.resource_reference) = { 983 type: "tpu.googleapis.com/QueuedResource" 984 } 985 ]; 986 987 // Idempotent request UUID. 988 string request_id = 2; 989 990 // If set to true, all running nodes belonging to this queued resource will 991 // be deleted first and then the queued resource will be deleted. 992 // Otherwise (i.e. force=false), the queued resource will only be deleted if 993 // its nodes have already been deleted or the queued resource is in the 994 // ACCEPTED, FAILED, or SUSPENDED state. 995 bool force = 3; 996} 997 998// Request for 999// [ResetQueuedResource][google.cloud.tpu.v2alpha1.Tpu.ResetQueuedResource]. 1000message ResetQueuedResourceRequest { 1001 // Required. The name of the queued resource. 1002 string name = 1 [ 1003 (google.api.field_behavior) = REQUIRED, 1004 (google.api.resource_reference) = { 1005 type: "tpu.googleapis.com/QueuedResource" 1006 } 1007 ]; 1008} 1009 1010// The per-product per-project service identity for Cloud TPU service. 1011message ServiceIdentity { 1012 // The email address of the service identity. 1013 string email = 1; 1014} 1015 1016// Request for 1017// [GenerateServiceIdentity][google.cloud.tpu.v2alpha1.Tpu.GenerateServiceIdentity]. 1018message GenerateServiceIdentityRequest { 1019 // Required. The parent resource name. 1020 string parent = 1 [ 1021 (google.api.field_behavior) = REQUIRED, 1022 (google.api.resource_reference) = { 1023 type: "locations.googleapis.com/Location" 1024 } 1025 ]; 1026} 1027 1028// Response for 1029// [GenerateServiceIdentity][google.cloud.tpu.v2alpha1.Tpu.GenerateServiceIdentity]. 1030message GenerateServiceIdentityResponse { 1031 // ServiceIdentity that was created or retrieved. 1032 ServiceIdentity identity = 1; 1033} 1034 1035// A accelerator type that a Node can be configured with. 1036message AcceleratorType { 1037 option (google.api.resource) = { 1038 type: "tpu.googleapis.com/AcceleratorType" 1039 pattern: "projects/{project}/locations/{location}/acceleratorTypes/{accelerator_type}" 1040 }; 1041 1042 // The resource name. 1043 string name = 1; 1044 1045 // The accelerator type. 1046 string type = 2; 1047 1048 // The accelerator config. 1049 repeated AcceleratorConfig accelerator_configs = 3; 1050} 1051 1052// Request for 1053// [GetAcceleratorType][google.cloud.tpu.v2alpha1.Tpu.GetAcceleratorType]. 1054message GetAcceleratorTypeRequest { 1055 // Required. The resource name. 1056 string name = 1 [ 1057 (google.api.field_behavior) = REQUIRED, 1058 (google.api.resource_reference) = { 1059 type: "tpu.googleapis.com/AcceleratorType" 1060 } 1061 ]; 1062} 1063 1064// Request for 1065// [ListAcceleratorTypes][google.cloud.tpu.v2alpha1.Tpu.ListAcceleratorTypes]. 1066message ListAcceleratorTypesRequest { 1067 // Required. The parent resource name. 1068 string parent = 1 [ 1069 (google.api.field_behavior) = REQUIRED, 1070 (google.api.resource_reference) = { 1071 child_type: "tpu.googleapis.com/AcceleratorType" 1072 } 1073 ]; 1074 1075 // The maximum number of items to return. 1076 int32 page_size = 2; 1077 1078 // The next_page_token value returned from a previous List request, if any. 1079 string page_token = 3; 1080 1081 // List filter. 1082 string filter = 5; 1083 1084 // Sort results. 1085 string order_by = 6; 1086} 1087 1088// Response for 1089// [ListAcceleratorTypes][google.cloud.tpu.v2alpha1.Tpu.ListAcceleratorTypes]. 1090message ListAcceleratorTypesResponse { 1091 // The listed nodes. 1092 repeated AcceleratorType accelerator_types = 1; 1093 1094 // The next page token or empty if none. 1095 string next_page_token = 2; 1096 1097 // Locations that could not be reached. 1098 repeated string unreachable = 3; 1099} 1100 1101// A runtime version that a Node can be configured with. 1102message RuntimeVersion { 1103 option (google.api.resource) = { 1104 type: "tpu.googleapis.com/RuntimeVersion" 1105 pattern: "projects/{project}/locations/{location}/runtimeVersions/{runtime_version}" 1106 }; 1107 1108 // The resource name. 1109 string name = 1; 1110 1111 // The runtime version. 1112 string version = 2; 1113} 1114 1115// Request for 1116// [GetRuntimeVersion][google.cloud.tpu.v2alpha1.Tpu.GetRuntimeVersion]. 1117message GetRuntimeVersionRequest { 1118 // Required. The resource name. 1119 string name = 1 [ 1120 (google.api.field_behavior) = REQUIRED, 1121 (google.api.resource_reference) = { 1122 type: "tpu.googleapis.com/RuntimeVersion" 1123 } 1124 ]; 1125} 1126 1127// Request for 1128// [ListRuntimeVersions][google.cloud.tpu.v2alpha1.Tpu.ListRuntimeVersions]. 1129message ListRuntimeVersionsRequest { 1130 // Required. The parent resource name. 1131 string parent = 1 [ 1132 (google.api.field_behavior) = REQUIRED, 1133 (google.api.resource_reference) = { 1134 child_type: "tpu.googleapis.com/RuntimeVersion" 1135 } 1136 ]; 1137 1138 // The maximum number of items to return. 1139 int32 page_size = 2; 1140 1141 // The next_page_token value returned from a previous List request, if any. 1142 string page_token = 3; 1143 1144 // List filter. 1145 string filter = 5; 1146 1147 // Sort results. 1148 string order_by = 6; 1149} 1150 1151// Response for 1152// [ListRuntimeVersions][google.cloud.tpu.v2alpha1.Tpu.ListRuntimeVersions]. 1153message ListRuntimeVersionsResponse { 1154 // The listed nodes. 1155 repeated RuntimeVersion runtime_versions = 1; 1156 1157 // The next page token or empty if none. 1158 string next_page_token = 2; 1159 1160 // Locations that could not be reached. 1161 repeated string unreachable = 3; 1162} 1163 1164// Metadata describing an [Operation][google.longrunning.Operation] 1165message OperationMetadata { 1166 // The time the operation was created. 1167 google.protobuf.Timestamp create_time = 1; 1168 1169 // The time the operation finished running. 1170 google.protobuf.Timestamp end_time = 2; 1171 1172 // Target of the operation - for example 1173 // projects/project-1/connectivityTests/test-1 1174 string target = 3; 1175 1176 // Name of the verb executed by the operation. 1177 string verb = 4; 1178 1179 // Human-readable status of the operation, if any. 1180 string status_detail = 5; 1181 1182 // Specifies if cancellation was requested for the operation. 1183 bool cancel_requested = 6; 1184 1185 // API version. 1186 string api_version = 7; 1187} 1188 1189// A Symptom instance. 1190message Symptom { 1191 // SymptomType represents the different types of Symptoms that a TPU can be 1192 // at. 1193 enum SymptomType { 1194 // Unspecified symptom. 1195 SYMPTOM_TYPE_UNSPECIFIED = 0; 1196 1197 // TPU VM memory is low. 1198 LOW_MEMORY = 1; 1199 1200 // TPU runtime is out of memory. 1201 OUT_OF_MEMORY = 2; 1202 1203 // TPU runtime execution has timed out. 1204 EXECUTE_TIMED_OUT = 3; 1205 1206 // TPU runtime fails to construct a mesh that recognizes each TPU device's 1207 // neighbors. 1208 MESH_BUILD_FAIL = 4; 1209 1210 // TPU HBM is out of memory. 1211 HBM_OUT_OF_MEMORY = 5; 1212 1213 // Abusive behaviors have been identified on the current project. 1214 PROJECT_ABUSE = 6; 1215 } 1216 1217 // Timestamp when the Symptom is created. 1218 google.protobuf.Timestamp create_time = 1; 1219 1220 // Type of the Symptom. 1221 SymptomType symptom_type = 2; 1222 1223 // Detailed information of the current Symptom. 1224 string details = 3; 1225 1226 // A string used to uniquely distinguish a worker within a TPU node. 1227 string worker_id = 4; 1228} 1229 1230// Request for 1231// [GetGuestAttributes][google.cloud.tpu.v2alpha1.Tpu.GetGuestAttributes]. 1232message GetGuestAttributesRequest { 1233 // Required. The resource name. 1234 string name = 1 [ 1235 (google.api.field_behavior) = REQUIRED, 1236 (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" } 1237 ]; 1238 1239 // The guest attributes path to be queried. 1240 string query_path = 2; 1241 1242 // The 0-based worker ID. If it is empty, all workers' GuestAttributes will be 1243 // returned. 1244 repeated string worker_ids = 3; 1245} 1246 1247// Response for 1248// [GetGuestAttributes][google.cloud.tpu.v2alpha1.Tpu.GetGuestAttributes]. 1249message GetGuestAttributesResponse { 1250 // The guest attributes for the TPU workers. 1251 repeated GuestAttributes guest_attributes = 1; 1252} 1253 1254// Request for 1255// [SimulateMaintenanceEvent][google.cloud.tpu.v2alpha1.Tpu.SimulateMaintenanceEvent]. 1256message SimulateMaintenanceEventRequest { 1257 // Required. The resource name. 1258 string name = 1 [ 1259 (google.api.field_behavior) = REQUIRED, 1260 (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" } 1261 ]; 1262 1263 // The 0-based worker ID. If it is empty, worker ID 0 will be selected for 1264 // maintenance event simulation. A maintenance event will only be fired on the 1265 // first specified worker ID. Future implementations may support firing on 1266 // multiple workers. 1267 repeated string worker_ids = 2; 1268} 1269 1270// A TPU accelerator configuration. 1271message AcceleratorConfig { 1272 // TPU type. 1273 enum Type { 1274 // Unspecified version. 1275 TYPE_UNSPECIFIED = 0; 1276 1277 // TPU v2. 1278 V2 = 2; 1279 1280 // TPU v3. 1281 V3 = 4; 1282 1283 // TPU v4. 1284 V4 = 7; 1285 } 1286 1287 // Required. Type of TPU. 1288 Type type = 1 [(google.api.field_behavior) = REQUIRED]; 1289 1290 // Required. Topology of TPU in chips. 1291 string topology = 2 [(google.api.field_behavior) = REQUIRED]; 1292} 1293 1294// A set of Shielded Instance options. 1295message ShieldedInstanceConfig { 1296 // Defines whether the instance has Secure Boot enabled. 1297 bool enable_secure_boot = 1; 1298} 1299 1300// Boot disk configurations. 1301message BootDiskConfig { 1302 // Optional. Customer encryption key for boot disk. 1303 CustomerEncryptionKey customer_encryption_key = 1 1304 [(google.api.field_behavior) = OPTIONAL]; 1305 1306 // Optional. Whether the boot disk will be created with confidential compute 1307 // mode. 1308 bool enable_confidential_compute = 2 [(google.api.field_behavior) = OPTIONAL]; 1309} 1310 1311// Customer's encryption key. 1312message CustomerEncryptionKey { 1313 oneof key { 1314 // The name of the encryption key that is stored in Google Cloud KMS. 1315 // For example: 1316 // <pre class="lang-html">"kmsKeyName": "projects/ 1317 // <var class="apiparam">kms_project_id</var>/locations/ 1318 // <var class="apiparam">region</var>/keyRings/<var class="apiparam"> 1319 // key_region</var>/cryptoKeys/<var class="apiparam">key</var> 1320 // </pre> 1321 // The fully-qualifed key name may be returned for resource GET requests. 1322 // For example: 1323 // <pre class="lang-html">"kmsKeyName": "projects/ 1324 // <var class="apiparam">kms_project_id</var>/locations/ 1325 // <var class="apiparam">region</var>/keyRings/<var class="apiparam"> 1326 // key_region</var>/cryptoKeys/<var class="apiparam">key</var> 1327 // /cryptoKeyVersions/1</pre> 1328 string kms_key_name = 7; 1329 } 1330} 1331