xref: /aosp_15_r20/external/googleapis/google/cloud/tpu/v2alpha1/cloud_tpu.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.tpu.v2alpha1;
18
19import "google/api/annotations.proto";
20import "google/api/client.proto";
21import "google/api/field_behavior.proto";
22import "google/api/resource.proto";
23import "google/longrunning/operations.proto";
24import "google/protobuf/duration.proto";
25import "google/protobuf/empty.proto";
26import "google/protobuf/field_mask.proto";
27import "google/protobuf/timestamp.proto";
28import "google/rpc/status.proto";
29import "google/type/interval.proto";
30
31option go_package = "cloud.google.com/go/tpu/apiv2alpha1/tpupb;tpupb";
32option java_multiple_files = true;
33option java_outer_classname = "CloudTpuProto";
34option java_package = "com.google.cloud.tpu.v2alpha1";
35
// Manages TPU nodes and other resources.
//
// TPU API v2alpha1
service Tpu {
  option (google.api.default_host) = "tpu.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Lists nodes.
  rpc ListNodes(ListNodesRequest) returns (ListNodesResponse) {
    option (google.api.http) = {
      get: "/v2alpha1/{parent=projects/*/locations/*}/nodes"
    };
    option (google.api.method_signature) = "parent";
  }

  // Gets the details of a node.
  rpc GetNode(GetNodeRequest) returns (Node) {
    option (google.api.http) = {
      get: "/v2alpha1/{name=projects/*/locations/*/nodes/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Creates a node.
  rpc CreateNode(CreateNodeRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2alpha1/{parent=projects/*/locations/*}/nodes"
      body: "node"
    };
    option (google.api.method_signature) = "parent,node,node_id";
    option (google.longrunning.operation_info) = {
      response_type: "Node"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes a node.
  rpc DeleteNode(DeleteNodeRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2alpha1/{name=projects/*/locations/*/nodes/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "OperationMetadata"
    };
  }

  // Stops a node. This operation is only available with single TPU nodes.
  rpc StopNode(StopNodeRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2alpha1/{name=projects/*/locations/*/nodes/*}:stop"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Node"
      metadata_type: "OperationMetadata"
    };
  }

  // Starts a node.
  rpc StartNode(StartNodeRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2alpha1/{name=projects/*/locations/*/nodes/*}:start"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Node"
      metadata_type: "OperationMetadata"
    };
  }

  // Updates the configurations of a node.
  rpc UpdateNode(UpdateNodeRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2alpha1/{node.name=projects/*/locations/*/nodes/*}"
      body: "node"
    };
    option (google.api.method_signature) = "node,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Node"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists queued resources.
  rpc ListQueuedResources(ListQueuedResourcesRequest)
      returns (ListQueuedResourcesResponse) {
    option (google.api.http) = {
      get: "/v2alpha1/{parent=projects/*/locations/*}/queuedResources"
    };
    option (google.api.method_signature) = "parent";
  }

  // Gets details of a queued resource.
  rpc GetQueuedResource(GetQueuedResourceRequest) returns (QueuedResource) {
    option (google.api.http) = {
      get: "/v2alpha1/{name=projects/*/locations/*/queuedResources/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Creates a QueuedResource TPU instance.
  rpc CreateQueuedResource(CreateQueuedResourceRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2alpha1/{parent=projects/*/locations/*}/queuedResources"
      body: "queued_resource"
    };
    option (google.api.method_signature) =
        "parent,queued_resource,queued_resource_id";
    option (google.longrunning.operation_info) = {
      response_type: "QueuedResource"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes a QueuedResource TPU instance.
  rpc DeleteQueuedResource(DeleteQueuedResourceRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2alpha1/{name=projects/*/locations/*/queuedResources/*}"
    };
    option (google.api.method_signature) = "name";
    // NOTE(review): the operation result is declared as QueuedResource here,
    // whereas DeleteNode declares google.protobuf.Empty. Confirm against the
    // service that this asymmetry is intended.
    option (google.longrunning.operation_info) = {
      response_type: "QueuedResource"
      metadata_type: "OperationMetadata"
    };
  }

  // Resets a QueuedResource TPU instance.
  rpc ResetQueuedResource(ResetQueuedResourceRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2alpha1/{name=projects/*/locations/*/queuedResources/*}:reset"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "QueuedResource"
      metadata_type: "OperationMetadata"
    };
  }

  // Generates the Cloud TPU service identity for the project.
  rpc GenerateServiceIdentity(GenerateServiceIdentityRequest)
      returns (GenerateServiceIdentityResponse) {
    option (google.api.http) = {
      post: "/v2alpha1/{parent=projects/*/locations/*}:generateServiceIdentity"
      body: "*"
    };
  }

  // Lists accelerator types supported by this API.
  rpc ListAcceleratorTypes(ListAcceleratorTypesRequest)
      returns (ListAcceleratorTypesResponse) {
    option (google.api.http) = {
      get: "/v2alpha1/{parent=projects/*/locations/*}/acceleratorTypes"
    };
    option (google.api.method_signature) = "parent";
  }

  // Gets AcceleratorType.
  rpc GetAcceleratorType(GetAcceleratorTypeRequest) returns (AcceleratorType) {
    option (google.api.http) = {
      get: "/v2alpha1/{name=projects/*/locations/*/acceleratorTypes/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists runtime versions supported by this API.
  rpc ListRuntimeVersions(ListRuntimeVersionsRequest)
      returns (ListRuntimeVersionsResponse) {
    option (google.api.http) = {
      get: "/v2alpha1/{parent=projects/*/locations/*}/runtimeVersions"
    };
    option (google.api.method_signature) = "parent";
  }

  // Gets a runtime version.
  rpc GetRuntimeVersion(GetRuntimeVersionRequest) returns (RuntimeVersion) {
    option (google.api.http) = {
      get: "/v2alpha1/{name=projects/*/locations/*/runtimeVersions/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Retrieves the guest attributes for the node.
  rpc GetGuestAttributes(GetGuestAttributesRequest)
      returns (GetGuestAttributesResponse) {
    option (google.api.http) = {
      post: "/v2alpha1/{name=projects/*/locations/*/nodes/*}:getGuestAttributes"
      body: "*"
    };
  }

  // Simulates a maintenance event.
  rpc SimulateMaintenanceEvent(SimulateMaintenanceEventRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2alpha1/{name=projects/*/locations/*/nodes/*}:simulateMaintenanceEvent"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Node"
      metadata_type: "OperationMetadata"
    };
  }
}
246
// A guest attributes query: the queried path together with the value found
// at that path.
message GuestAttributes {
  // The path to be queried. This can be the default namespace ('/') or a
  // nested namespace ('/\<namespace\>/') or a specified key
  // ('/\<namespace\>/\<key\>')
  string query_path = 1;

  // The value of the requested queried path.
  GuestAttributesValue query_value = 2;
}
257
// Array of guest attribute namespace/key/value tuples.
message GuestAttributesValue {
  // The list of guest attributes entries.
  repeated GuestAttributesEntry items = 1;
}
263
// A guest attributes namespace/key/value entry.
message GuestAttributesEntry {
  // Namespace for the guest attribute entry.
  string namespace = 1;

  // Key for the guest attribute entry.
  string key = 2;

  // Value for the guest attribute entry.
  string value = 3;
}
275
// A node-attached disk resource.
// Next ID: 8;
//
// NOTE(review): only field numbers 3 and 4 are declared here while the next
// free ID is stated as 8. If numbers 1, 2 and 5-7 were assigned in the past,
// they should be listed in a `reserved` statement -- confirm before adding.
message AttachedDisk {
  // The different mode of the attached disk.
  enum DiskMode {
    // The disk mode is not known/set.
    DISK_MODE_UNSPECIFIED = 0;

    // Attaches the disk in read-write mode. Only one TPU node can attach a disk
    // in read-write mode at a time.
    READ_WRITE = 1;

    // Attaches the disk in read-only mode. Multiple TPU nodes can attach
    // a disk in read-only mode at a time.
    READ_ONLY = 2;
  }

  // Specifies the full path to an existing disk.
  // For example: "projects/my-project/zones/us-central1-c/disks/my-disk".
  string source_disk = 3;

  // The mode in which to attach this disk.
  // If not specified, the default is READ_WRITE mode.
  // Only applicable to data_disks.
  DiskMode mode = 4;
}
302
// Sets the scheduling options for this node.
message SchedulingConfig {
  // Defines whether the node is preemptible.
  bool preemptible = 1;

  // Whether the node is created under a reservation.
  bool reserved = 2;
}
311
// A network endpoint over which a TPU worker can be reached.
message NetworkEndpoint {
  // The internal IP address of this network endpoint.
  string ip_address = 1;

  // The port of this network endpoint.
  int32 port = 2;

  // The access config for the TPU worker.
  AccessConfig access_config = 5;
}
323
// An access config attached to the TPU worker.
message AccessConfig {
  // Output only. An external IP address associated with the TPU worker.
  string external_ip = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}
329
// Network related configurations.
message NetworkConfig {
  // The name of the network for the TPU node. It must be a preexisting Google
  // Compute Engine network. If none is provided, "default" will be used.
  string network = 1;

  // The name of the subnetwork for the TPU node. It must be a preexisting
  // Google Compute Engine subnetwork. If none is provided, "default" will be
  // used.
  string subnetwork = 2;

  // Indicates that external IP addresses would be associated with the TPU
  // workers. If set to false, the specified subnetwork or network should have
  // Private Google Access enabled.
  bool enable_external_ips = 3;

  // Allows the TPU node to send and receive packets with non-matching
  // destination or source IPs. This is required if you plan to use the TPU
  // workers to forward routes.
  bool can_ip_forward = 4;
}
351
// A service account.
message ServiceAccount {
  // Email address of the service account. If empty, default Compute service
  // account will be used.
  string email = 1;

  // The list of scopes to be made available for this service account. If empty,
  // access to all Cloud APIs will be allowed.
  repeated string scope = 2;
}
362
// A TPU instance.
message Node {
  option (google.api.resource) = {
    type: "tpu.googleapis.com/Node"
    pattern: "projects/{project}/locations/{location}/nodes/{node}"
  };

  // Represents the different states of a TPU node during its lifecycle.
  enum State {
    // TPU node state is not known/set.
    STATE_UNSPECIFIED = 0;

    // TPU node is being created.
    CREATING = 1;

    // TPU node has been created.
    READY = 2;

    // TPU node is restarting.
    RESTARTING = 3;

    // TPU node is undergoing reimaging.
    REIMAGING = 4;

    // TPU node is being deleted.
    DELETING = 5;

    // TPU node is being repaired and may be unusable. Details can be
    // found in the 'health_description' field.
    REPAIRING = 6;

    // TPU node is stopped.
    STOPPED = 8;

    // TPU node is currently stopping.
    STOPPING = 9;

    // TPU node is currently starting.
    STARTING = 10;

    // TPU node has been preempted. Only applies to Preemptible TPU Nodes.
    PREEMPTED = 11;

    // TPU node has been terminated due to maintenance or has reached the end of
    // its life cycle (for preemptible nodes).
    TERMINATED = 12;

    // TPU node is currently hiding.
    HIDING = 13;

    // TPU node has been hidden.
    HIDDEN = 14;

    // TPU node is currently unhiding.
    UNHIDING = 15;
  }

  // Health defines the status of a TPU node as reported by
  // Health Monitor.
  enum Health {
    // Health status is unknown: not initialized or failed to retrieve.
    HEALTH_UNSPECIFIED = 0;

    // The resource is healthy.
    HEALTHY = 1;

    // The resource is unresponsive.
    TIMEOUT = 3;

    // The in-guest ML stack is unhealthy.
    UNHEALTHY_TENSORFLOW = 4;

    // The node is under maintenance/priority boost caused rescheduling and
    // will resume running once rescheduled.
    UNHEALTHY_MAINTENANCE = 5;
  }

  // TPU API Version.
  enum ApiVersion {
    // API version is unknown.
    API_VERSION_UNSPECIFIED = 0;

    // TPU API V1Alpha1 version.
    V1_ALPHA1 = 1;

    // TPU API V1 version.
    V1 = 2;

    // TPU API V2Alpha1 version.
    V2_ALPHA1 = 3;
  }

  // Output only. Immutable. The name of the TPU.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // The user-supplied description of the TPU. Maximum of 512 characters.
  string description = 3;

  // The type of hardware accelerators associated with this node.
  string accelerator_type = 5;

  // Output only. The current state for the TPU Node.
  State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. If this field is populated, it contains a description of why
  // the TPU Node is unhealthy.
  string health_description = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The runtime version running in the Node.
  string runtime_version = 11 [(google.api.field_behavior) = REQUIRED];

  // Network configurations for the TPU node.
  NetworkConfig network_config = 36;

  // The CIDR block that the TPU node will use when selecting an IP address.
  // This CIDR block must be a /29 block; the Compute Engine networks API
  // forbids a smaller block, and using a larger block would be wasteful (a
  // node can only consume one IP address). Errors will occur if the CIDR block
  // has already been used for a currently existing TPU node, the CIDR block
  // conflicts with any subnetworks in the user's provided network, or the
  // provided network is peered with another network that is using that CIDR
  // block.
  string cidr_block = 13;

  // The Google Cloud Platform Service Account to be used by the TPU node VMs.
  // If none is specified, the default compute service account will be used.
  ServiceAccount service_account = 37;

  // Output only. The time when the node was created.
  google.protobuf.Timestamp create_time = 16
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The scheduling options for this node.
  SchedulingConfig scheduling_config = 17;

  // Output only. The network endpoints where TPU workers can be accessed and
  // sent work. It is recommended that runtime clients of the node reach out
  // to the 0th entry in this list first.
  repeated NetworkEndpoint network_endpoints = 21
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The health status of the TPU node.
  Health health = 22;

  // Resource labels to represent user-provided metadata.
  map<string, string> labels = 24;

  // Custom metadata to apply to the TPU Node.
  // Can set startup-script and shutdown-script
  map<string, string> metadata = 34;

  // Tags to apply to the TPU Node. Tags are used to identify valid sources or
  // targets for network firewalls.
  repeated string tags = 40;

  // Output only. The unique identifier for the TPU Node.
  int64 id = 33 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The additional data disks for the Node.
  repeated AttachedDisk data_disks = 41;

  // Output only. The API version that created this Node.
  ApiVersion api_version = 38 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The Symptoms that have occurred to the TPU Node.
  repeated Symptom symptoms = 39 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The qualified name of the QueuedResource that requested this
  // Node.
  string queued_resource = 43 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The AcceleratorConfig for the TPU Node.
  AcceleratorConfig accelerator_config = 44;

  // Shielded Instance options.
  ShieldedInstanceConfig shielded_instance_config = 45;

  // Output only. Whether the Node belongs to a Multislice group.
  bool multislice_node = 47 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Boot disk configuration.
  BootDiskConfig boot_disk_config = 49 [(google.api.field_behavior) = OPTIONAL];
}
549
// A QueuedResource represents a request for resources that will be placed
// in a queue and fulfilled when the necessary resources are available.
message QueuedResource {
  option (google.api.resource) = {
    type: "tpu.googleapis.com/QueuedResource"
    pattern: "projects/{project}/locations/{location}/queuedResources/{queued_resource}"
  };

  // Details of the TPU resource(s) being requested.
  message Tpu {
    // Details of the TPU node(s) being requested. Users can request either a
    // single node or multiple nodes.
    // NodeSpec provides the specification for node(s) to be created.
    message NodeSpec {
      // Parameters to specify for multi-node QueuedResource requests. This
      // field must be populated in case of multi-node requests instead of
      // node_id. It's an error to specify both node_id and multi_node_params.
      message MultiNodeParams {
        // Required. Number of nodes with this spec. The system will attempt
        // to provision "node_count" nodes as part of the request.
        // This needs to be > 1.
        int32 node_count = 1 [(google.api.field_behavior) = REQUIRED];

        // Prefix of node_ids in case of multi-node request
        // Should follow the `^[A-Za-z0-9_.~+%-]+$` regex format.
        // If node_count = 3 and node_id_prefix = "np", node ids of nodes
        // created will be "np-0", "np-1", "np-2". If this field is not
        // provided we use queued_resource_id as the node_id_prefix.
        string node_id_prefix = 2;
      }

      // Required. The parent resource name.
      string parent = 1 [
        (google.api.field_behavior) = REQUIRED,
        (google.api.resource_reference) = {
          type: "locations.googleapis.com/Location"
        }
      ];

      // The unqualified resource name. Should follow the `^[A-Za-z0-9_.~+%-]+$`
      // regex format. This is only specified when requesting a single node.
      // In case of multi-node requests, multi_node_params must be populated
      // instead. It's an error to specify both node_id and multi_node_params.
      string node_id = 2;

      // Optional. Fields to specify in case of multi-node request.
      MultiNodeParams multi_node_params = 6
          [(google.api.field_behavior) = OPTIONAL];

      // Required. The node.
      Node node = 3 [(google.api.field_behavior) = REQUIRED];
    }

    // The TPU node(s) being requested.
    repeated NodeSpec node_spec = 1;
  }

  // BestEffort tier definition.
  message BestEffort {}

  // Spot tier definition.
  message Spot {}

  // Guaranteed tier definition.
  message Guaranteed {
    // Optional. Defines the minimum duration of the guarantee. If specified,
    // the requested resources will only be provisioned if they can be
    // allocated for at least the given duration.
    google.protobuf.Duration min_duration = 1
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Specifies the request should be scheduled on reserved capacity.
    bool reserved = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Defines the queueing policy of the QueuedResource.
  message QueueingPolicy {
    // Time flexibility specification.
    oneof start_timing_constraints {
      // A relative time after which resources should not be created.
      // If the request cannot be fulfilled by this time the request will be
      // failed.
      google.protobuf.Duration valid_until_duration = 1;

      // An absolute time after which resources should not be created.
      // If the request cannot be fulfilled by this time the request will be
      // failed.
      google.protobuf.Timestamp valid_until_time = 2;

      // A relative time after which resources may be created.
      google.protobuf.Duration valid_after_duration = 3;

      // An absolute time at which resources may be created.
      google.protobuf.Timestamp valid_after_time = 4;

      // An absolute time interval within which resources may be created.
      google.type.Interval valid_interval = 5;
    }
  }

  // Output only. Immutable. The name of the QueuedResource.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Resource specification.
  oneof resource {
    // Defines a TPU resource.
    Tpu tpu = 2;
  }

  // Tier specifies the required tier.
  oneof tier {
    // The BestEffort tier.
    BestEffort best_effort = 3;

    // The Guaranteed tier.
    Guaranteed guaranteed = 4;

    // Optional. The Spot tier.
    Spot spot = 9 [(google.api.field_behavior) = OPTIONAL];
  }

  // The queueing policy of the QueuedResource.
  QueueingPolicy queueing_policy = 5;

  // Output only. State of the QueuedResource request.
  QueuedResourceState state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Name of the reservation in which the resource should be provisioned.
  // Format: projects/{project}/locations/{zone}/reservations/{reservation}
  string reservation_name = 8;
}
684
// QueuedResourceState defines the details of the QueuedResource request.
message QueuedResourceState {
  // The possible states of a QueuedResource request. Output only.
  enum State {
    // State of the QueuedResource request is not known/set.
    STATE_UNSPECIFIED = 0;

    // The QueuedResource request has been received. We're still working on
    // determining if we will be able to honor this request.
    CREATING = 1;

    // The QueuedResource request has passed initial validation/admission
    // control and has been persisted in the queue.
    ACCEPTED = 2;

    // The QueuedResource request has been selected. The
    // associated resources are currently being provisioned (or very soon
    // will begin provisioning).
    PROVISIONING = 3;

    // The request could not be completed. This may be due to some
    // late-discovered problem with the request itself, or due to
    // unavailability of resources within the constraints of the request
    // (e.g., the 'valid until' start timing constraint expired).
    FAILED = 4;

    // The QueuedResource is being deleted.
    DELETING = 5;

    // The resources specified in the QueuedResource request have been
    // provisioned and are ready for use by the end-user/consumer.
    ACTIVE = 6;

    // The resources specified in the QueuedResource request are being
    // deleted. This may have been initiated by the user, or
    // the Cloud TPU service. Inspect the state data for more details.
    SUSPENDING = 7;

    // The resources specified in the QueuedResource request have been
    // deleted.
    SUSPENDED = 8;

    // The QueuedResource request has passed initial validation and has been
    // persisted in the queue. It will remain in this state until there are
    // sufficient free resources to begin provisioning your request. Wait times
    // will vary significantly depending on demand levels. When demand is high,
    // not all requests can be immediately provisioned. If you
    // need more reliable obtainability of TPUs consider purchasing a
    // reservation. To put a limit on how long you are willing to wait, use
    // [timing
    // constraints](https://cloud.google.com/tpu/docs/queued-resources#request_a_queued_resource_before_a_specified_time).
    WAITING_FOR_RESOURCES = 9;
  }

  // Further data for the creating state.
  message CreatingData {}

  // Further data for the accepted state.
  message AcceptedData {}

  // Further data for the provisioning state.
  message ProvisioningData {}

  // Further data for the failed state.
  message FailedData {
    // The error that caused the queued resource to enter the FAILED state.
    google.rpc.Status error = 1;
  }

  // Further data for the deleting state.
  message DeletingData {}

  // Further data for the active state.
  message ActiveData {}

  // Further data for the suspending state.
  message SuspendingData {}

  // Further data for the suspended state.
  message SuspendedData {}

  // The initiator of the QueuedResource's SUSPENDING/SUSPENDED state.
  enum StateInitiator {
    // The state initiator is unspecified.
    STATE_INITIATOR_UNSPECIFIED = 0;

    // The current QueuedResource state was initiated by the user.
    USER = 1;

    // The current QueuedResource state was initiated by the service.
    SERVICE = 2;
  }

  // State of the QueuedResource request.
  State state = 1;

  // Further data for the state.
  oneof state_data {
    // Further data for the creating state.
    CreatingData creating_data = 2;

    // Further data for the accepted state.
    AcceptedData accepted_data = 3;

    // Further data for the provisioning state.
    ProvisioningData provisioning_data = 4;

    // Further data for the failed state.
    FailedData failed_data = 5;

    // Further data for the deleting state.
    DeletingData deleting_data = 6;

    // Further data for the active state.
    ActiveData active_data = 7;

    // Further data for the suspending state.
    SuspendingData suspending_data = 8;

    // Further data for the suspended state.
    SuspendedData suspended_data = 9;
  }

  // Output only. The initiator of the QueuedResource's current state.
  StateInitiator state_initiator = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
812
// Request for [ListNodes][google.cloud.tpu.v2alpha1.Tpu.ListNodes].
message ListNodesRequest {
  // Required. The parent resource name.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { child_type: "tpu.googleapis.com/Node" }
  ];

  // The maximum number of items to return.
  int32 page_size = 2;

  // The next_page_token value returned from a previous List request, if any.
  string page_token = 3;
}
827
// Response for [ListNodes][google.cloud.tpu.v2alpha1.Tpu.ListNodes].
message ListNodesResponse {
  // The listed nodes.
  repeated Node nodes = 1;

  // The next page token or empty if none.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}
839
// Request for [GetNode][google.cloud.tpu.v2alpha1.Tpu.GetNode].
message GetNodeRequest {
  // Required. The resource name.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" }
  ];
}
848
// Request for [CreateNode][google.cloud.tpu.v2alpha1.Tpu.CreateNode].
message CreateNodeRequest {
  // Required. The parent resource name.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // The unqualified resource name.
  string node_id = 2;

  // Required. The node.
  Node node = 3 [(google.api.field_behavior) = REQUIRED];

  // Idempotent request UUID.
  string request_id = 6;
}
868
// Request for [DeleteNode][google.cloud.tpu.v2alpha1.Tpu.DeleteNode].
message DeleteNodeRequest {
  // Required. The resource name.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" }
  ];

  // Idempotent request UUID.
  string request_id = 3;
}
880
// Request for [StopNode][google.cloud.tpu.v2alpha1.Tpu.StopNode].
message StopNodeRequest {
  // Required. The resource name.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" }
  ];
}
889
// Request for [StartNode][google.cloud.tpu.v2alpha1.Tpu.StartNode].
message StartNodeRequest {
  // Required. The resource name.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "tpu.googleapis.com/Node" }
  ];
}
898
// Request for [UpdateNode][google.cloud.tpu.v2alpha1.Tpu.UpdateNode].
message UpdateNodeRequest {
  // Required. Mask of fields from [Node][google.cloud.tpu.v2alpha1.Node] to
  // update. Supported fields: [description, tags, labels, metadata,
  // network_config.enable_external_ips].
  google.protobuf.FieldMask update_mask = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The node. Only fields specified in update_mask are updated.
  Node node = 2 [(google.api.field_behavior) = REQUIRED];
}
910
// Request message for
// [ListQueuedResources][google.cloud.tpu.v2alpha1.Tpu.ListQueuedResources].
message ListQueuedResourcesRequest {
  // Required. Resource name of the parent.
  string parent = 1 [
    (google.api.resource_reference) = {
      child_type: "tpu.googleapis.com/QueuedResource"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Maximum number of items to return.
  int32 page_size = 2;

  // The next_page_token value returned by a previous List request, if any.
  string page_token = 3;
}
928
// Response message for
// [ListQueuedResources][google.cloud.tpu.v2alpha1.Tpu.ListQueuedResources].
message ListQueuedResourcesResponse {
  // Queued resources that were listed.
  repeated QueuedResource queued_resources = 1;

  // Token for the next page; empty if there is none.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}
941
// Request message for
// [GetQueuedResource][google.cloud.tpu.v2alpha1.Tpu.GetQueuedResource].
message GetQueuedResourceRequest {
  // Required. Resource name of the queued resource.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "tpu.googleapis.com/QueuedResource"
    },
    (google.api.field_behavior) = REQUIRED
  ];
}
953
// Request message for
// [CreateQueuedResource][google.cloud.tpu.v2alpha1.Tpu.CreateQueuedResource].
message CreateQueuedResourceRequest {
  // Required. Resource name of the parent.
  string parent = 1 [
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Unqualified resource name. It should match the regex
  // `^[A-Za-z0-9_.~+%-]+$`.
  string queued_resource_id = 2;

  // Required. The queued resource.
  QueuedResource queued_resource = 3 [(google.api.field_behavior) = REQUIRED];

  // UUID of the request, used for idempotency.
  string request_id = 4;
}
975
// Request message for
// [DeleteQueuedResource][google.cloud.tpu.v2alpha1.Tpu.DeleteQueuedResource].
message DeleteQueuedResourceRequest {
  // Required. Resource name of the queued resource.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "tpu.googleapis.com/QueuedResource"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // UUID of the request, used for idempotency.
  string request_id = 2;

  // When true, all running nodes that belong to this queued resource are
  // deleted first, and the queued resource is deleted afterwards.
  // When false, the queued resource is deleted only if its nodes have already
  // been deleted or the queued resource is in the ACCEPTED, FAILED, or
  // SUSPENDED state.
  bool force = 3;
}
997
// Request message for
// [ResetQueuedResource][google.cloud.tpu.v2alpha1.Tpu.ResetQueuedResource].
message ResetQueuedResourceRequest {
  // Required. Name of the queued resource.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "tpu.googleapis.com/QueuedResource"
    },
    (google.api.field_behavior) = REQUIRED
  ];
}
1009
// Service identity of the Cloud TPU service, scoped per product and per
// project.
message ServiceIdentity {
  // Email address of the service identity.
  string email = 1;
}
1015
// Request message for
// [GenerateServiceIdentity][google.cloud.tpu.v2alpha1.Tpu.GenerateServiceIdentity].
message GenerateServiceIdentityRequest {
  // Required. Resource name of the parent.
  string parent = 1 [
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    },
    (google.api.field_behavior) = REQUIRED
  ];
}
1027
// Response message for
// [GenerateServiceIdentity][google.cloud.tpu.v2alpha1.Tpu.GenerateServiceIdentity].
message GenerateServiceIdentityResponse {
  // The ServiceIdentity that was created or retrieved.
  ServiceIdentity identity = 1;
}
1034
// An accelerator type that a Node can be configured with.
message AcceleratorType {
  option (google.api.resource) = {
    type: "tpu.googleapis.com/AcceleratorType"
    pattern: "projects/{project}/locations/{location}/acceleratorTypes/{accelerator_type}"
  };

  // The resource name.
  string name = 1;

  // The accelerator type.
  string type = 2;

  // The accelerator configs.
  repeated AcceleratorConfig accelerator_configs = 3;
}
1051
// Request message for
// [GetAcceleratorType][google.cloud.tpu.v2alpha1.Tpu.GetAcceleratorType].
message GetAcceleratorTypeRequest {
  // Required. Resource name of the accelerator type.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "tpu.googleapis.com/AcceleratorType"
    },
    (google.api.field_behavior) = REQUIRED
  ];
}
1063
// Request message for
// [ListAcceleratorTypes][google.cloud.tpu.v2alpha1.Tpu.ListAcceleratorTypes].
message ListAcceleratorTypesRequest {
  // Required. Resource name of the parent.
  string parent = 1 [
    (google.api.resource_reference) = {
      child_type: "tpu.googleapis.com/AcceleratorType"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Maximum number of items to return.
  int32 page_size = 2;

  // The next_page_token value returned by a previous List request, if any.
  string page_token = 3;

  // Filter applied to the list.
  string filter = 5;

  // Ordering applied to the results.
  string order_by = 6;
}
1087
// Response for
// [ListAcceleratorTypes][google.cloud.tpu.v2alpha1.Tpu.ListAcceleratorTypes].
message ListAcceleratorTypesResponse {
  // The listed accelerator types.
  repeated AcceleratorType accelerator_types = 1;

  // The next page token or empty if none.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}
1100
// Describes a runtime version with which a Node can be configured.
message RuntimeVersion {
  option (google.api.resource) = {
    type: "tpu.googleapis.com/RuntimeVersion"
    pattern: "projects/{project}/locations/{location}/runtimeVersions/{runtime_version}"
  };

  // Resource name of this runtime version.
  string name = 1;

  // The runtime version string.
  string version = 2;
}
1114
// Request message for
// [GetRuntimeVersion][google.cloud.tpu.v2alpha1.Tpu.GetRuntimeVersion].
message GetRuntimeVersionRequest {
  // Required. Resource name of the runtime version.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "tpu.googleapis.com/RuntimeVersion"
    },
    (google.api.field_behavior) = REQUIRED
  ];
}
1126
// Request message for
// [ListRuntimeVersions][google.cloud.tpu.v2alpha1.Tpu.ListRuntimeVersions].
message ListRuntimeVersionsRequest {
  // Required. Resource name of the parent.
  string parent = 1 [
    (google.api.resource_reference) = {
      child_type: "tpu.googleapis.com/RuntimeVersion"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Maximum number of items to return.
  int32 page_size = 2;

  // The next_page_token value returned by a previous List request, if any.
  string page_token = 3;

  // Filter applied to the list.
  string filter = 5;

  // Ordering applied to the results.
  string order_by = 6;
}
1150
// Response for
// [ListRuntimeVersions][google.cloud.tpu.v2alpha1.Tpu.ListRuntimeVersions].
message ListRuntimeVersionsResponse {
  // The listed runtime versions.
  repeated RuntimeVersion runtime_versions = 1;

  // The next page token or empty if none.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}
1163
// Metadata describing an [Operation][google.longrunning.Operation].
message OperationMetadata {
  // The time the operation was created.
  google.protobuf.Timestamp create_time = 1;

  // The time the operation finished running.
  google.protobuf.Timestamp end_time = 2;

  // Target of the operation - for example
  // projects/project-1/locations/us-central1-a/nodes/node-1
  string target = 3;

  // Name of the verb executed by the operation.
  string verb = 4;

  // Human-readable status of the operation, if any.
  string status_detail = 5;

  // Specifies if cancellation was requested for the operation.
  bool cancel_requested = 6;

  // API version.
  string api_version = 7;
}
1188
// Describes a single Symptom instance.
message Symptom {
  // Enumerates the kinds of Symptoms that can be recorded for a TPU.
  enum SymptomType {
    // The symptom is unspecified.
    SYMPTOM_TYPE_UNSPECIFIED = 0;

    // Memory on the TPU VM is low.
    LOW_MEMORY = 1;

    // The TPU runtime has run out of memory.
    OUT_OF_MEMORY = 2;

    // Execution in the TPU runtime has timed out.
    EXECUTE_TIMED_OUT = 3;

    // The TPU runtime failed to construct a mesh that recognizes the
    // neighbors of each TPU device.
    MESH_BUILD_FAIL = 4;

    // The TPU HBM has run out of memory.
    HBM_OUT_OF_MEMORY = 5;

    // Abusive behaviors were identified on the current project.
    PROJECT_ABUSE = 6;
  }

  // Time at which the Symptom was created.
  google.protobuf.Timestamp create_time = 1;

  // The type of this Symptom.
  SymptomType symptom_type = 2;

  // Detailed information about the current Symptom.
  string details = 3;

  // String that uniquely distinguishes a worker within a TPU node.
  string worker_id = 4;
}
1229
// Request message for
// [GetGuestAttributes][google.cloud.tpu.v2alpha1.Tpu.GetGuestAttributes].
message GetGuestAttributesRequest {
  // Required. Resource name of the node.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "tpu.googleapis.com/Node"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Path of the guest attributes to query.
  string query_path = 2;

  // The 0-based worker IDs. When empty, the GuestAttributes of all workers
  // are returned.
  repeated string worker_ids = 3;
}
1246
// Response message for
// [GetGuestAttributes][google.cloud.tpu.v2alpha1.Tpu.GetGuestAttributes].
message GetGuestAttributesResponse {
  // Guest attributes of the TPU workers.
  repeated GuestAttributes guest_attributes = 1;
}
1253
// Request message for
// [SimulateMaintenanceEvent][google.cloud.tpu.v2alpha1.Tpu.SimulateMaintenanceEvent].
message SimulateMaintenanceEventRequest {
  // Required. Resource name of the node.
  string name = 1 [
    (google.api.resource_reference) = {
      type: "tpu.googleapis.com/Node"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // The 0-based worker IDs. When empty, worker ID 0 is selected for the
  // maintenance event simulation. The maintenance event is fired only on the
  // first worker ID specified; future implementations may support firing on
  // multiple workers.
  repeated string worker_ids = 2;
}
1269
// Configuration of a TPU accelerator.
message AcceleratorConfig {
  // The type of TPU.
  enum Type {
    // The version is unspecified.
    TYPE_UNSPECIFIED = 0;

    // Version 2 TPU.
    V2 = 2;

    // Version 3 TPU.
    V3 = 4;

    // Version 4 TPU.
    V4 = 7;
  }

  // Required. The TPU type.
  Type type = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The TPU topology, expressed in chips.
  string topology = 2 [(google.api.field_behavior) = REQUIRED];
}
1293
// Collection of Shielded Instance options.
message ShieldedInstanceConfig {
  // Whether Secure Boot is enabled for the instance.
  bool enable_secure_boot = 1;
}
1299
// Configuration of the boot disk.
message BootDiskConfig {
  // Optional. The customer encryption key for the boot disk.
  CustomerEncryptionKey customer_encryption_key = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Whether the boot disk is created with confidential compute
  // mode.
  bool enable_confidential_compute = 2 [
    (google.api.field_behavior) = OPTIONAL
  ];
}
1310
// Customer's encryption key.
message CustomerEncryptionKey {
  // The encryption key. `kms_key_name` is currently the only member.
  oneof key {
    // The name of the encryption key that is stored in Google Cloud KMS.
    // For example:
    // <pre class="lang-html">"kmsKeyName": "projects/
    // <var class="apiparam">kms_project_id</var>/locations/
    // <var class="apiparam">region</var>/keyRings/<var class="apiparam">
    // key_region</var>/cryptoKeys/<var class="apiparam">key</var>"
    // </pre>
    // The fully-qualified key name may be returned for resource GET requests.
    // For example:
    // <pre class="lang-html">"kmsKeyName": "projects/
    // <var class="apiparam">kms_project_id</var>/locations/
    // <var class="apiparam">region</var>/keyRings/<var class="apiparam">
    // key_region</var>/cryptoKeys/<var class="apiparam">key</var>
    // /cryptoKeyVersions/1"</pre>
    string kms_key_name = 7;
  }
}
1331