xref: /aosp_15_r20/external/googleapis/google/cloud/batch/v1/job.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.batch.v1;
18
19import "google/api/field_behavior.proto";
20import "google/api/resource.proto";
21import "google/cloud/batch/v1/task.proto";
22import "google/protobuf/duration.proto";
23import "google/protobuf/timestamp.proto";
24
// Per-language code generation options (namespaces, packages, class prefixes).
25option csharp_namespace = "Google.Cloud.Batch.V1";
26option go_package = "cloud.google.com/go/batch/apiv1/batchpb;batchpb";
27option java_multiple_files = true;
28option java_outer_classname = "JobProto";
29option java_package = "com.google.cloud.batch.v1";
30option objc_class_prefix = "GCB";
31option php_namespace = "Google\\Cloud\\Batch\\V1";
32option ruby_package = "Google::Cloud::Batch::V1";
33
34// The Cloud Batch Job description.
// Resource name pattern: `projects/{project}/locations/{location}/jobs/{job}`.
35message Job {
36  option (google.api.resource) = {
37    type: "batch.googleapis.com/Job"
38    pattern: "projects/{project}/locations/{location}/jobs/{job}"
39  };
40
41  // Output only. Job name.
42  // For example: "projects/123456/locations/us-central1/jobs/job01".
43  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
44
45  // Output only. A system-generated unique ID for the Job.
46  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
47
48  // Priority of the Job.
49  // The valid value range is [0, 100). Default value is 0.
50  // A higher value indicates higher priority.
51  // A job with a higher priority value is more likely to run earlier if all
52  // other requirements are satisfied.
53  int64 priority = 3;
54
55  // Required. TaskGroups in the Job. Only one TaskGroup is supported now.
56  repeated TaskGroup task_groups = 4 [(google.api.field_behavior) = REQUIRED];
57
58  // Compute resource allocation for all TaskGroups in the Job.
59  AllocationPolicy allocation_policy = 7;
60
61  // Labels for the Job. Labels can be user-provided or system-generated.
62  // For example,
63  // "labels": {
64  //    "department": "finance",
65  //    "environment": "test"
66  //  }
67  // You can assign up to 64 labels. [Google Compute Engine label
68  // restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
69  // apply.
70  // Label names that start with "goog-" or "google-" are reserved.
71  map<string, string> labels = 8;
72
73  // Output only. Job status. It is read-only for users.
74  JobStatus status = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
75
76  // Output only. When the Job was created.
77  google.protobuf.Timestamp create_time = 11
78      [(google.api.field_behavior) = OUTPUT_ONLY];
79
80  // Output only. The last time the Job was updated.
81  google.protobuf.Timestamp update_time = 12
82      [(google.api.field_behavior) = OUTPUT_ONLY];
83
84  // Log preservation policy for the Job.
85  LogsPolicy logs_policy = 13;
86
87  // Notification configurations.
88  repeated JobNotification notifications = 14;
89}
90
91// LogsPolicy describes how outputs from a Job's Tasks (stdout/stderr) will be
92// preserved.
93message LogsPolicy {
94  // `CloudLoggingOption` contains additional settings for Cloud Logging logs
95  // generated by a Batch job.
96  message CloudLoggingOption {
97    // Optional. Set this flag to true to change the [monitored resource
98    // type](https://cloud.google.com/monitoring/api/resources) for
99    // Cloud Logging logs generated by this Batch job from
100    // the
101    // [`batch.googleapis.com/Job`](https://cloud.google.com/monitoring/api/resources#tag_batch.googleapis.com/Job)
102    // type to the formerly used
103    // [`generic_task`](https://cloud.google.com/monitoring/api/resources#tag_generic_task)
104    // type.
105    bool use_generic_task_monitored_resource = 1
106        [(google.api.field_behavior) = OPTIONAL];
107  }
108
109  // The destination (if any) for logs.
110  enum Destination {
111    // Logs are not preserved. This is the zero (default) value.
112    DESTINATION_UNSPECIFIED = 0;
113
114    // Logs are streamed to Cloud Logging.
115    CLOUD_LOGGING = 1;
116
117    // Logs are saved to a file path.
118    PATH = 2;
119  }
120
121  // Where logs should be saved.
122  Destination destination = 1;
123
124  // The path to which logs are saved when `destination` is `PATH`. This can be
125  // a local file path on the VM, or under the mount point of a Persistent Disk
126  // or Filestore, or a Cloud Storage path.
127  string logs_path = 2;
128
129  // Optional. Additional settings for Cloud Logging. It will only take effect
130  // when the destination of `LogsPolicy` is set to `CLOUD_LOGGING`.
131  CloudLoggingOption cloud_logging_option = 3
132      [(google.api.field_behavior) = OPTIONAL];
133}
134
135// Job status.
136message JobStatus {
137  // VM instance status.
138  message InstanceStatus {
139    // The Compute Engine machine type.
140    string machine_type = 1;
141
142    // The VM instance provisioning model.
143    AllocationPolicy.ProvisioningModel provisioning_model = 2;
144
145    // The max number of tasks that can be assigned to this instance type.
146    int64 task_pack = 3;
147
148    // The VM boot disk.
149    AllocationPolicy.Disk boot_disk = 4;
150  }
151
152  // Aggregated task status for a TaskGroup.
153  message TaskGroupStatus {
154    // Count of tasks in each state in the TaskGroup.
155    // The map key is task state name.
156    map<string, int64> counts = 1;
157
158    // Status of instances allocated for the TaskGroup.
159    repeated InstanceStatus instances = 2;
160  }
161
162  // Valid Job states.
163  enum State {
164    // Job state unspecified.
165    STATE_UNSPECIFIED = 0;
166
167    // Job is admitted (validated and persisted) and waiting for resources.
168    QUEUED = 1;
169
170    // Job is scheduled to run as soon as resource allocation is ready.
171    // The resource allocation may happen at a later time but with a high
172    // chance to succeed.
173    SCHEDULED = 2;
174
175    // Resource allocation has been successful. At least one Task in the Job is
176    // RUNNING.
177    RUNNING = 3;
178
179    // All Tasks in the Job have finished successfully.
180    SUCCEEDED = 4;
181
182    // At least one Task in the Job has failed.
183    FAILED = 5;
184
185    // The Job will be deleted, but has not been deleted yet. Typically this is
186    // because resources used by the Job are still being cleaned up.
187    DELETION_IN_PROGRESS = 6;
188  }
189
190  // Job state.
191  State state = 1;
192
193  // Job status events.
194  repeated StatusEvent status_events = 2;
195
196  // Aggregated task status for each TaskGroup in the Job.
197  // The map key is TaskGroup ID.
198  map<string, TaskGroupStatus> task_groups = 4;
199
200  // The duration of time that the Job spent in status RUNNING.
201  google.protobuf.Duration run_duration = 5;
202}
203
204// Notification configurations.
205message JobNotification {
206  // Message details.
207  // Describe the conditions under which messages will be sent.
208  // If no attribute is defined, no message will be sent by default.
209  // One message should specify either the job or the task level attributes,
210  // but not both. For example,
211  // job level: JOB_STATE_CHANGED and/or a specified new_job_state;
212  // task level: TASK_STATE_CHANGED and/or a specified new_task_state.
213  message Message {
214    // The message type.
215    Type type = 1;
216
217    // The new job state.
218    JobStatus.State new_job_state = 2;
219
220    // The new task state.
221    TaskStatus.State new_task_state = 3;
222  }
223
224  // The message type.
225  enum Type {
226    // Unspecified.
227    TYPE_UNSPECIFIED = 0;
228
229    // Notify users that the job state has changed.
230    JOB_STATE_CHANGED = 1;
231
232    // Notify users that the task state has changed.
233    TASK_STATE_CHANGED = 2;
234  }
235
236  // The Pub/Sub topic where notifications like the job state changes
237  // will be published. The topic must exist in the same project as
238  // the job, and billing will be charged to this project.
239  // If not specified, no Pub/Sub messages will be sent.
240  // Topic format: `projects/{project}/topics/{topic}`.
241  string pubsub_topic = 1;
242
243  // The attribute requirements of messages to be sent to this Pub/Sub topic.
244  // Without this field, no message will be sent.
245  Message message = 2;
246}
247
248// A Job's resource allocation policy describes when, where, and how compute
249// resources should be allocated for the Job.
250message AllocationPolicy {
  // LocationPolicy restricts where the Job's compute resources are allocated.
251  message LocationPolicy {
252    // A list of allowed location names represented by internal URLs.
253    //
254    // Each location can be a region or a zone.
255    // Only one region or multiple zones in one region is supported now.
256    // For example,
257    // ["regions/us-central1"] allow VMs in any zones in region us-central1.
258    // ["zones/us-central1-a", "zones/us-central1-c"] only allow VMs
259    // in zones us-central1-a and us-central1-c.
260    //
261    // Locations that end up in different regions cause an error.
262    // For example,
263    // ["regions/us-central1", "zones/us-central1-a", "zones/us-central1-b",
264    // "zones/us-west1-a"] contains 2 regions "us-central1" and
265    // "us-west1". An error is expected in this case.
266    repeated string allowed_locations = 1;
267  }
268
269  // A new persistent disk or a local SSD.
270  // A VM can only have one local SSD setting but multiple local SSD partitions.
271  // See https://cloud.google.com/compute/docs/disks#pdspecs and
272  // https://cloud.google.com/compute/docs/disks#localssds.
273  message Disk {
274    // A data source from which a PD will be created.
275    oneof data_source {
276      // URL for a VM image to use as the data source for this disk.
277      // For example, the following are all valid URLs:
278      //
279      // * Specify the image by its family name:
280      // projects/{project}/global/images/family/{image_family}
281      // * Specify the image version:
282      // projects/{project}/global/images/{image_version}
283      //
284      // You can also use Batch customized image in short names.
285      // The following image values are supported for a boot disk:
286      //
287      // * `batch-debian`: use Batch Debian images.
288      // * `batch-centos`: use Batch CentOS images.
289      // * `batch-cos`: use Batch Container-Optimized images.
290      // * `batch-hpc-centos`: use Batch HPC CentOS images.
291      // * `batch-hpc-rocky`: use Batch HPC Rocky Linux images.
292      string image = 4;
293
294      // Name of a snapshot used as the data source.
295      // Snapshot is not supported as boot disk now.
296      string snapshot = 5;
297    }
298
299    // Disk type as shown in `gcloud compute disk-types list`.
300    // For example, local SSD uses type "local-ssd".
301    // Persistent disks and boot disks use "pd-balanced", "pd-extreme", "pd-ssd"
302    // or "pd-standard".
303    string type = 1;
304
305    // Disk size in GB.
306    //
307    // **Non-Boot Disk**:
308    // If the `type` specifies a persistent disk, this field
309    // is ignored if `data_source` is set as `image` or `snapshot`.
310    // If the `type` specifies a local SSD, this field should be a multiple of
311    // 375 GB, otherwise, the final size will be the next greater multiple of
312    // 375 GB.
313    //
314    // **Boot Disk**:
315    // Batch will calculate the boot disk size based on source
316    // image and task requirements if you do not specify the size.
317    // If both this field and the `boot_disk_mib` field in task spec's
318    // `compute_resource` are defined, Batch will only honor this field.
319    // Also, this field should be no smaller than the source disk's
320    // size when the `data_source` is set as `snapshot` or `image`.
321    // For example, if you set an image as the `data_source` field and the
322    // image's default disk size is 30 GB, you can only use this field to make
323    // the disk larger than or equal to 30 GB.
324    int64 size_gb = 2;
325
326    // Local SSDs are available through both "SCSI" and "NVMe" interfaces.
327    // If not indicated, "NVMe" will be the default one for local SSDs.
328    // This field is ignored for persistent disks as the interface is chosen
329    // automatically. See
330    // https://cloud.google.com/compute/docs/disks/persistent-disks#choose_an_interface.
331    string disk_interface = 6;
332  }
333
334  // A new or an existing persistent disk (PD) or a local SSD attached to a VM
335  // instance.
336  message AttachedDisk {
    // The disk to attach: either a new disk or an existing persistent disk.
337    oneof attached {
      // A new persistent disk or local SSD, as described by `Disk`.
338      Disk new_disk = 1;
339
340      // Name of an existing PD.
341      string existing_disk = 2;
342    }
343
344    // Device name that the guest operating system will see.
345    // It is used by Runnable.volumes field to mount disks. So please specify
346    // the device_name if you want Batch to help mount the disk, and it should
347    // match the device_name field in volumes.
348    string device_name = 3;
349  }
350
351  // Accelerator describes Compute Engine accelerators to be attached to the VM.
352  message Accelerator {
353    // The accelerator type. For example, "nvidia-tesla-t4".
354    // See `gcloud compute accelerator-types list`.
355    string type = 1;
356
357    // The number of accelerators of this type.
358    int64 count = 2;
359
360    // Deprecated: please use instances[0].install_gpu_drivers instead.
361    bool install_gpu_drivers = 3 [deprecated = true];
362
363    // Optional. The NVIDIA GPU driver version that should be installed for this
364    // type.
365    //
366    // You can define the specific driver version such as "470.103.01",
367    // following the driver version requirements in
368    // https://cloud.google.com/compute/docs/gpus/install-drivers-gpu#minimum-driver.
369    // Batch will install the specific accelerator driver if qualified.
370    string driver_version = 4 [(google.api.field_behavior) = OPTIONAL];
371  }
372
373  // InstancePolicy describes an instance type and resources attached to each VM
374  // created by this InstancePolicy.
375  message InstancePolicy {
376    // The Compute Engine machine type.
377    string machine_type = 2;
378
379    // The minimum CPU platform.
380    // See
381    // https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform.
382    string min_cpu_platform = 3;
383
384    // The provisioning model.
385    ProvisioningModel provisioning_model = 4;
386
387    // The accelerators attached to each VM instance.
388    repeated Accelerator accelerators = 5;
389
390    // Boot disk to be created and attached to each VM by this InstancePolicy.
391    // Boot disk will be deleted when the VM is deleted.
392    // Batch API now only supports booting from image.
393    Disk boot_disk = 8;
394
395    // Non-boot disks to be attached for each VM created by this InstancePolicy.
396    // New disks will be deleted when the VM is deleted.
397    // A non-boot disk is a disk that can be of a device with a
398    // file system or a raw storage drive that is not ready for data
399    // storage and accessing.
400    repeated AttachedDisk disks = 6;
401
402    // Optional. If specified, VMs will consume only the specified reservation.
403    // If not specified (default), VMs will consume any applicable reservation.
404    string reservation = 7 [(google.api.field_behavior) = OPTIONAL];
405  }
406
407  // InstancePolicyOrTemplate lets you define the type of resources to use for
408  // this job either with an InstancePolicy or an instance template.
409  // If undefined, Batch picks the type of VM to use and doesn't include
410  // optional VM resources such as GPUs and extra disks.
411  message InstancePolicyOrTemplate {
412    oneof policy_template {
413      // InstancePolicy.
414      InstancePolicy policy = 1;
415
416      // Name of an instance template used to create VMs.
417      // Named the field as 'instance_template' instead of 'template' to avoid
418      // c++ keyword conflict.
419      string instance_template = 2;
420    }
421
422    // Set this field true if users want Batch to help fetch drivers from a
423    // third party location and install them for GPUs specified in
424    // policy.accelerators or instance_template on their behalf. Default is
425    // false.
426    //
427    // For Container-Optimized Image cases, Batch will install the
428    // accelerator driver following milestones of
429    // https://cloud.google.com/container-optimized-os/docs/release-notes. For
430    // non Container-Optimized Image cases, following
431    // https://github.com/GoogleCloudPlatform/compute-gpu-installation/blob/main/linux/install_gpu_driver.py.
432    bool install_gpu_drivers = 3;
433  }
434
435  // A network interface.
436  message NetworkInterface {
437    // The URL of an existing network resource.
438    // You can specify the network as a full or partial URL.
439    //
440    // For example, the following are all valid URLs:
441    //
442    // * https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network}
443    // * projects/{project}/global/networks/{network}
444    // * global/networks/{network}
445    string network = 1;
446
447    // The URL of an existing subnetwork resource in the network.
448    // You can specify the subnetwork as a full or partial URL.
449    //
450    // For example, the following are all valid URLs:
451    //
452    // * https://www.googleapis.com/compute/v1/projects/{project}/regions/{region}/subnetworks/{subnetwork}
453    // * projects/{project}/regions/{region}/subnetworks/{subnetwork}
454    // * regions/{region}/subnetworks/{subnetwork}
455    string subnetwork = 2;
456
457    // Default is false (with an external IP address). Required if
458    // no external public IP address is attached to the VM. If no external
459    // public IP address, additional configuration is required to allow the VM
460    // to access Google Services. See
461    // https://cloud.google.com/vpc/docs/configure-private-google-access and
462    // https://cloud.google.com/nat/docs/gce-example#create-nat for more
463    // information.
464    bool no_external_ip_address = 3;
465  }
466
467  // NetworkPolicy describes VM instance network configurations.
468  message NetworkPolicy {
469    // Network configurations.
470    repeated NetworkInterface network_interfaces = 1;
471  }
472
473  // PlacementPolicy describes a group placement policy for the VMs controlled
474  // by this AllocationPolicy.
475  message PlacementPolicy {
476    // UNSPECIFIED vs. COLLOCATED (default UNSPECIFIED). Use COLLOCATED when you
477    // want VMs to be located close to each other for low network latency
478    // between the VMs. No placement policy will be generated when collocation
479    // is UNSPECIFIED.
480    string collocation = 1;
481
482    // When specified, causes the job to fail if more than max_distance logical
483    // switches are required between VMs. Batch uses the most compact possible
484    // placement of VMs even when max_distance is not specified. An explicit
485    // max_distance makes that level of compactness a strict requirement.
486    // Not yet implemented.
487    int64 max_distance = 2;
488  }
489
490  // Compute Engine VM instance provisioning model.
491  enum ProvisioningModel {
492    // Unspecified.
493    PROVISIONING_MODEL_UNSPECIFIED = 0;
494
495    // Standard VM.
496    STANDARD = 1;
497
498    // SPOT VM.
499    SPOT = 2;
500
501    // Preemptible VM (PVM).
502    //
503    // The SPOT model above is preferred for preemptible VM instances: the
504    // preemptible VM model indicated by this value is the older model and
505    // has been migrated to use the SPOT model as the underlying technology.
506    // This older model will still be supported.
507    PREEMPTIBLE = 3;
508  }
509
510  // Location where compute resources should be allocated for the Job.
511  LocationPolicy location = 1;
512
513  // Describe instances that can be created by this AllocationPolicy.
514  // Only instances[0] is supported now.
515  repeated InstancePolicyOrTemplate instances = 8;
516
517  // Defines the service account for Batch-created VMs. If omitted, the [default
518  // Compute Engine service
519  // account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
520  // is used. Must match the service account specified in any used instance
521  // template configured in the Batch job.
522  //
523  // Includes the following fields:
524  //  * email: The service account's email address. If not set, the default
525  //  Compute Engine service account is used.
526  //  * scopes: Additional OAuth scopes to grant the service account, beyond the
527  //  default cloud-platform scope. (list of strings)
528  ServiceAccount service_account = 9;
529
530  // Labels applied to all VM instances and other resources
531  // created by AllocationPolicy.
532  // Labels can be user-provided or system-generated.
533  // You can assign up to 64 labels. [Google Compute Engine label
534  // restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
535  // apply.
536  // Label names that start with "goog-" or "google-" are reserved.
537  map<string, string> labels = 6;
538
539  // The network policy.
540  //
541  // If you define an instance template in the `InstancePolicyOrTemplate` field,
542  // Batch will use the network settings in the instance template instead of
543  // this field.
544  NetworkPolicy network = 7;
545
546  // The placement policy.
547  PlacementPolicy placement = 10;
548
549  // Optional. Tags applied to the VM instances.
550  //
551  // The tags identify valid sources or targets for network firewalls.
552  // Each tag must be 1-63 characters long, and comply with
553  // [RFC1035](https://www.ietf.org/rfc/rfc1035.txt).
554  repeated string tags = 11 [(google.api.field_behavior) = OPTIONAL];
555}
556
557// A TaskGroup defines one or more Tasks that all share the same TaskSpec.
558message TaskGroup {
559  option (google.api.resource) = {
560    type: "batch.googleapis.com/TaskGroup"
561    pattern: "projects/{project}/locations/{location}/jobs/{job}/taskGroups/{task_group}"
562  };
563
564  // How Tasks in the TaskGroup should be scheduled relative to each other.
565  enum SchedulingPolicy {
566    // Unspecified.
567    SCHEDULING_POLICY_UNSPECIFIED = 0;
568
569    // Run Tasks as soon as resources are available.
570    //
571    // Tasks might be executed in parallel depending on parallelism and
572    // task_count values.
573    AS_SOON_AS_POSSIBLE = 1;
574
575    // Run Tasks sequentially with increased task index.
576    IN_ORDER = 2;
577  }
578
579  // Output only. TaskGroup name.
580  // The system generates this field based on parent Job name.
581  // For example:
582  // "projects/123456/locations/us-west1/jobs/job01/taskGroups/group01".
583  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
584
585  // Required. Tasks in the group share the same task spec.
586  TaskSpec task_spec = 3 [(google.api.field_behavior) = REQUIRED];
587
588  // Number of Tasks in the TaskGroup.
589  // Default is 1.
590  int64 task_count = 4;
591
592  // Max number of tasks that can run in parallel.
593  // Defaults to min(task_count, parallel tasks per job limit).
594  // See: [Job Limits](https://cloud.google.com/batch/quotas#job_limits).
595  // Field parallelism must be 1 if the scheduling_policy is IN_ORDER.
596  int64 parallelism = 5;
597
598  // Scheduling policy for Tasks in the TaskGroup.
599  // The default value is AS_SOON_AS_POSSIBLE.
600  SchedulingPolicy scheduling_policy = 6;
601
602  // An array of environment variable mappings, which are passed to Tasks with
603  // matching indices. If task_environments is used then task_count should
604  // not be specified in the request (and will be ignored). Task count will be
605  // the length of task_environments.
606  //
607  // Tasks get a BATCH_TASK_INDEX and BATCH_TASK_COUNT environment variable, in
608  // addition to any environment variables set in task_environments, specifying
609  // the specific Task's index in the TaskGroup (0 through BATCH_TASK_COUNT - 1)
610  // and the number of Tasks in the Task's parent TaskGroup, respectively.
611  repeated Environment task_environments = 9;
612
613  // Max number of tasks that can be run on a VM at the same time.
614  // If not specified, the system will decide a value based on available
615  // compute resources on a VM and task requirements.
616  int64 task_count_per_node = 10;
617
618  // When true, Batch will populate a file with a list of all VMs assigned to
619  // the TaskGroup and set the BATCH_HOSTS_FILE environment variable to the path
620  // of that file. Defaults to false. The host file supports up to 1000 VMs.
621  bool require_hosts_file = 11;
622
623  // When true, Batch will configure SSH to allow passwordless login between
624  // VMs running the Batch tasks in the same TaskGroup.
625  bool permissive_ssh = 12;
626
627  // Optional. If not set or set to false, Batch uses the root user to execute
628  // runnables. If set to true, Batch runs the runnables using a non-root user.
629  // Currently, the non-root user that Batch uses is generated by OS Login. For
630  // more information, see [About OS
631  // Login](https://cloud.google.com/compute/docs/oslogin).
632  bool run_as_non_root = 14 [(google.api.field_behavior) = OPTIONAL];
633}
634
635// Carries information about a Google Cloud service account.
636message ServiceAccount {
637  // Email address of the service account.
638  string email = 1;
639
640  // List of OAuth scopes to be enabled for this service account.
641  repeated string scopes = 2;
642}
643