// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.batch.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/batch/v1/task.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.Batch.V1";
option go_package = "cloud.google.com/go/batch/apiv1/batchpb;batchpb";
option java_multiple_files = true;
option java_outer_classname = "JobProto";
option java_package = "com.google.cloud.batch.v1";
option objc_class_prefix = "GCB";
option php_namespace = "Google\\Cloud\\Batch\\V1";
option ruby_package = "Google::Cloud::Batch::V1";

// The Cloud Batch Job description.
message Job {
  option (google.api.resource) = {
    type: "batch.googleapis.com/Job"
    pattern: "projects/{project}/locations/{location}/jobs/{job}"
  };

  // Output only. Job name.
  // For example: "projects/123456/locations/us-central1/jobs/job01".
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A system generated unique ID for the Job.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Priority of the Job.
  // The valid value range is [0, 100). Default value is 0.
  // Higher value indicates higher priority.
  // A job with higher priority value is more likely to run earlier if all other
  // requirements are satisfied.
  int64 priority = 3;

  // Required. TaskGroups in the Job. Only one TaskGroup is supported now.
  repeated TaskGroup task_groups = 4 [(google.api.field_behavior) = REQUIRED];

  // Compute resource allocation for all TaskGroups in the Job.
  AllocationPolicy allocation_policy = 7;

  // Labels for the Job. Labels could be user provided or system generated.
  // For example,
  //   "labels": {
  //     "department": "finance",
  //     "environment": "test"
  //   }
  // You can assign up to 64 labels.  [Google Compute Engine label
  // restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
  // apply.
  // Label names that start with "goog-" or "google-" are reserved.
  map<string, string> labels = 8;

  // Output only. Job status. It is read only for users.
  JobStatus status = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. When the Job was created.
  google.protobuf.Timestamp create_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The last time the Job was updated.
  google.protobuf.Timestamp update_time = 12
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Log preservation policy for the Job.
  LogsPolicy logs_policy = 13;

  // Notification configurations.
  repeated JobNotification notifications = 14;
}

// LogsPolicy describes how outputs from a Job's Tasks (stdout/stderr) will be
// preserved.
message LogsPolicy {
  // `CloudLoggingOption` contains additional settings for Cloud Logging logs
  // generated by Batch job.
  message CloudLoggingOption {
    // Optional. Set this flag to true to change the [monitored resource
    // type](https://cloud.google.com/monitoring/api/resources) for
    // Cloud Logging logs generated by this Batch job from
    // the
    // [`batch.googleapis.com/Job`](https://cloud.google.com/monitoring/api/resources#tag_batch.googleapis.com/Job)
    // type to the formerly used
    // [`generic_task`](https://cloud.google.com/monitoring/api/resources#tag_generic_task)
    // type.
    bool use_generic_task_monitored_resource = 1
        [(google.api.field_behavior) = OPTIONAL];
  }

  // The destination (if any) for logs.
  enum Destination {
    // Logs are not preserved.
    DESTINATION_UNSPECIFIED = 0;

    // Logs are streamed to Cloud Logging.
    CLOUD_LOGGING = 1;

    // Logs are saved to a file path.
    PATH = 2;
  }

  // Where logs should be saved.
  Destination destination = 1;

  // The path to which logs are saved when the destination = PATH. This can be a
  // local file path on the VM, or under the mount point of a Persistent Disk or
  // Filestore, or a Cloud Storage path.
  string logs_path = 2;

  // Optional. Additional settings for Cloud Logging. It will only take effect
  // when the destination of `LogsPolicy` is set to `CLOUD_LOGGING`.
  CloudLoggingOption cloud_logging_option = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// Job status.
message JobStatus {
  // VM instance status.
  message InstanceStatus {
    // The Compute Engine machine type.
    string machine_type = 1;

    // The VM instance provisioning model.
    AllocationPolicy.ProvisioningModel provisioning_model = 2;

    // The max number of tasks that can be assigned to this instance type.
    int64 task_pack = 3;

    // The VM boot disk.
    AllocationPolicy.Disk boot_disk = 4;
  }

  // Aggregated task status for a TaskGroup.
  message TaskGroupStatus {
    // Count of Tasks in each state in the TaskGroup.
    // The map key is task state name.
    map<string, int64> counts = 1;

    // Status of instances allocated for the TaskGroup.
    repeated InstanceStatus instances = 2;
  }

  // Valid Job states.
  enum State {
    // Job state unspecified.
    STATE_UNSPECIFIED = 0;

    // Job is admitted (validated and persisted) and waiting for resources.
    QUEUED = 1;

    // Job is scheduled to run as soon as resource allocation is ready.
    // The resource allocation may happen at a later time but with a high
    // chance to succeed.
    SCHEDULED = 2;

    // Resource allocation has been successful. At least one Task in the Job is
    // RUNNING.
    RUNNING = 3;

    // All Tasks in the Job have finished successfully.
    SUCCEEDED = 4;

    // At least one Task in the Job has failed.
    FAILED = 5;

    // The Job will be deleted, but has not been deleted yet. Typically this is
    // because resources used by the Job are still being cleaned up.
    DELETION_IN_PROGRESS = 6;
  }

  // Job state.
  State state = 1;

  // Job status events.
  repeated StatusEvent status_events = 2;

  // Aggregated task status for each TaskGroup in the Job.
  // The map key is TaskGroup ID.
  map<string, TaskGroupStatus> task_groups = 4;

  // The duration of time that the Job spent in status RUNNING.
  google.protobuf.Duration run_duration = 5;
}

// Notification configurations.
message JobNotification {
  // Message details.
  // Describe the conditions under which messages will be sent.
  // If no attribute is defined, no message will be sent by default.
  // One message should specify either the job or the task level attributes,
  // but not both. For example,
  // job level: JOB_STATE_CHANGED and/or a specified new_job_state;
  // task level: TASK_STATE_CHANGED and/or a specified new_task_state.
  message Message {
    // The message type.
    Type type = 1;

    // The new job state.
    JobStatus.State new_job_state = 2;

    // The new task state.
    TaskStatus.State new_task_state = 3;
  }

  // The message type.
  enum Type {
    // Unspecified.
    TYPE_UNSPECIFIED = 0;

    // Notify users that the job state has changed.
    JOB_STATE_CHANGED = 1;

    // Notify users that the task state has changed.
    TASK_STATE_CHANGED = 2;
  }

  // The Pub/Sub topic where notifications like the job state changes
  // will be published. The topic must exist in the same project as
  // the job and billings will be charged to this project.
  // If not specified, no Pub/Sub messages will be sent.
  // Topic format: `projects/{project}/topics/{topic}`.
  string pubsub_topic = 1;

  // The attribute requirements of messages to be sent to this Pub/Sub topic.
  // Without this field, no message will be sent.
  Message message = 2;
}

// A Job's resource allocation policy describes when, where, and how compute
// resources should be allocated for the Job.
message AllocationPolicy {
  // LocationPolicy lists the locations in which VMs are allowed.
  message LocationPolicy {
    // A list of allowed location names represented by internal URLs.
    //
    // Each location can be a region or a zone.
    // Only one region or multiple zones in one region is supported now.
    // For example,
    // ["regions/us-central1"] allow VMs in any zones in region us-central1.
    // ["zones/us-central1-a", "zones/us-central1-c"] only allow VMs
    // in zones us-central1-a and us-central1-c.
    //
    // Locations that end up in different regions cause an error.
    // For example,
    // ["regions/us-central1", "zones/us-central1-a", "zones/us-central1-b",
    // "zones/us-west1-a"] contains 2 regions "us-central1" and
    // "us-west1". An error is expected in this case.
    repeated string allowed_locations = 1;
  }

  // A new persistent disk or a local ssd.
  // A VM can only have one local SSD setting but multiple local SSD partitions.
  // See https://cloud.google.com/compute/docs/disks#pdspecs and
  // https://cloud.google.com/compute/docs/disks#localssds.
  message Disk {
    // A data source from which a PD will be created.
    oneof data_source {
      // URL for a VM image to use as the data source for this disk.
      // For example, the following are all valid URLs:
      //
      // * Specify the image by its family name:
      // projects/{project}/global/images/family/{image_family}
      // * Specify the image version:
      // projects/{project}/global/images/{image_version}
      //
      // You can also use Batch customized image in short names.
      // The following image values are supported for a boot disk:
      //
      // * `batch-debian`: use Batch Debian images.
      // * `batch-centos`: use Batch CentOS images.
      // * `batch-cos`: use Batch Container-Optimized images.
      // * `batch-hpc-centos`: use Batch HPC CentOS images.
      // * `batch-hpc-rocky`: use Batch HPC Rocky Linux images.
      string image = 4;

      // Name of a snapshot used as the data source.
      // Snapshot is not supported as boot disk now.
      string snapshot = 5;
    }

    // Disk type as shown in `gcloud compute disk-types list`.
    // For example, local SSD uses type "local-ssd".
    // Persistent disks and boot disks use "pd-balanced", "pd-extreme", "pd-ssd"
    // or "pd-standard".
    string type = 1;

    // Disk size in GB.
    //
    // **Non-Boot Disk**:
    // If the `type` specifies a persistent disk, this field
    // is ignored if `data_source` is set as `image` or `snapshot`.
    // If the `type` specifies a local SSD, this field should be a multiple of
    // 375 GB, otherwise, the final size will be the next greater multiple of
    // 375 GB.
    //
    // **Boot Disk**:
    // Batch will calculate the boot disk size based on source
    // image and task requirements if you do not specify the size.
    // If both this field and the `boot_disk_mib` field in task spec's
    // `compute_resource` are defined, Batch will only honor this field.
    // Also, this field should be no smaller than the source disk's
    // size when the `data_source` is set as `snapshot` or `image`.
    // For example, if you set an image as the `data_source` field and the
    // image's default disk size is 30 GB, you can only use this field to make
    // the disk larger than or equal to 30 GB.
    int64 size_gb = 2;

    // Local SSDs are available through both "SCSI" and "NVMe" interfaces.
    // If not indicated, "NVMe" will be the default one for local ssds.
    // This field is ignored for persistent disks as the interface is chosen
    // automatically. See
    // https://cloud.google.com/compute/docs/disks/persistent-disks#choose_an_interface.
    string disk_interface = 6;
  }

  // A new or an existing persistent disk (PD) or a local ssd attached to a VM
  // instance.
  message AttachedDisk {
    // The disk to attach: either a new disk or an existing PD.
    oneof attached {
      // A new persistent disk or local SSD, described by `Disk`.
      Disk new_disk = 1;

      // Name of an existing PD.
      string existing_disk = 2;
    }

    // Device name that the guest operating system will see.
    // It is used by Runnable.volumes field to mount disks. So please specify
    // the device_name if you want Batch to help mount the disk, and it should
    // match the device_name field in volumes.
    string device_name = 3;
  }

  // Accelerator describes Compute Engine accelerators to be attached to the VM.
  message Accelerator {
    // The accelerator type. For example, "nvidia-tesla-t4".
    // See `gcloud compute accelerator-types list`.
    string type = 1;

    // The number of accelerators of this type.
    int64 count = 2;

    // Deprecated: please use instances[0].install_gpu_drivers instead.
    bool install_gpu_drivers = 3 [deprecated = true];

    // Optional. The NVIDIA GPU driver version that should be installed for this
    // type.
    //
    // You can define the specific driver version such as "470.103.01",
    // following the driver version requirements in
    // https://cloud.google.com/compute/docs/gpus/install-drivers-gpu#minimum-driver.
    // Batch will install the specific accelerator driver if qualified.
    string driver_version = 4 [(google.api.field_behavior) = OPTIONAL];
  }

  // InstancePolicy describes an instance type and resources attached to each VM
  // created by this InstancePolicy.
  message InstancePolicy {
    // The Compute Engine machine type.
    string machine_type = 2;

    // The minimum CPU platform.
    // See
    // https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform.
    string min_cpu_platform = 3;

    // The provisioning model.
    ProvisioningModel provisioning_model = 4;

    // The accelerators attached to each VM instance.
    repeated Accelerator accelerators = 5;

    // Boot disk to be created and attached to each VM by this InstancePolicy.
    // Boot disk will be deleted when the VM is deleted.
    // Batch API now only supports booting from image.
    Disk boot_disk = 8;

    // Non-boot disks to be attached for each VM created by this InstancePolicy.
    // New disks will be deleted when the VM is deleted.
    // A non-boot disk is a disk that can be of a device with a
    // file system or a raw storage drive that is not ready for data
    // storage and accessing.
    repeated AttachedDisk disks = 6;

    // Optional. If specified, VMs will consume only the specified reservation.
    // If not specified (default), VMs will consume any applicable reservation.
    string reservation = 7 [(google.api.field_behavior) = OPTIONAL];
  }

  // InstancePolicyOrTemplate lets you define the type of resources to use for
  // this job either with an InstancePolicy or an instance template.
  // If undefined, Batch picks the type of VM to use and doesn't include
  // optional VM resources such as GPUs and extra disks.
  message InstancePolicyOrTemplate {
    // Either an InstancePolicy or the name of an instance template.
    oneof policy_template {
      // InstancePolicy.
      InstancePolicy policy = 1;

      // Name of an instance template used to create VMs.
      // Named the field as 'instance_template' instead of 'template' to avoid
      // c++ keyword conflict.
      string instance_template = 2;
    }

    // Set this field true if users want Batch to help fetch drivers from a
    // third party location and install them for GPUs specified in
    // policy.accelerators or instance_template on their behalf. Default is
    // false.
    //
    // For Container-Optimized Image cases, Batch will install the
    // accelerator driver following milestones of
    // https://cloud.google.com/container-optimized-os/docs/release-notes. For
    // non Container-Optimized Image cases, following
    // https://github.com/GoogleCloudPlatform/compute-gpu-installation/blob/main/linux/install_gpu_driver.py.
    bool install_gpu_drivers = 3;
  }

  // A network interface.
  message NetworkInterface {
    // The URL of an existing network resource.
    // You can specify the network as a full or partial URL.
    //
    // For example, the following are all valid URLs:
    //
    // * https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network}
    // * projects/{project}/global/networks/{network}
    // * global/networks/{network}
    string network = 1;

    // The URL of an existing subnetwork resource in the network.
    // You can specify the subnetwork as a full or partial URL.
    //
    // For example, the following are all valid URLs:
    //
    // * https://www.googleapis.com/compute/v1/projects/{project}/regions/{region}/subnetworks/{subnetwork}
    // * projects/{project}/regions/{region}/subnetworks/{subnetwork}
    // * regions/{region}/subnetworks/{subnetwork}
    string subnetwork = 2;

    // Default is false (with an external IP address). Required if
    // no external public IP address is attached to the VM. If no external
    // public IP address, additional configuration is required to allow the VM
    // to access Google Services. See
    // https://cloud.google.com/vpc/docs/configure-private-google-access and
    // https://cloud.google.com/nat/docs/gce-example#create-nat for more
    // information.
    bool no_external_ip_address = 3;
  }

  // NetworkPolicy describes VM instance network configurations.
  message NetworkPolicy {
    // Network configurations.
    repeated NetworkInterface network_interfaces = 1;
  }

  // PlacementPolicy describes a group placement policy for the VMs controlled
  // by this AllocationPolicy.
  message PlacementPolicy {
    // UNSPECIFIED vs. COLLOCATED (default UNSPECIFIED). Use COLLOCATED when you
    // want VMs to be located close to each other for low network latency
    // between the VMs. No placement policy will be generated when collocation
    // is UNSPECIFIED.
    string collocation = 1;

    // When specified, causes the job to fail if more than max_distance logical
    // switches are required between VMs. Batch uses the most compact possible
    // placement of VMs even when max_distance is not specified. An explicit
    // max_distance makes that level of compactness a strict requirement.
    // Not yet implemented.
    int64 max_distance = 2;
  }

  // Compute Engine VM instance provisioning model.
  enum ProvisioningModel {
    // Unspecified.
    PROVISIONING_MODEL_UNSPECIFIED = 0;

    // Standard VM.
    STANDARD = 1;

    // SPOT VM.
    SPOT = 2;

    // Preemptible VM (PVM).
    //
    // Above SPOT VM is the preferable model for preemptible VM instances: the
    // old preemptible VM model (indicated by this field) is the older model,
    // and has been migrated to use the SPOT model as the underlying technology.
    // This old model will still be supported.
    PREEMPTIBLE = 3;
  }

  // Location where compute resources should be allocated for the Job.
  LocationPolicy location = 1;

  // Describe instances that can be created by this AllocationPolicy.
  // Only instances[0] is supported now.
  repeated InstancePolicyOrTemplate instances = 8;

  // Defines the service account for Batch-created VMs. If omitted, the [default
  // Compute Engine service
  // account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
  // is used. Must match the service account specified in any used instance
  // template configured in the Batch job.
  //
  // Includes the following fields:
  //  * email: The service account's email address. If not set, the default
  //  Compute Engine service account is used.
  //  * scopes: Additional OAuth scopes to grant the service account, beyond the
  //  default cloud-platform scope. (list of strings)
  ServiceAccount service_account = 9;

  // Labels applied to all VM instances and other resources
  // created by AllocationPolicy.
  // Labels could be user provided or system generated.
  // You can assign up to 64 labels. [Google Compute Engine label
  // restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
  // apply.
  // Label names that start with "goog-" or "google-" are reserved.
  map<string, string> labels = 6;

  // The network policy.
  //
  // If you define an instance template in the `InstancePolicyOrTemplate` field,
  // Batch will use the network settings in the instance template instead of
  // this field.
  NetworkPolicy network = 7;

  // The placement policy.
  PlacementPolicy placement = 10;

  // Optional. Tags applied to the VM instances.
  //
  // The tags identify valid sources or targets for network firewalls.
  // Each tag must be 1-63 characters long, and comply with
  // [RFC1035](https://www.ietf.org/rfc/rfc1035.txt).
  repeated string tags = 11 [(google.api.field_behavior) = OPTIONAL];
}

// A TaskGroup defines one or more Tasks that all share the same TaskSpec.
message TaskGroup {
  option (google.api.resource) = {
    type: "batch.googleapis.com/TaskGroup"
    pattern: "projects/{project}/locations/{location}/jobs/{job}/taskGroups/{task_group}"
  };

  // How Tasks in the TaskGroup should be scheduled relative to each other.
  enum SchedulingPolicy {
    // Unspecified.
    SCHEDULING_POLICY_UNSPECIFIED = 0;

    // Run Tasks as soon as resources are available.
    //
    // Tasks might be executed in parallel depending on parallelism and
    // task_count values.
    AS_SOON_AS_POSSIBLE = 1;

    // Run Tasks sequentially with increased task index.
    IN_ORDER = 2;
  }

  // Output only. TaskGroup name.
  // The system generates this field based on parent Job name.
  // For example:
  // "projects/123456/locations/us-west1/jobs/job01/taskGroups/group01".
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. Tasks in the group share the same task spec.
  TaskSpec task_spec = 3 [(google.api.field_behavior) = REQUIRED];

  // Number of Tasks in the TaskGroup.
  // Default is 1.
  int64 task_count = 4;

  // Max number of tasks that can run in parallel.
  // Default to min(task_count, parallel tasks per job limit).
  // See: [Job Limits](https://cloud.google.com/batch/quotas#job_limits).
  // Field parallelism must be 1 if the scheduling_policy is IN_ORDER.
  int64 parallelism = 5;

  // Scheduling policy for Tasks in the TaskGroup.
  // The default value is AS_SOON_AS_POSSIBLE.
  SchedulingPolicy scheduling_policy = 6;

  // An array of environment variable mappings, which are passed to Tasks with
  // matching indices. If task_environments is used then task_count should
  // not be specified in the request (and will be ignored). Task count will be
  // the length of task_environments.
  //
  // Tasks get a BATCH_TASK_INDEX and BATCH_TASK_COUNT environment variable, in
  // addition to any environment variables set in task_environments, specifying
  // the specific Task's index in the TaskGroup (0 through
  // BATCH_TASK_COUNT - 1) and the number of Tasks in the Task's parent
  // TaskGroup, respectively.
  repeated Environment task_environments = 9;

  // Max number of tasks that can be run on a VM at the same time.
  // If not specified, the system will decide a value based on available
  // compute resources on a VM and task requirements.
  int64 task_count_per_node = 10;

  // When true, Batch will populate a file with a list of all VMs assigned to
  // the TaskGroup and set the BATCH_HOSTS_FILE environment variable to the path
  // of that file. Defaults to false. The host file supports up to 1000 VMs.
  bool require_hosts_file = 11;

  // When true, Batch will configure SSH to allow passwordless login between
  // VMs running the Batch tasks in the same TaskGroup.
  bool permissive_ssh = 12;

  // Optional. If not set or set to false, Batch uses the root user to execute
  // runnables. If set to true, Batch runs the runnables using a non-root user.
  // Currently, the non-root user that Batch uses is generated by OS Login. For
  // more information, see [About OS
  // Login](https://cloud.google.com/compute/docs/oslogin).
  bool run_as_non_root = 14 [(google.api.field_behavior) = OPTIONAL];
}

// Carries information about a Google Cloud service account.
message ServiceAccount {
  // Email address of the service account.
  string email = 1;

  // List of scopes to be enabled for this service account.
  repeated string scopes = 2;
}