// batch/v1alpha/task.proto

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.batch.v1alpha;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/batch/v1alpha/volume.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.Batch.V1Alpha";
option go_package = "cloud.google.com/go/batch/apiv1alpha/batchpb;batchpb";
option java_multiple_files = true;
option java_outer_classname = "TaskProto";
option java_package = "com.google.cloud.batch.v1alpha";
option objc_class_prefix = "GCB";
option php_namespace = "Google\\Cloud\\Batch\\V1alpha";
option ruby_package = "Google::Cloud::Batch::V1alpha";

// Compute resource requirements.
//
// ComputeResource defines the amount of resources required for each task.
// Make sure your tasks have enough resources to successfully run.
// If you also define the types of resources for a job to use with the
// [InstancePolicyOrTemplate](https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#instancepolicyortemplate)
// field, make sure both fields are compatible with each other.
message ComputeResource {
  // The milliCPU count.
  //
  // `cpuMilli` defines the amount of CPU resources per task in milliCPU units.
  // For example, `1000` corresponds to 1 vCPU per task. If undefined, the
  // default value is `2000`.
  //
  // If you also define the VM's machine type using the `machineType` in
  // [InstancePolicy](https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#instancepolicy)
  // field or inside the `instanceTemplate` in the
  // [InstancePolicyOrTemplate](https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#instancepolicyortemplate)
  // field, make sure the CPU resources for both fields are compatible with each
  // other and with how many tasks you want to allow to run on the same VM at
  // the same time.
  //
  // For example, if you specify the `n2-standard-2` machine type, which has 2
  // vCPUs, it is recommended that you set `cpuMilli` to no more than `2000`,
  // or that you run two tasks on the same VM if you set `cpuMilli` to `1000`
  // or less.
  int64 cpu_milli = 1;

  // Memory in MiB.
  //
  // `memoryMib` defines the amount of memory per task in MiB units.
  // If undefined, the default value is `2000`.
  //
  // If you also define the VM's machine type using the `machineType` in
  // [InstancePolicy](https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#instancepolicy)
  // field or inside the `instanceTemplate` in the
  // [InstancePolicyOrTemplate](https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#instancepolicyortemplate)
  // field, make sure the memory resources for both fields are compatible with
  // each other and with how many tasks you want to allow to run on the same VM
  // at the same time.
  //
  // For example, if you specify the `n2-standard-2` machine type, which has 8
  // GiB of memory, it is recommended that you set `memoryMib` to no more than
  // `8192`, or that you run two tasks on the same VM if you set `memoryMib` to
  // `4096` or less.
  int64 memory_mib = 2;

  // The GPU count.
  //
  // Not yet implemented.
  int64 gpu_count = 3;

  // Extra boot disk size in MiB for each task.
  int64 boot_disk_mib = 4;
}

// Status event.
//
// Within this file, StatusEvent messages are carried in
// `TaskStatus.status_events`.
//
// Note: the field numbers below are not in declaration order. They identify
// the fields on the wire and must not be renumbered.
message StatusEvent {
  // Type of the event.
  string type = 3;

  // Description of the event.
  string description = 1;

  // The time this event occurred.
  google.protobuf.Timestamp event_time = 2;

  // Task execution details reported with this event (see `TaskExecution`).
  TaskExecution task_execution = 4;

  // Task state reported with this event (a `TaskStatus.State` value).
  TaskStatus.State task_state = 5;
}

// TaskExecution includes detailed information about task execution
// procedures, based on StatusEvent types.
message TaskExecution {
  // The exit code of a single task execution, reported when the task has
  // completed with the status FAILED or SUCCEEDED. The default is 0, which
  // indicates success.
  int32 exit_code = 1;

  // Optional. The tail end of any content written to standard error by the task
  // execution. This field will be populated only when the execution failed.
  string stderr_snippet = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Status of a task.
message TaskStatus {
  // Task states.
  enum State {
    // Unknown state.
    STATE_UNSPECIFIED = 0;

    // The Task is created and waiting for resources.
    PENDING = 1;

    // The Task is assigned to at least one VM.
    ASSIGNED = 2;

    // The Task is running.
    RUNNING = 3;

    // The Task has failed.
    FAILED = 4;

    // The Task has succeeded.
    SUCCEEDED = 5;

    // The Task has not been executed when the Job finishes.
    UNEXECUTED = 6;
  }

  // The current state of the task.
  State state = 1;

  // Detailed information about why the state was reached.
  repeated StatusEvent status_events = 2;

  // The resource usage of the task.
  TaskResourceUsage resource_usage = 3;
}

// TaskResourceUsage describes the resource usage of a task.
message TaskResourceUsage {
  // The CPU core hours the task consumes, based on the task's requirement and
  // run time.
  double core_hours = 1;
}

// Runnable describes instructions for executing a specific script or container
// as part of a Task.
message Runnable {
  // Container runnable.
  message Container {
    // The URI to pull the container image from.
    string image_uri = 1;

    // Overrides the `CMD` specified in the container. If there is an
    // `ENTRYPOINT` (either in the container image or with the `entrypoint`
    // field below) then commands are appended as arguments to the
    // `ENTRYPOINT`.
    repeated string commands = 2;

    // Overrides the `ENTRYPOINT` specified in the container.
    string entrypoint = 3;

    // Volumes to mount (bind mount) from the host machine files or directories
    // into the container, formatted to match docker run's `--volume` option,
    // e.g. `/foo:/bar`, or `/foo:/bar:ro`.
    //
    // If the `TaskSpec.Volumes` field is specified but this field is not, Batch
    // will mount each volume from the host machine to the container with the
    // same mount path by default. In this case, the default mount option for
    // containers will be read-only (ro) for existing persistent disks and
    // read-write (rw) for other volume types, regardless of the original mount
    // options specified in `TaskSpec.Volumes`. If you need different mount
    // settings, you can explicitly configure them in this field.
    repeated string volumes = 7;

    // Arbitrary additional options to include in the "docker run" command when
    // running this container, e.g. "--network host".
    string options = 8;

    // If set to true, external network access to and from the container will
    // be blocked. Containers that have `block_external_network` set to true
    // can still communicate with each other. In that case, the network cannot
    // be specified in the `container.options` field.
    bool block_external_network = 9;

    // Required if the container image is from a private Docker registry. The
    // username to login to the Docker registry that contains the image.
    //
    // You can either specify the username directly by using plain text or
    // specify an encrypted username by using a Secret Manager secret:
    // `projects/*/secrets/*/versions/*`. However, using a secret is
    // recommended for enhanced security.
    //
    // Caution: If you specify the username using plain text, you risk the
    // username being exposed to any users who can view the job or its logs.
    // To avoid this risk, specify a secret that contains the username instead.
    //
    // Learn more about [Secret
    // Manager](https://cloud.google.com/secret-manager/docs/) and [using
    // Secret Manager with
    // Batch](https://cloud.google.com/batch/docs/create-run-job-secret-manager).
    string username = 10;

    // Required if the container image is from a private Docker registry. The
    // password to login to the Docker registry that contains the image.
    //
    // For security, it is strongly recommended to specify an
    // encrypted password by using a Secret Manager secret:
    // `projects/*/secrets/*/versions/*`.
    //
    // Warning: If you specify the password using plain text, you risk the
    // password being exposed to any users who can view the job or its logs.
    // To avoid this risk, specify a secret that contains the password instead.
    //
    // Learn more about [Secret
    // Manager](https://cloud.google.com/secret-manager/docs/) and [using
    // Secret Manager with
    // Batch](https://cloud.google.com/batch/docs/create-run-job-secret-manager).
    string password = 11;

    // Optional. If set to true, this container runnable uses Image streaming.
    //
    // Use Image streaming to allow the runnable to initialize without
    // waiting for the entire container image to download, which can
    // significantly reduce startup time for large container images.
    //
    // When `enableImageStreaming` is set to true, the container
    // runtime is [containerd](https://containerd.io/) instead of Docker.
    // Additionally, this container runnable only supports the following
    // `container` subfields: `imageUri`,
    // `commands[]`, `entrypoint`, and
    // `volumes[]`; any other `container` subfields are ignored.
    //
    // For more information about the requirements and limitations for using
    // Image streaming with Batch, see the [`image-streaming`
    // sample on
    // GitHub](https://github.com/GoogleCloudPlatform/batch-samples/tree/main/api-samples/image-streaming).
    bool enable_image_streaming = 12 [(google.api.field_behavior) = OPTIONAL];
  }

  // Script runnable.
  message Script {
    // Where the script comes from: a file on the host VM or inline text.
    // Because these fields form a oneof, at most one of them can be set.
    oneof command {
      // Script file path on the host VM.
      //
      // To specify an interpreter, please add a `#!<interpreter>` (also known
      // as a [shebang line](https://en.wikipedia.org/wiki/Shebang_(Unix))) as
      // the first line of the file. (For example, to execute the script using
      // bash, `#!/bin/bash` should be the first line of the file. To execute
      // the script using `Python3`, `#!/usr/bin/env python3` should be the
      // first line of the file.) Otherwise, the file will by default be
      // executed by `/bin/sh`.
      string path = 1;

      // Shell script text.
      //
      // To specify an interpreter, please add a `#!<interpreter>\n` at the
      // beginning of the text. (For example, to execute the script using bash,
      // `#!/bin/bash\n` should be added. To execute the script using `Python3`,
      // `#!/usr/bin/env python3\n` should be added.) Otherwise, the script will
      // by default be executed by `/bin/sh`.
      string text = 2;
    }
  }

  // Barrier runnable blocks until all tasks in a taskgroup reach it.
  message Barrier {
    // Barriers are identified by their index in the runnable list.
    // Names are not required, but if present should be an identifier.
    string name = 1;
  }

  // The script or container to run.
  oneof executable {
    // Container runnable.
    Container container = 1;

    // Script runnable.
    Script script = 2;

    // Barrier runnable.
    Barrier barrier = 6;
  }

  // Optional. DisplayName is an optional field that can be provided by the
  // caller. If provided, it will be used in logs and other outputs to identify
  // the script, making it easier for users to understand the logs. If not
  // provided, the index of the runnable will be used for outputs.
  string display_name = 10 [(google.api.field_behavior) = OPTIONAL];

  // Normally, a non-zero exit status causes the Task to fail. This flag allows
  // execution of other Runnables to continue instead.
  bool ignore_exit_status = 3;

  // This flag allows a Runnable to continue running in the background while the
  // Task executes subsequent Runnables. This is useful to provide services to
  // other Runnables (or to provide debugging support tools like SSH servers).
  bool background = 4;

  // By default, after a Runnable fails, no further Runnables are executed.
  // This flag indicates that this Runnable must be run even if the Task has
  // already failed. This is useful for Runnables that copy output files off of
  // the VM or for debugging.
  //
  // The always_run flag does not override the Task's overall max_run_duration.
  // If the max_run_duration has expired then no further Runnables will execute,
  // not even always_run Runnables.
  bool always_run = 5;

  // Environment variables for this Runnable (overrides variables set for the
  // whole Task or TaskGroup).
  Environment environment = 7;

  // Timeout for this Runnable.
  google.protobuf.Duration timeout = 8;

  // Labels for this Runnable.
  map<string, string> labels = 9;
}

// Spec of a task.
message TaskSpec {
  // The sequence of scripts or containers to run for this Task. Each Task using
  // this TaskSpec executes its list of runnables in order. The Task succeeds if
  // every runnable either exits with a zero status or, when it exits with a
  // non-zero status, has the ignore_exit_status flag set.
  //
  // Background runnables are killed automatically (if they have not already
  // exited) a short time after all foreground runnables have completed. Even
  // though this is likely to result in a non-zero exit status for the
  // background runnable, these automatic kills are not treated as Task
  // failures.
  repeated Runnable runnables = 8;

  // ComputeResource requirements.
  ComputeResource compute_resource = 3;

  // Maximum duration the task should run.
  // The task will be killed and marked as FAILED if over this limit.
  // The valid value range for max_run_duration in seconds is
  // [0, 315576000000.999999999].
  google.protobuf.Duration max_run_duration = 4;

  // Maximum number of retries on failures.
  // The default is 0, which means never retry.
  // The valid value range is [0, 10].
  int32 max_retry_count = 5;

  // Lifecycle management schema for when any task in a task group has failed.
  // Currently we only support one lifecycle policy.
  // When the lifecycle policy condition is met,
  // the action in the policy will execute.
  // If the task execution result does not match the defined lifecycle
  // policy, the default policy applies.
  // Default policy means if the exit code is 0, exit task.
  // If task ends with non-zero exit code, retry the task with max_retry_count.
  repeated LifecyclePolicy lifecycle_policies = 9;

  // Deprecated: please use `environment` (non-plural) instead.
  map<string, string> environments = 6 [deprecated = true];

  // Volumes to mount before running Tasks using this TaskSpec.
  repeated Volume volumes = 7;

  // Environment variables to set before running the Task.
  Environment environment = 10;
}

// LifecyclePolicy describes how to deal with task failures
// based on different conditions.
message LifecyclePolicy {
  // Conditions for actions to deal with task failures.
  message ActionCondition {
    // Exit codes of a task execution.
    // If there is more than one exit code, the condition is met and the
    // action will be executed when the task exits with any of the exit codes
    // in the list.
    repeated int32 exit_codes = 1;
  }

  // Action on task failures based on different conditions.
  enum Action {
    // Action unspecified.
    ACTION_UNSPECIFIED = 0;

    // Tasks in the group will be scheduled to re-execute.
    RETRY_TASK = 1;

    // Tasks in the group will be stopped immediately.
    FAIL_TASK = 2;
  }

  // Action to execute when ActionCondition is true.
  // When RETRY_TASK is specified, we will retry failed tasks
  // if we notice any exit code match and fail tasks if no match is found.
  // Likewise, when FAIL_TASK is specified, we will fail tasks
  // if we notice any exit code match and retry tasks if no match is found.
  Action action = 1;

  // Conditions that decide why a task failure is dealt with a specific action.
  ActionCondition action_condition = 2;
}

// A Cloud Batch task.
message Task {
  // Resource descriptor: declares the resource type and the pattern that Task
  // resource names follow.
  option (google.api.resource) = {
    type: "batch.googleapis.com/Task"
    pattern: "projects/{project}/locations/{location}/jobs/{job}/taskGroups/{task_group}/tasks/{task}"
  };

  // Task name.
  // The name is generated from the parent TaskGroup name and 'id' field.
  // For example:
  // "projects/123456/locations/us-west1/jobs/job01/taskGroups/group01/tasks/task01".
  string name = 1;

  // Task Status.
  TaskStatus status = 2;
}

// An Environment describes a collection of environment variables to set when
// executing Tasks.
message Environment {
  // KMSEnvMap pairs a cipher text with the name of the KMS key that will be
  // used to decrypt it. It is the type of the `encrypted_variables` field
  // below.
  message KMSEnvMap {
    // The name of the KMS key that will be used to decrypt the cipher text.
    string key_name = 1;

    // The value of the cipherText response from the `encrypt` method.
    string cipher_text = 2;
  }

  // A map of environment variable names to values.
  map<string, string> variables = 1;

  // A map of environment variable names to Secret Manager secret names.
  // The VM will access the named secrets to set the value of each environment
  // variable.
  map<string, string> secret_variables = 2;

  // An encrypted JSON dictionary where the key/value pairs correspond to
  // environment variable names and their values.
  KMSEnvMap encrypted_variables = 3;
}