// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.dataflow.v1beta3;

option csharp_namespace = "Google.Cloud.Dataflow.V1Beta3";
option go_package = "cloud.google.com/go/dataflow/apiv1beta3/dataflowpb;dataflowpb";
option java_multiple_files = true;
option java_outer_classname = "StreamingProto";
option java_package = "com.google.dataflow.v1beta3";
option php_namespace = "Google\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Cloud::Dataflow::V1beta3";

// Global topology of the streaming Dataflow job, including all
// computations and their sharded locations.
message TopologyConfig {
  // The computations associated with a streaming Dataflow job.
  repeated ComputationTopology computations = 1;

  // The disks assigned to a streaming Dataflow job.
  repeated DataDiskAssignment data_disk_assignments = 2;

  // Maps user stage names to stable computation names.
  map<string, string> user_stage_to_computation_name_map = 3;

  // The size (in bits) of keys that will be assigned to source messages.
  int32 forwarding_key_bits = 4;

  // Version number for persistent state.
  int32 persistent_state_version = 5;
}

// Identifies a pubsub location to use for transferring data into or
// out of a streaming Dataflow job.
message PubsubLocation {
  // A pubsub topic, in the form of
  // "pubsub.googleapis.com/topics/<project-id>/<topic-name>"
  string topic = 1;

  // A pubsub subscription, in the form of
  // "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>"
  string subscription = 2;

  // If set, contains a pubsub label from which to extract record timestamps.
  // If left empty, record timestamps will be generated upon arrival.
  string timestamp_label = 3;

  // If set, contains a pubsub label from which to extract record ids.
  // If left empty, record deduplication will be strictly best effort.
  string id_label = 4;

  // Indicates whether the pipeline allows late-arriving data.
  bool drop_late_data = 5;

  // If set, specifies the pubsub subscription that will be used for tracking
  // custom time timestamps for watermark estimation.
  string tracking_subscription = 6;

  // If true, then the client has requested to get pubsub attributes.
  bool with_attributes = 7;
}

// Identifies the location of a streaming computation stage, for
// stage-to-stage communication.
message StreamingStageLocation {
  // Identifies the particular stream within the streaming Dataflow
  // job.
  string stream_id = 1;
}

// Identifies the location of a streaming side input.
message StreamingSideInputLocation {
  // Identifies the particular side input within the streaming Dataflow job.
  string tag = 1;

  // Identifies the state family where this side input is stored.
  string state_family = 2;
}

// Identifies the location of a custom source.
message CustomSourceLocation {
  // Whether this source is stateful.
  bool stateful = 1;
}

// Describes a stream of data, either as input to be processed or as
// output of a streaming Dataflow job.
message StreamLocation {
  // A specification of a stream's location.
  oneof location {
    // The stream is part of another computation within the current
    // streaming Dataflow job.
    StreamingStageLocation streaming_stage_location = 1;

    // The stream is a pubsub stream.
    PubsubLocation pubsub_location = 2;

    // The stream is a streaming side input.
    StreamingSideInputLocation side_input_location = 3;

    // The stream is a custom source.
    CustomSourceLocation custom_source_location = 4;
  }
}

// State family configuration.
message StateFamilyConfig {
  // The state family value.
  string state_family = 1;

  // If true, this family corresponds to a read operation.
  bool is_read = 2;
}

// All configuration data for a particular Computation.
message ComputationTopology {
  // The system stage name.
  string system_stage_name = 1;

  // The ID of the computation.
  string computation_id = 5;

  // The key ranges processed by the computation.
  repeated KeyRangeLocation key_ranges = 2;

  // The inputs to the computation.
  repeated StreamLocation inputs = 3;

  // The outputs from the computation.
  repeated StreamLocation outputs = 4;

  // The state family values.
  repeated StateFamilyConfig state_families = 7;
}

// Location information for a specific key-range of a sharded computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeLocation {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The physical location of this range assignment to be used for
  // streaming computation cross-worker message delivery.
  string delivery_endpoint = 3;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 5;

  // DEPRECATED. The location of the persistent state for this range, as a
  // persistent directory in the worker local filesystem.
  string deprecated_persistent_directory = 4 [deprecated = true];
}

// Describes a mounted data disk.
message MountedDataDisk {
  // The name of the data disk.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 1;
}

// Data disk assignment for a given VM instance.
message DataDiskAssignment {
  // The VM instance name the data disks are mounted to, for example
  // "myproject-1014-104817-4c2-harness-0".
  string vm_instance = 1;

  // Mounted data disks. The order is important: a data disk's 0-based index in
  // this list defines which persistent directory the disk is mounted to, for
  // example the list of { "myproject-1014-104817-4c2-harness-0-disk-0" },
  // { "myproject-1014-104817-4c2-harness-0-disk-1" }.
  repeated string data_disks = 2;
}

// Data disk assignment information for a specific key-range of a sharded
// computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeDataDiskAssignment {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 3;
}

// Describes full or partial data disk assignment information of the computation
// ranges.
message StreamingComputationRanges {
  // The ID of the computation.
  string computation_id = 1;

  // Data disk assignments for ranges from this computation.
  repeated KeyRangeDataDiskAssignment range_assignments = 2;
}

// Streaming appliance snapshot configuration.
message StreamingApplianceSnapshotConfig {
  // If set, indicates the snapshot id for the snapshot being performed.
  string snapshot_id = 1;

  // Indicates which endpoint is used to import appliance state.
  string import_state_endpoint = 2;
}