xref: /aosp_15_r20/external/googleapis/google/dataflow/v1beta3/streaming.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
syntax = "proto3";

package google.dataflow.v1beta3;

// Per-language code generation options for the messages in this file.
option csharp_namespace = "Google.Cloud.Dataflow.V1Beta3";
option go_package = "cloud.google.com/go/dataflow/apiv1beta3/dataflowpb;dataflowpb";
option java_multiple_files = true;
option java_outer_classname = "StreamingProto";
option java_package = "com.google.dataflow.v1beta3";
option php_namespace = "Google\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Cloud::Dataflow::V1beta3";
26
// Global topology of the streaming Dataflow job, including all
// computations and their sharded locations.
message TopologyConfig {
  // The computations associated with a streaming Dataflow job.
  repeated ComputationTopology computations = 1;

  // The disks assigned to a streaming Dataflow job.
  repeated DataDiskAssignment data_disk_assignments = 2;

  // Maps user stage names to stable computation names.
  // Key: user stage name. Value: stable computation name.
  map<string, string> user_stage_to_computation_name_map = 3;

  // The size (in bits) of keys that will be assigned to source messages.
  int32 forwarding_key_bits = 4;

  // Version number for persistent state.
  int32 persistent_state_version = 5;
}
45
// Identifies a pubsub location to use for transferring data into or
// out of a streaming Dataflow job.
message PubsubLocation {
  // A pubsub topic, in the form of
  // "pubsub.googleapis.com/topics/<project-id>/<topic-name>"
  string topic = 1;

  // A pubsub subscription, in the form of
  // "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>"
  string subscription = 2;

  // If set, contains a pubsub label from which to extract record timestamps.
  // If left empty, record timestamps will be generated upon arrival.
  string timestamp_label = 3;

  // If set, contains a pubsub label from which to extract record ids.
  // If left empty, record deduplication will be strictly best effort.
  string id_label = 4;

  // Indicates whether the pipeline allows late-arriving data.
  // NOTE(review): the field name suggests that `true` means late data is
  // dropped, which reads as the opposite of "allows"; confirm the polarity
  // against the service before relying on this description.
  bool drop_late_data = 5;

  // If set, specifies the pubsub subscription that will be used for tracking
  // custom time timestamps for watermark estimation.
  string tracking_subscription = 6;

  // If true, then the client has requested to get pubsub attributes.
  bool with_attributes = 7;
}
75
// Identifies the location of a streaming computation stage, for
// stage-to-stage communication.
message StreamingStageLocation {
  // Identifies the particular stream within the streaming Dataflow job.
  string stream_id = 1;
}
83
// Identifies the location of a streaming side input.
message StreamingSideInputLocation {
  // Identifies the particular side input within the streaming Dataflow job.
  string tag = 1;

  // Identifies the state family where this side input is stored.
  string state_family = 2;
}
92
// Identifies the location of a custom source.
message CustomSourceLocation {
  // Whether this source is stateful.
  bool stateful = 1;
}
98
// Describes a stream of data, either as input to be processed or as
// output of a streaming Dataflow job.
message StreamLocation {
  // A specification of a stream's location. As members of a oneof, at most
  // one of the fields below is set at a time.
  oneof location {
    // The stream is part of another computation within the current
    // streaming Dataflow job.
    StreamingStageLocation streaming_stage_location = 1;

    // The stream is a pubsub stream.
    PubsubLocation pubsub_location = 2;

    // The stream is a streaming side input.
    StreamingSideInputLocation side_input_location = 3;

    // The stream is a custom source.
    CustomSourceLocation custom_source_location = 4;
  }
}
118
// State family configuration.
message StateFamilyConfig {
  // The state family value.
  string state_family = 1;

  // If true, this family corresponds to a read operation.
  bool is_read = 2;
}
127
// All configuration data for a particular Computation.
//
// NOTE(review): field number 6 is not used by this message; presumably it
// belonged to a retired field. Confirm before assigning it to a new field.
message ComputationTopology {
  // The system stage name.
  string system_stage_name = 1;

  // The ID of the computation.
  string computation_id = 5;

  // The key ranges processed by the computation.
  repeated KeyRangeLocation key_ranges = 2;

  // The inputs to the computation.
  repeated StreamLocation inputs = 3;

  // The outputs from the computation.
  repeated StreamLocation outputs = 4;

  // The state family values.
  repeated StateFamilyConfig state_families = 7;
}
148
// Location information for a specific key-range of a sharded computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeLocation {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The physical location of this range assignment to be used for
  // streaming computation cross-worker message delivery.
  string delivery_endpoint = 3;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 5;

  // DEPRECATED. The location of the persistent state for this range, as a
  // persistent directory in the worker local filesystem.
  // NOTE(review): presumably superseded by `data_disk`; confirm.
  string deprecated_persistent_directory = 4 [deprecated = true];
}
173
// Describes a mounted data disk.
message MountedDataDisk {
  // The name of the data disk.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 1;
}
182
// Data disk assignment for a given VM instance.
message DataDiskAssignment {
  // Name of the VM instance the data disks are mounted to, for example
  // "myproject-1014-104817-4c2-harness-0".
  string vm_instance = 1;

  // Mounted data disks. The order is important: a data disk's 0-based index
  // in this list defines which persistent directory the disk is mounted to,
  // for example the list of { "myproject-1014-104817-4c2-harness-0-disk-0" },
  // { "myproject-1014-104817-4c2-harness-0-disk-1" }.
  repeated string data_disks = 2;
}
195
// Data disk assignment information for a specific key-range of a sharded
// computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeDataDiskAssignment {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 3;
}
213
// Describes full or partial data disk assignment information of the
// computation ranges.
message StreamingComputationRanges {
  // The ID of the computation.
  string computation_id = 1;

  // Data disk assignments for ranges from this computation.
  repeated KeyRangeDataDiskAssignment range_assignments = 2;
}
223
// Streaming appliance snapshot configuration.
message StreamingApplianceSnapshotConfig {
  // If set, indicates the snapshot id for the snapshot being performed.
  string snapshot_id = 1;

  // Indicates which endpoint is used to import appliance state.
  string import_state_endpoint = 2;
}
232