xref: /aosp_15_r20/external/googleapis/google/cloud/bigquery/storage/v1beta2/stream.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
syntax = "proto3";

package google.cloud.bigquery.storage.v1beta2;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/bigquery/storage/v1beta2/arrow.proto";
import "google/cloud/bigquery/storage/v1beta2/avro.proto";
import "google/cloud/bigquery/storage/v1beta2/table.proto";
import "google/protobuf/timestamp.proto";

// Per-language code generation settings (Go and Java).
option go_package = "cloud.google.com/go/bigquery/storage/apiv1beta2/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "StreamProto";
option java_package = "com.google.cloud.bigquery.storage.v1beta2";

// Declares the BigQuery table resource type and its name pattern at file
// level, so that fields in this file (see `ReadSession.table`) can point at
// it through `google.api.resource_reference`.
option (google.api.resource_definition) = {
  type: "bigquery.googleapis.com/Table"
  pattern: "projects/{project}/datasets/{dataset}/tables/{table}"
};
34
// Serialization format used for input or output data.
enum DataFormat {
  // Zero value; no data format has been specified.
  DATA_FORMAT_UNSPECIFIED = 0;

  // Avro, a standard open source row-based file format.
  // See https://avro.apache.org/ for more details.
  AVRO = 1;

  // Arrow, a standard open source column-based message format.
  // See https://arrow.apache.org/ for more details.
  ARROW = 2;
}
47
// Information about the ReadSession.
message ReadSession {
  option (google.api.resource) = {
    type: "bigquerystorage.googleapis.com/ReadSession"
    pattern: "projects/{project}/locations/{location}/sessions/{session}"
  };

  // Additional attributes when reading a table.
  message TableModifiers {
    // The snapshot time of the table. If not set, interpreted as now.
    google.protobuf.Timestamp snapshot_time = 1;
  }

  // Options dictating how we read a table.
  message TableReadOptions {
    // Names of the fields in the table that should be read. If empty, all
    // fields will be read. If the specified field is a nested field, all
    // the sub-fields in the field will be selected. The output field order is
    // unrelated to the order of fields in selected_fields.
    repeated string selected_fields = 1;

    // SQL text filtering statement, similar to a WHERE clause in a query.
    // Aggregates are not supported.
    //
    // Examples: "int_field > 5"
    //           "date_field = CAST('2014-9-27' as DATE)"
    //           "nullable_field is not NULL"
    //           "st_equals(geo_field, st_geofromtext('POINT(2, 2)'))"
    //           "numeric_field BETWEEN 1.0 AND 5.0"
    //
    // Restricted to a maximum length of 1 MB.
    string row_restriction = 2;

    // Optional. Options specific to the Apache Arrow output format.
    ArrowSerializationOptions arrow_serialization_options = 3
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Unique identifier for the session, in the form
  // `projects/{project_id}/locations/{location}/sessions/{session_id}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the session becomes invalid. After this time,
  // subsequent requests to read this Session will return errors. The
  // expire_time is automatically assigned and currently cannot be specified
  // or updated.
  google.protobuf.Timestamp expire_time = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Immutable. Data format of the output data.
  DataFormat data_format = 3 [(google.api.field_behavior) = IMMUTABLE];

  // The schema for the read. If read_options.selected_fields is set, the
  // schema may be different from the table schema as it will only contain
  // the selected fields.
  oneof schema {
    // Output only. Avro schema.
    AvroSchema avro_schema = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Arrow schema.
    ArrowSchema arrow_schema = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Immutable. Table that this ReadSession is reading from, in the form
  // `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`.
  string table = 6 [
    (google.api.field_behavior) = IMMUTABLE,
    (google.api.resource_reference) = {
      type: "bigquery.googleapis.com/Table"
    }
  ];

  // Optional. Any modifiers which are applied when reading from the specified
  // table.
  TableModifiers table_modifiers = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Read options for this session (e.g. column selection, filters).
  TableReadOptions read_options = 8 [(google.api.field_behavior) = OPTIONAL];

  // NOTE(review): field number 9 is not used by this message. If it belonged
  // to a removed field it should be declared `reserved` -- TODO confirm.

  // Output only. A list of streams created with the session.
  //
  // At least one stream is created with the session. In the future, larger
  // request_stream_count values *may* result in this list being unpopulated,
  // in that case, the user will need to use a List method to get the streams
  // instead, which is not yet available.
  repeated ReadStream streams = 10 [(google.api.field_behavior) = OUTPUT_ONLY];
}
131
// Describes one stream through which data is read out of the storage system.
// `ReadStream` is kept lightweight: most information about `ReadStream`
// instances is aggregated rather than carried on each instance.
message ReadStream {
  option (google.api.resource) = {
    type: "bigquerystorage.googleapis.com/ReadStream"
    pattern: "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}"
  };

  // Output only. Resource name of the stream, formatted as
  // `projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}
145
// Describes one stream through which data is written into the storage system.
message WriteStream {
  option (google.api.resource) = {
    type: "bigquerystorage.googleapis.com/WriteStream"
    pattern: "projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}"
  };

  // The kind of write stream, which controls when written data becomes
  // visible.
  enum Type {
    // Unknown type.
    TYPE_UNSPECIFIED = 0;

    // Data is committed automatically and appears as soon as the write is
    // acknowledged.
    COMMITTED = 1;

    // Data stays invisible until the stream is committed.
    PENDING = 2;

    // Data is visible only up to the offset to which it was flushed.
    BUFFERED = 3;
  }

  // Output only. Resource name of the stream, formatted as
  // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Immutable. Type of the stream.
  Type type = 2 [(google.api.field_behavior) = IMMUTABLE];

  // Output only. Time at which the stream was created. For the _default
  // stream, this is the creation_time of the table.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the stream was committed.
  // A stream of `COMMITTED` type has a commit_time equal to its
  // `create_time`. For a stream of `PENDING` type, an empty commit_time
  // means the stream has not been committed.
  google.protobuf.Timestamp commit_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Schema of the destination table. It is returned only in the
  // `CreateWriteStream` response. Callers should generate data compatible
  // with this schema to send in the initial `AppendRowsRequest`.
  // The table schema could go out of date during the lifetime of the stream.
  TableSchema table_schema = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}
192