// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta2;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/bigquery/storage/v1beta2/arrow.proto";
import "google/cloud/bigquery/storage/v1beta2/avro.proto";
import "google/cloud/bigquery/storage/v1beta2/table.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/bigquery/storage/apiv1beta2/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "StreamProto";
option java_package = "com.google.cloud.bigquery.storage.v1beta2";
option (google.api.resource_definition) = {
  type: "bigquery.googleapis.com/Table"
  pattern: "projects/{project}/datasets/{dataset}/tables/{table}"
};

// Data format used for input or output data.
enum DataFormat {
  // No data format was specified. This is the zero (default) value.
  DATA_FORMAT_UNSPECIFIED = 0;

  // Avro, a standard open source row-based file format.
  // See https://avro.apache.org/ for more details.
  AVRO = 1;

  // Arrow, a standard open source column-based message format.
  // See https://arrow.apache.org/ for more details.
  ARROW = 2;
}

// Information about the ReadSession.
message ReadSession {
  option (google.api.resource) = {
    type: "bigquerystorage.googleapis.com/ReadSession"
    pattern: "projects/{project}/locations/{location}/sessions/{session}"
  };

  // Additional attributes that apply when reading a table.
  message TableModifiers {
    // Snapshot time of the table. When not set, it is interpreted as now.
    google.protobuf.Timestamp snapshot_time = 1;
  }

  // Options that dictate how a table is read.
  message TableReadOptions {
    // Names of the table fields that should be read. When empty, all fields
    // are read. When a specified field is a nested field, all of its
    // sub-fields are selected. The order of fields in the output is unrelated
    // to the order of fields in selected_fields.
    repeated string selected_fields = 1;

    // SQL text filtering statement, similar to a WHERE clause in a query.
    // Aggregates are not supported.
    //
    // Examples: "int_field > 5"
    //           "date_field = CAST('2014-9-27' as DATE)"
    //           "nullable_field is not NULL"
    //           "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))"
    //           "numeric_field BETWEEN 1.0 AND 5.0"
    //
    // Restricted to a maximum length of 1 MB.
    string row_restriction = 2;

    // Optional. Options specific to the Apache Arrow output format.
    ArrowSerializationOptions arrow_serialization_options = 3
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Unique identifier for the session, in the form
  // `projects/{project_id}/locations/{location}/sessions/{session_id}`.
  string name = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the session becomes invalid. After this time,
  // subsequent requests to read this Session will return errors. The
  // expire_time is automatically assigned and currently cannot be specified
  // or updated.
  google.protobuf.Timestamp expire_time = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Immutable. Data format of the output data.
  DataFormat data_format = 3
      [(google.api.field_behavior) = IMMUTABLE];

  // The schema for the read. When read_options.selected_fields is set, the
  // schema may differ from the table schema because it only contains the
  // selected fields.
  oneof schema {
    // Output only. Avro schema.
    AvroSchema avro_schema = 4
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Arrow schema.
    ArrowSchema arrow_schema = 5
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Immutable. Table that this ReadSession is reading from, in the form
  // `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`.
  string table = 6 [
    (google.api.field_behavior) = IMMUTABLE,
    (google.api.resource_reference) = {
      type: "bigquery.googleapis.com/Table"
    }
  ];

  // Optional. Any modifiers which are applied when reading from the specified
  // table.
  TableModifiers table_modifiers = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Read options for this session (e.g. column selection, filters).
  TableReadOptions read_options = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. A list of streams created with the session.
  //
  // At least one stream is created with the session. In the future, larger
  // request_stream_count values *may* result in this list being unpopulated;
  // in that case, the user will need to use a List method (not yet available)
  // to get the streams instead.
  repeated ReadStream streams = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Information about a single stream that gets data out of the storage system.
// Most of the information about `ReadStream` instances is aggregated, making
// `ReadStream` lightweight.
message ReadStream {
  option (google.api.resource) = {
    type: "bigquerystorage.googleapis.com/ReadStream"
    pattern: "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}"
  };

  // Output only. Name of the stream, in the form
  // `projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}`.
  string name = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Information about a single stream that gets data inside the storage system.
message WriteStream {
  option (google.api.resource) = {
    type: "bigquerystorage.googleapis.com/WriteStream"
    pattern: "projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}"
  };

  // Type enum of the stream.
  enum Type {
    // Unknown type. This is the zero (default) value.
    TYPE_UNSPECIFIED = 0;

    // Data will commit automatically and appear as soon as the write is
    // acknowledged.
    COMMITTED = 1;

    // Data is invisible until the stream is committed.
    PENDING = 2;

    // Data is only visible up to the offset to which it was flushed.
    BUFFERED = 3;
  }

  // Output only. Name of the stream, in the form
  // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
  string name = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Immutable. Type of the stream.
  Type type = 2
      [(google.api.field_behavior) = IMMUTABLE];

  // Output only. Create time of the stream. For the _default stream, this is
  // the creation_time of the table.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Commit time of the stream.
  // If a stream is of `COMMITTED` type, then it will have a commit_time same
  // as `create_time`. If the stream is of `PENDING` type, an empty
  // commit_time means it is not committed.
  google.protobuf.Timestamp commit_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The schema of the destination table. It is only returned in
  // `CreateWriteStream` response. Caller should generate data that's
  // compatible with this schema to send in initial `AppendRowsRequest`.
  // The table schema could go out of date during the life time of the stream.
  TableSchema table_schema = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];
}