// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta2/document.proto";
import "google/cloud/documentai/v1beta2/geometry.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta2";
option go_package = "cloud.google.com/go/documentai/apiv1beta2/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiProto";
option java_package = "com.google.cloud.documentai.v1beta2";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta2";
option ruby_package = "Google::Cloud::DocumentAI::V1beta2";

// Service to parse structured information from unstructured or semi-structured
// documents using state-of-the-art Google AI such as natural language,
// computer vision, and translation.
service DocumentUnderstandingService {
  option (google.api.default_host) = "documentai.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // LRO endpoint to batch process many documents. The output is written
  // to Cloud Storage as JSON in the
  // [Document][google.cloud.documentai.v1beta2.Document] format.
  //
  // The returned long-running operation carries `OperationMetadata` as its
  // metadata and resolves to a `BatchProcessDocumentsResponse`.
  rpc BatchProcessDocuments(BatchProcessDocumentsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:batchProcess"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:batchProcess"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "BatchProcessDocumentsResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Processes a single document.
  rpc ProcessDocument(ProcessDocumentRequest) returns (Document) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:process"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:process"
        body: "*"
      }
    };
  }
}

// Request to batch process documents as an asynchronous operation. The output
// is written to Cloud Storage as JSON in the
// [Document][google.cloud.documentai.v1beta2.Document] format.
message BatchProcessDocumentsRequest {
  // Required. Individual requests for each document.
  repeated ProcessDocumentRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no location is specified, a region will be chosen automatically.
  string parent = 2;
}

// Request to process one document.
message ProcessDocumentRequest {
  // Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no location is specified, a region will be chosen automatically.
  // This field is only populated when used in ProcessDocument method.
  string parent = 9;

  // Required. Information about the input file.
  InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED];

  // The desired output location. This field is only needed in
  // BatchProcessDocumentsRequest.
  OutputConfig output_config = 2;

  // Specifies a known document type for deeper structure detection. Valid
  // values are currently "general" and "invoice". If not provided, "general"
  // is used as default. If any other value is given, the request is rejected.
  string document_type = 3;

  // Controls table extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  TableExtractionParams table_extraction_params = 4;

  // Controls form extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  FormExtractionParams form_extraction_params = 5;

  // Controls entity extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  EntityExtractionParams entity_extraction_params = 6;

  // Controls OCR behavior. If not specified, the system will decide reasonable
  // defaults.
  OcrParams ocr_params = 7;

  // Controls AutoML model prediction behavior. AutoMlParams cannot be used
  // together with other Params.
  AutoMlParams automl_params = 8;
}

// Response to a batch document processing request. This is returned in
// the LRO Operation after the operation is complete.
message BatchProcessDocumentsResponse {
  // Responses for each individual document.
  repeated ProcessDocumentResponse responses = 1;
}

// Response to a single document processing request.
message ProcessDocumentResponse {
  // Information about the input file. This is the same as the corresponding
  // input config in the request.
  InputConfig input_config = 1;

  // The output location of the parsed responses. The responses are written to
  // this location as JSON-serialized `Document` objects.
  OutputConfig output_config = 2;
}

// Parameters to control Optical Character Recognition (OCR) behavior.
message OcrParams {
  // Languages to use for OCR.
  //
  // Leaving this empty usually gives the best results, because it enables
  // automatic language detection; hints are not needed for languages based on
  // the Latin alphabet. In the rare case where the language of the text in
  // the image is known, a hint can improve results, but a wrong hint is a
  // significant hindrance.
  //
  // Document processing returns an error if any of the specified languages
  // is not one of the supported languages.
  repeated string language_hints = 1;
}

// Parameters to control table extraction behavior.
message TableExtractionParams {
  // Whether to enable table extraction.
  bool enabled = 1;

  // Optional. Bounding box hints for tables, intended for complex cases in
  // which the algorithm cannot locate the table(s) on its own.
  repeated TableBoundHint table_bound_hints = 2 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Reserved for future use.
  repeated string header_hints = 3 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Model version of the table extraction system. Defaults to
  // "builtin/stable"; specify "builtin/latest" to use the latest model.
  string model_version = 4;
}

// A hint for a table bounding box on the page for table parsing.
message TableBoundHint {
  // Optional. The 1-based page number, for multi-paged inputs, that this
  // hint applies to. When not provided, the hint applies to all pages.
  int32 page_number = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Bounding box hint for a table on this page. The bounding box must be an
  // axis-aligned rectangle with coordinates normalized to [0,1].
  BoundingPoly bounding_box = 2;
}

// Parameters to control form extraction behavior.
message FormExtractionParams {
  // Whether to enable form extraction.
  bool enabled = 1;

  // Reserved for future use.
  repeated KeyValuePairHint key_value_pair_hints = 2;

  // Model version of the form extraction system. Default is
  // "builtin/stable". Specify "builtin/latest" for the latest model.
  // For custom form models, specify: "custom/{model_name}". Model name
  // format is "bucket_name/path/to/modeldir" corresponding to
  // "gs://bucket_name/path/to/modeldir" where annotated examples are stored.
  string model_version = 3;
}

// Reserved for future use.
message KeyValuePairHint {
  // The key text for the hint.
  string key = 1;

  // Type of the value. This is case-insensitive, and could be one of:
  // ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER,
  // ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this list will
  // be ignored.
  repeated string value_types = 2;
}

// Parameters to control entity extraction behavior.
message EntityExtractionParams {
  // Whether to enable entity extraction.
  bool enabled = 1;

  // Model version of the entity extraction. Default is
  // "builtin/stable". Specify "builtin/latest" for the latest model.
  string model_version = 2;
}

// Parameters to control AutoML model prediction behavior.
message AutoMlParams {
  // Resource name of the AutoML model.
  //
  // Format: `projects/{project-id}/locations/{location-id}/models/{model-id}`.
  string model = 1;
}

// The desired input location and metadata.
message InputConfig {
  // Required. The source of the input document. Because this is a oneof, at
  // most one of the fields below can be set.
  oneof source {
    // The Google Cloud Storage location to read the input from. This must be a
    // single file.
    GcsSource gcs_source = 1;

    // Content in bytes, represented as a stream of bytes.
    // Note: As with all `bytes` fields, proto buffer messages use a pure binary
    // representation, whereas JSON representations use base64.
    //
    // This field only works for synchronous ProcessDocument method.
    bytes contents = 3;
  }

  // Required. Mimetype of the input. Current supported mimetypes are
  // application/pdf, image/tiff, and image/gif. In addition, application/json
  // type is supported for requests with
  // [ProcessDocumentRequest.automl_params][google.cloud.documentai.v1beta2.ProcessDocumentRequest.automl_params]
  // field set. The JSON file needs to be in
  // [Document][google.cloud.documentai.v1beta2.Document] format.
  string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
}

// The desired output location and metadata.
message OutputConfig {
  // Required. The destination the output is written to.
  oneof destination {
    // The Google Cloud Storage location to write the output to.
    GcsDestination gcs_destination = 1;
  }

  // The max number of pages to include into each output Document shard JSON on
  // Google Cloud Storage.
  //
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one pdf file with 100 pages, 100 parsed pages will be
  // produced. If `pages_per_shard` = 20, then 5 Document shard JSON files each
  // containing 20 parsed pages will be written under the prefix
  // [GcsDestination.uri][google.cloud.documentai.v1beta2.GcsDestination.uri]
  // (the `uri` of `gcs_destination`) and suffix pages-x-to-y.json where
  // x and y are 1-indexed page numbers.
  //
  // Example GCS outputs with 157 pages and pages_per_shard = 50:
  //
  // <prefix>pages-001-to-050.json
  // <prefix>pages-051-to-100.json
  // <prefix>pages-101-to-150.json
  // <prefix>pages-151-to-157.json
  int32 pages_per_shard = 2;
}

// The Google Cloud Storage location where the input file will be read from.
message GcsSource {
  // Required. URI of the Google Cloud Storage location of the input file.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// The Google Cloud Storage location where the output file will be written to.
message GcsDestination {
  // Required. URI of the Google Cloud Storage location to write the output
  // to. Output Document shard files are written under this URI as a prefix.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Contains metadata for the BatchProcessDocuments operation.
message OperationMetadata {
  // The possible states of a batch processing operation.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    ACCEPTED = 1;

    // Request operation is waiting for scheduling.
    WAITING = 2;

    // Request is being processed.
    RUNNING = 3;

    // The batch processing completed successfully.
    SUCCEEDED = 4;

    // The batch processing was cancelled.
    CANCELLED = 5;

    // The batch processing has failed.
    FAILED = 6;
  }

  // The state of the current batch processing.
  State state = 1;

  // A message providing more details about the current state of processing.
  string state_message = 2;

  // The creation time of the operation.
  google.protobuf.Timestamp create_time = 3;

  // The last update time of the operation.
  google.protobuf.Timestamp update_time = 4;
}