// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.documentai.v1beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta1/geometry.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta1";
option go_package = "cloud.google.com/go/documentai/apiv1beta1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiProto";
option java_package = "com.google.cloud.documentai.v1beta1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta1";
option ruby_package = "Google::Cloud::DocumentAI::V1beta1";

// Service to parse structured information from unstructured or semi-structured
// documents using state-of-the-art Google AI such as natural language,
// computer vision, and translation.
service DocumentUnderstandingService {
  option (google.api.default_host) = "documentai.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Long-running operation (LRO) endpoint to batch process many documents.
  //
  // The returned operation carries `OperationMetadata` as its metadata and
  // resolves to a `BatchProcessDocumentsResponse`.
  rpc BatchProcessDocuments(BatchProcessDocumentsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta1/{parent=projects/*/locations/*}/documents:batchProcess"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{parent=projects/*}/documents:batchProcess"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "BatchProcessDocumentsResponse"
      metadata_type: "OperationMetadata"
    };
  }
}

// Request to batch process documents as an asynchronous operation.
message BatchProcessDocumentsRequest {
  // Required. Individual requests for each document.
  repeated ProcessDocumentRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no location is specified, a region will be chosen automatically.
  string parent = 2;
}

// Request to process one document.
message ProcessDocumentRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The desired output location.
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];

  // Specifies a known document type for deeper structure detection. Valid
  // values are currently "general" and "invoice". If not provided, "general"
  // is used as default. If any other value is given, the request is rejected.
  string document_type = 3;

  // Controls table extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  TableExtractionParams table_extraction_params = 4;

  // Controls form extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  FormExtractionParams form_extraction_params = 5;

  // Controls entity extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  EntityExtractionParams entity_extraction_params = 6;

  // Controls OCR behavior. If not specified, the system will decide reasonable
  // defaults.
  OcrParams ocr_params = 7;
}

// Response to a batch document processing request. This is returned in
// the LRO Operation after the operation is complete.
message BatchProcessDocumentsResponse {
  // Responses for each individual document.
  repeated ProcessDocumentResponse responses = 1;
}

// Response to a single document processing request.
message ProcessDocumentResponse {
  // Information about the input file. This is the same as the corresponding
  // input config in the request.
  InputConfig input_config = 1;

  // The output location of the parsed responses. The responses are written to
  // this location as JSON-serialized `Document` objects.
  OutputConfig output_config = 2;
}

// Parameters to control Optical Character Recognition (OCR) behavior.
message OcrParams {
  // List of languages to use for OCR. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Document processing returns an
  // error if one or more of the specified languages is not one of the
  // supported languages.
  repeated string language_hints = 1;
}

// Parameters to control table extraction behavior.
message TableExtractionParams {
  // Whether to enable table extraction.
  bool enabled = 1;

  // Optional. Table bounding box hints that can be provided for complex cases
  // in which our algorithm cannot locate the table(s).
  repeated TableBoundHint table_bound_hints = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Table header hints. The extraction will bias towards producing
  // these terms as table headers, which may improve accuracy.
  repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL];

  // Model version of the table extraction system. Default is "builtin/stable".
  // Specify "builtin/latest" for the latest model.
  string model_version = 4;
}

// A hint for a table bounding box on the page for table parsing.
message TableBoundHint {
  // Optional. Page number for multi-paged inputs this hint applies to. If not
  // provided, this hint will apply to all pages by default. This value is
  // 1-based.
  int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL];

  // Bounding box hint for a table on this page. The coordinates must be
  // normalized to [0,1] and the bounding box must be an axis-aligned rectangle.
  BoundingPoly bounding_box = 2;
}

// Parameters to control form extraction behavior.
message FormExtractionParams {
  // Whether to enable form extraction.
  bool enabled = 1;

  // User can provide pairs of (key text, value type) to improve the parsing
  // result.
  //
  // For example, if a document has a field called "Date" that holds a date
  // value and a field called "Amount" that may hold either a currency value
  // (e.g., "$500.00") or a simple number value (e.g., "20"), you could use the
  // following hints:
  //
  //     [
  //       {"key": "Date", "value_types": ["DATE"]},
  //       {"key": "Amount", "value_types": ["PRICE", "NUMBER"]}
  //     ]
  //
  // If the value type is unknown, but you want to provide hints for the keys,
  // you can leave the value_types field blank, e.g.
  // {"key": "Date", "value_types": []}.
  repeated KeyValuePairHint key_value_pair_hints = 2;

  // Model version of the form extraction system. Default is
  // "builtin/stable". Specify "builtin/latest" for the latest model.
  string model_version = 3;
}

// User-provided hint for key value pair.
message KeyValuePairHint {
  // The key text for the hint.
  string key = 1;

  // Type of the value. This is case-insensitive, and could be one of:
  // ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER,
  // ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this list will
  // be ignored.
  repeated string value_types = 2;
}

// Parameters to control entity extraction behavior.
message EntityExtractionParams {
  // Whether to enable entity extraction.
  bool enabled = 1;

  // Model version of the entity extraction. Default is
  // "builtin/stable". Specify "builtin/latest" for the latest model.
  string model_version = 2;
}

// The desired input location and metadata.
message InputConfig {
  // Required. Where the input document is read from.
  oneof source {
    // The Google Cloud Storage location to read the input from. This must be a
    // single file.
    GcsSource gcs_source = 1;
  }

  // Required. Mimetype of the input. Current supported mimetypes are
  // application/pdf, image/tiff, and image/gif.
  string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
}

// The desired output location and metadata.
message OutputConfig {
  // Required. Where the output is written to.
  oneof destination {
    // The Google Cloud Storage location to write the output to.
    GcsDestination gcs_destination = 1;
  }

  // The max number of pages to include in each output Document shard JSON on
  // Google Cloud Storage.
  //
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one pdf file with 100 pages, 100 parsed pages will be
  // produced. If `pages_per_shard` = 20, then 5 Document shard JSON files each
  // containing 20 parsed pages will be written under the prefix
  // [OutputConfig.gcs_destination.uri][] and suffix pages-x-to-y.json where
  // x and y are 1-indexed page numbers.
  //
  // Example GCS outputs with 157 pages and pages_per_shard = 50:
  //
  // <prefix>pages-001-to-050.json
  // <prefix>pages-051-to-100.json
  // <prefix>pages-101-to-150.json
  // <prefix>pages-151-to-157.json
  int32 pages_per_shard = 2;
}

// The Google Cloud Storage location where the input file will be read from.
message GcsSource {
  // Required. URI of the input file in Google Cloud Storage.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// The Google Cloud Storage location where the output file will be written to.
message GcsDestination {
  // Required. URI in Google Cloud Storage that the output is written under;
  // output shard file names use it as their prefix (see
  // `OutputConfig.pages_per_shard`).
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Contains metadata for the BatchProcessDocuments operation.
message OperationMetadata {
  // Possible states of the batch processing operation.
  //
  // NOTE(review): the values are not prefixed with the enum name. Renaming
  // them would break generated code and JSON, so they are kept as published.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    ACCEPTED = 1;

    // Request operation is waiting for scheduling.
    WAITING = 2;

    // Request is being processed.
    RUNNING = 3;

    // The batch processing completed successfully.
    SUCCEEDED = 4;

    // The batch processing was cancelled.
    CANCELLED = 5;

    // The batch processing has failed.
    FAILED = 6;
  }

  // The state of the current batch processing.
  State state = 1;

  // A message providing more details about the current state of processing.
  string state_message = 2;

  // The creation time of the operation.
  google.protobuf.Timestamp create_time = 3;

  // The last update time of the operation.
  google.protobuf.Timestamp update_time = 4;
}