// xref: /aosp_15_r20/external/googleapis/google/cloud/documentai/v1beta2/document_understanding.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta2/document.proto";
import "google/cloud/documentai/v1beta2/geometry.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta2";
option go_package = "cloud.google.com/go/documentai/apiv1beta2/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiProto";
option java_package = "com.google.cloud.documentai.v1beta2";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta2";
option ruby_package = "Google::Cloud::DocumentAI::V1beta2";

// Service to parse structured information from unstructured or semi-structured
// documents using state-of-the-art Google AI such as natural language,
// computer vision, and translation.
service DocumentUnderstandingService {
  option (google.api.default_host) = "documentai.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // LRO endpoint to batch process many documents. The output is written
  // to Cloud Storage as JSON in the
  // [Document][google.cloud.documentai.v1beta2.Document] format.
  //
  // The returned long-running operation reports `OperationMetadata` as its
  // metadata and yields a `BatchProcessDocumentsResponse` as its response.
  // The `parent` in the request path may name a project and location, or a
  // project only.
  rpc BatchProcessDocuments(BatchProcessDocumentsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:batchProcess"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:batchProcess"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "BatchProcessDocumentsResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Processes a single document.
  //
  // The `parent` in the request path may name a project and location, or a
  // project only.
  rpc ProcessDocument(ProcessDocumentRequest) returns (Document) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:process"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:process"
        body: "*"
      }
    };
  }
}

// Request to batch process documents as an asynchronous operation. The output
// is written to Cloud Storage as JSON in the
// [Document][google.cloud.documentai.v1beta2.Document] format.
message BatchProcessDocumentsRequest {
  // Required. Individual requests for each document.
  repeated ProcessDocumentRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no location is specified, a region will be chosen automatically.
  string parent = 2;
}

// Request to process one document.
message ProcessDocumentRequest {
  // Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no location is specified, a region will be chosen automatically.
  // This field is only populated when used in ProcessDocument method.
  string parent = 9;

  // Required. Information about the input file.
  InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED];

  // The desired output location. This field is only needed in
  // BatchProcessDocumentsRequest.
  OutputConfig output_config = 2;

  // Specifies a known document type for deeper structure detection. Valid
  // values are currently "general" and "invoice". If not provided, "general"
  // is used as default. If any other value is given, the request is rejected.
  string document_type = 3;

  // Controls table extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  TableExtractionParams table_extraction_params = 4;

  // Controls form extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  FormExtractionParams form_extraction_params = 5;

  // Controls entity extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  EntityExtractionParams entity_extraction_params = 6;

  // Controls OCR behavior. If not specified, the system will decide reasonable
  // defaults.
  OcrParams ocr_params = 7;

  // Controls AutoML model prediction behavior. AutoMlParams cannot be used
  // together with other Params.
  AutoMlParams automl_params = 8;
}

// Response to a batch document processing request. This is returned in
// the LRO Operation after the operation is complete.
message BatchProcessDocumentsResponse {
  // Responses for each individual document.
  repeated ProcessDocumentResponse responses = 1;
}

// Response to a single document processing request.
message ProcessDocumentResponse {
  // Information about the input file. This is the same as the corresponding
  // input config in the request.
  InputConfig input_config = 1;

  // The output location of the parsed responses. The responses are written to
  // this location as JSON-serialized `Document` objects.
  OutputConfig output_config = 2;
}

// Parameters to control Optical Character Recognition (OCR) behavior.
message OcrParams {
  // List of languages to use for OCR. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Document processing returns an
  // error if one or more of the specified languages is not one of the
  // supported languages.
  repeated string language_hints = 1;
}

// Parameters to control table extraction behavior.
message TableExtractionParams {
  // Whether to enable table extraction.
  bool enabled = 1;

  // Optional. Table bounding box hints that can be provided for complex cases
  // in which our algorithm cannot locate the table(s).
  repeated TableBoundHint table_bound_hints = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Reserved for future use.
  repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL];

  // Model version of the table extraction system. Default is "builtin/stable".
  // Specify "builtin/latest" for the latest model.
  string model_version = 4;
}

// A hint for a table bounding box on the page for table parsing.
message TableBoundHint {
  // Optional. Page number for multi-paged inputs this hint applies to. If not
  // provided, this hint will apply to all pages by default. This value is
  // 1-based.
  int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL];

  // Bounding box hint for a table on this page. The coordinates must be
  // normalized to [0,1] and the bounding box must be an axis-aligned rectangle.
  BoundingPoly bounding_box = 2;
}

// Parameters to control form extraction behavior.
message FormExtractionParams {
  // Whether to enable form extraction.
  bool enabled = 1;

  // Reserved for future use.
  repeated KeyValuePairHint key_value_pair_hints = 2;

  // Model version of the form extraction system. Default is
  // "builtin/stable". Specify "builtin/latest" for the latest model.
  // For custom form models, specify: "custom/{model_name}". Model name
  // format is "bucket_name/path/to/modeldir" corresponding to
  // "gs://bucket_name/path/to/modeldir" where annotated examples are stored.
  string model_version = 3;
}

// Reserved for future use.
message KeyValuePairHint {
  // The key text for the hint.
  string key = 1;

  // Type of the value. This is case-insensitive, and could be one of:
  // ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER,
  // ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this list will
  // be ignored.
  repeated string value_types = 2;
}

// Parameters to control entity extraction behavior.
message EntityExtractionParams {
  // Whether to enable entity extraction.
  bool enabled = 1;

  // Model version of the entity extraction. Default is
  // "builtin/stable". Specify "builtin/latest" for the latest model.
  string model_version = 2;
}

// Parameters to control AutoML model prediction behavior.
message AutoMlParams {
  // Resource name of the AutoML model.
  //
  // Format: `projects/{project-id}/locations/{location-id}/models/{model-id}`.
  string model = 1;
}

// The desired input location and metadata.
message InputConfig {
  // Required. The source of the input document; exactly one member may be set.
  oneof source {
    // The Google Cloud Storage location to read the input from. This must be a
    // single file.
    GcsSource gcs_source = 1;

    // Content in bytes, represented as a stream of bytes.
    // Note: As with all `bytes` fields, proto buffer messages use a pure binary
    // representation, whereas JSON representations use base64.
    //
    // This field only works for synchronous ProcessDocument method.
    bytes contents = 3;
  }

  // Required. Mimetype of the input. Current supported mimetypes are
  // application/pdf, image/tiff, and image/gif. In addition, application/json
  // type is supported for requests with
  // [ProcessDocumentRequest.automl_params][google.cloud.documentai.v1beta2.ProcessDocumentRequest.automl_params]
  // field set. The JSON file needs to be in
  // [Document][google.cloud.documentai.v1beta2.Document] format.
  string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
}

// The desired output location and metadata.
message OutputConfig {
  // Required. The destination the output is written to.
  oneof destination {
    // The Google Cloud Storage location to write the output to.
    GcsDestination gcs_destination = 1;
  }

  // The max number of pages to include into each output Document shard JSON on
  // Google Cloud Storage.
  //
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one pdf file with 100 pages, 100 parsed pages will be
  // produced. If `pages_per_shard` = 20, then 5 Document shard JSON files each
  // containing 20 parsed pages will be written under the prefix
  // [GcsDestination.uri][google.cloud.documentai.v1beta2.GcsDestination.uri]
  // and suffix pages-x-to-y.json where x and y are 1-indexed page numbers.
  //
  // Example GCS outputs with 157 pages and pages_per_shard = 50:
  //
  // <prefix>pages-001-to-050.json
  // <prefix>pages-051-to-100.json
  // <prefix>pages-101-to-150.json
  // <prefix>pages-151-to-157.json
  int32 pages_per_shard = 2;
}

// The Google Cloud Storage location where the input file will be read from.
message GcsSource {
  // Required. URI of the input file in Google Cloud Storage.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// The Google Cloud Storage location where the output file will be written to.
message GcsDestination {
  // Required. URI of the Google Cloud Storage location to write the output to.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Contains metadata for the BatchProcessDocuments operation.
message OperationMetadata {
  // The possible states of a batch processing operation.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    ACCEPTED = 1;

    // Request operation is waiting for scheduling.
    WAITING = 2;

    // Request is being processed.
    RUNNING = 3;

    // The batch processing completed successfully.
    SUCCEEDED = 4;

    // The batch processing was cancelled.
    CANCELLED = 5;

    // The batch processing has failed.
    FAILED = 6;
  }

  // The state of the current batch processing.
  State state = 1;

  // A message providing more details about the current state of processing.
  string state_message = 2;

  // The creation time of the operation.
  google.protobuf.Timestamp create_time = 3;

  // The last update time of the operation.
  google.protobuf.Timestamp update_time = 4;
}
