// xref: /aosp_15_r20/external/googleapis/google/cloud/documentai/v1beta1/document.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
//
// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.documentai.v1beta1;

import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta1/geometry.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";

// Per-language code generation settings. These are part of the published API
// surface of the generated libraries and must not be changed.
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta1";
option go_package = "cloud.google.com/go/documentai/apiv1beta1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.documentai.v1beta1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta1";
option ruby_package = "Google::Cloud::DocumentAI::V1beta1";

// Document represents the canonical document resource in Document Understanding
// AI.
// It is an interchange format that provides insights into documents and allows
// for collaboration between users and Document Understanding AI to iterate and
// optimize for quality.
message Document {
  // For a large document, sharding may be performed to produce several
  // document shards. Each document shard contains this field to detail which
  // shard it is.
  message ShardInfo {
    // The 0-based index of this shard.
    int64 shard_index = 1;

    // Total number of shards.
    int64 shard_count = 2;

    // The index of the first character in
    // [Document.text][google.cloud.documentai.v1beta1.Document.text] in the
    // overall document global text.
    int64 text_offset = 3;
  }

  // Annotation for common text style attributes. This adheres to CSS
  // conventions as much as possible.
  message Style {
    // Font size with unit.
    message FontSize {
      // Font size for the text.
      float size = 1;

      // Unit for the font size. Follows CSS naming (in, px, pt, etc.).
      string unit = 2;
    }

    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
    TextAnchor text_anchor = 1;

    // Text color.
    google.type.Color color = 2;

    // Text background color.
    google.type.Color background_color = 3;

    // Font weight. Possible values are normal, bold, bolder, and lighter.
    // https://www.w3schools.com/cssref/pr_font_weight.asp
    string font_weight = 4;

    // Text style. Possible values are normal, italic, and oblique.
    // https://www.w3schools.com/cssref/pr_font_font-style.asp
    string text_style = 5;

    // Text decoration. Follows CSS standard.
    // <text-decoration-line> <text-decoration-color> <text-decoration-style>
    // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
    string text_decoration = 6;

    // Font size.
    FontSize font_size = 7;
  }

  // A page in a [Document][google.cloud.documentai.v1beta1.Document].
  message Page {
    // Dimension for the page.
    message Dimension {
      // Page width.
      float width = 1;

      // Page height.
      float height = 2;

      // Dimension unit.
      string unit = 3;
    }

    // Visual element describing a layout unit on a page.
    message Layout {
      // Detected human reading orientation.
      enum Orientation {
        // Unspecified orientation.
        ORIENTATION_UNSPECIFIED = 0;

        // Orientation is aligned with page up.
        PAGE_UP = 1;

        // Orientation is aligned with page right.
        // Turn the head 90 degrees clockwise from upright to read.
        PAGE_RIGHT = 2;

        // Orientation is aligned with page down.
        // Turn the head 180 degrees from upright to read.
        PAGE_DOWN = 3;

        // Orientation is aligned with page left.
        // Turn the head 90 degrees counterclockwise from upright to read.
        PAGE_LEFT = 4;
      }

      // Text anchor indexing into the
      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
      TextAnchor text_anchor = 1;

      // Confidence of the current
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] within
      // context of the object this layout is for. e.g. confidence can be for a
      // single token, a table, a visual element, etc. depending on context.
      // Range [0, 1].
      float confidence = 2;

      // The bounding polygon for the
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
      BoundingPoly bounding_poly = 3;

      // Detected orientation for the
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
      Orientation orientation = 4;
    }

    // A block has a set of lines (collected into paragraphs) that have a
    // common line-spacing and orientation.
    message Block {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Block][google.cloud.documentai.v1beta1.Document.Page.Block].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of lines that a human would perceive as a paragraph.
    message Paragraph {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Paragraph][google.cloud.documentai.v1beta1.Document.Page.Paragraph].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of tokens that a human would perceive as a line.
    // Does not cross column boundaries, can be horizontal, vertical, etc.
    message Line {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Line][google.cloud.documentai.v1beta1.Document.Page.Line].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A detected token.
    message Token {
      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
      message DetectedBreak {
        // Enum to denote the type of break found.
        enum Type {
          // Unspecified break type.
          TYPE_UNSPECIFIED = 0;

          // A single whitespace.
          SPACE = 1;

          // A wider whitespace.
          WIDE_SPACE = 2;

          // A hyphen that indicates that a token has been split across lines.
          HYPHEN = 3;
        }

        // Detected break type.
        Type type = 1;
      }

      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
      Layout layout = 1;

      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
      DetectedBreak detected_break = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // Detected non-text visual elements e.g. checkbox, signature etc. on the
    // page.
    message VisualElement {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
      Layout layout = 1;

      // Type of the
      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
      string type = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // A table representation similar to HTML table structure.
    message Table {
      // A row of table cells.
      message TableRow {
        // Cells that make up this row.
        repeated TableCell cells = 1;
      }

      // A cell representation inside the table.
      message TableCell {
        // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
        // [TableCell][google.cloud.documentai.v1beta1.Document.Page.Table.TableCell].
        Layout layout = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;

        // A list of detected languages together with confidence.
        repeated DetectedLanguage detected_languages = 4;
      }

      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Table][google.cloud.documentai.v1beta1.Document.Page.Table].
      Layout layout = 1;

      // Header rows of the table.
      repeated TableRow header_rows = 2;

      // Body rows of the table.
      repeated TableRow body_rows = 3;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 4;
    }

    // A form field detected on the page.
    message FormField {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
      // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
      Layout field_name = 1;

      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
      // value.
      Layout field_value = 2;

      // A list of detected languages for name together with confidence.
      repeated DetectedLanguage name_detected_languages = 3;

      // A list of detected languages for value together with confidence.
      repeated DetectedLanguage value_detected_languages = 4;
    }

    // Detected language for a structural component.
    message DetectedLanguage {
      // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
      // information, see
      // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
      string language_code = 1;

      // Confidence of detected language. Range [0, 1].
      float confidence = 2;
    }

    // 1-based index for current
    // [Page][google.cloud.documentai.v1beta1.Document.Page] in a parent
    // [Document][google.cloud.documentai.v1beta1.Document]. Useful when a page
    // is taken out of a [Document][google.cloud.documentai.v1beta1.Document]
    // for individual processing.
    int32 page_number = 1;

    // Physical dimension of the page.
    Dimension dimension = 2;

    // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
    // page.
    Layout layout = 3;

    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 4;

    // A list of visually detected text blocks on the page.
    // A block has a set of lines (collected into paragraphs) that have a common
    // line-spacing and orientation.
    repeated Block blocks = 5;

    // A list of visually detected text paragraphs on the page.
    // A collection of lines that a human would perceive as a paragraph.
    repeated Paragraph paragraphs = 6;

    // A list of visually detected text lines on the page.
    // A collection of tokens that a human would perceive as a line.
    repeated Line lines = 7;

    // A list of visually detected tokens on the page.
    repeated Token tokens = 8;

    // A list of detected non-text visual elements e.g. checkbox,
    // signature etc. on the page.
    repeated VisualElement visual_elements = 9;

    // A list of visually detected tables on the page.
    repeated Table tables = 10;

    // A list of visually detected form fields on the page.
    repeated FormField form_fields = 11;
  }

  // A phrase in the text that is a known entity type, such as a person, an
  // organization, or location.
  message Entity {
    // Provenance of the entity.
    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
    TextAnchor text_anchor = 1;

    // Required. Entity type from a schema e.g. `Address`.
    string type = 2 [(google.api.field_behavior) = REQUIRED];

    // Text value in the document e.g. `1600 Amphitheatre Pkwy`.
    string mention_text = 3;

    // Canonical mention name. This will be a unique value in the entity list
    // for this document.
    string mention_id = 4;
  }

  // Relationship between
  // [Entities][google.cloud.documentai.v1beta1.Document.Entity].
  message EntityRelation {
    // Subject entity mention_id.
    string subject_id = 1;

    // Object entity mention_id.
    string object_id = 2;

    // Relationship description.
    string relation = 3;
  }

  // Text reference indexing into the
  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  message TextAnchor {
    // A text segment in the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text]. The
    // indices may be out of bounds which indicate that the text extends into
    // another document shard for large sharded documents. See
    // [ShardInfo.text_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset].
    message TextSegment {
      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
      // start UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
      int64 start_index = 1;

      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
      // half open end UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
      int64 end_index = 2;
    }

    // The text segments from the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
    repeated TextSegment text_segments = 1;
  }

  // Original source document from the user.
  oneof source {
    // Currently supports Google Cloud Storage URI of the form
    // `gs://bucket_name/object_name`. Object versioning is not supported.
    // See [Google Cloud Storage Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
    // info.
    string uri = 1;

    // Inline document content, represented as a stream of bytes.
    // Note: As with all `bytes` fields, protocol buffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes content = 2;
  }

  // An IANA published MIME type (also referred to as media type). For more
  // information, see
  // https://www.iana.org/assignments/media-types/media-types.xhtml.
  string mime_type = 3;

  // UTF-8 encoded text in reading order from the document.
  string text = 4;

  // Styles for the
  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  repeated Style text_styles = 5;

  // Visual page layout for the
  // [Document][google.cloud.documentai.v1beta1.Document].
  repeated Page pages = 6;

  // A list of entities detected on
  // [Document.text][google.cloud.documentai.v1beta1.Document.text]. For
  // document shards, entities in this list may cross shard boundaries.
  repeated Entity entities = 7;

  // Relationship among
  // [Document.entities][google.cloud.documentai.v1beta1.Document.entities].
  repeated EntityRelation entity_relations = 8;

  // Information about the sharding if this document is sharded part of a larger
  // document. If the document is not sharded, this message is not specified.
  ShardInfo shard_info = 9;

  // Any error that occurred while processing this document.
  google.rpc.Status error = 10;
}