xref: /aosp_15_r20/external/googleapis/google/cloud/documentai/v1beta3/document.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.documentai.v1beta3;
18
19import "google/api/field_behavior.proto";
20import "google/cloud/documentai/v1beta3/barcode.proto";
21import "google/cloud/documentai/v1beta3/geometry.proto";
22import "google/protobuf/timestamp.proto";
23import "google/rpc/status.proto";
24import "google/type/color.proto";
25import "google/type/date.proto";
26import "google/type/datetime.proto";
27import "google/type/money.proto";
28import "google/type/postal_address.proto";
29
30option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
31option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
32option java_multiple_files = true;
33option java_outer_classname = "DocumentProto";
34option java_package = "com.google.cloud.documentai.v1beta3";
35option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
36option ruby_package = "Google::Cloud::DocumentAI::V1beta3";
37
38// Document represents the canonical document resource in Document AI. It is an
39// interchange format that provides insights into documents and allows for
40// collaboration between users and Document AI to iterate and optimize for
41// quality.
42message Document {
43  // A large document may be sharded into several document shards. Each
44  // document shard carries this message to identify which shard it is and
45  // where its text sits in the overall document.
46  message ShardInfo {
47    // The 0-based index of this shard among the shards of the document.
48    int64 shard_index = 1;
49
50    // Total number of shards the document was split into.
51    int64 shard_count = 2;
52
53    // Offset of this shard's text: the index, in the overall document global
54    // text, of the first character of this shard's
55    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
56    int64 text_offset = 3;
57  }
58
59  // Annotation for common text style attributes, applied to a span of the
60  // document text. This adheres to CSS conventions as much as possible.
61  message Style {
62    // Font size together with its unit.
63    message FontSize {
64      // Numeric font size for the text, expressed in `unit`.
65      float size = 1;
66
67      // Unit for the font size. Follows CSS naming (such as `in`, `px`, and
68      // `pt`).
69      string unit = 2;
70    }
71
72    // Text anchor indexing into the
73    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
74    TextAnchor text_anchor = 1;
75
76    // Text color.
77    google.type.Color color = 2;
78
79    // Text background color.
80    google.type.Color background_color = 3;
81
82    // [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
83    // Possible values are `normal`, `bold`, `bolder`, and `lighter`.
84    string font_weight = 4;
85
86    // [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
87    // Possible values are `normal`, `italic`, and `oblique`.
88    string text_style = 5;
89
90    // [Text
91    // decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
92    // Follows CSS standard, given in the form:
93    // `<text-decoration-line> <text-decoration-color> <text-decoration-style>`
94    string text_decoration = 6;
95
96    // Font size, including its unit.
97    FontSize font_size = 7;
98
99    // Font family such as `Arial` or `Times New Roman`. See
100    // https://www.w3schools.com/cssref/pr_font_font-family.asp
101    string font_family = 8;
102  }
103
104  // A page in a [Document][google.cloud.documentai.v1beta3.Document].
105  message Page {
106    // Dimension for the page.
107    message Dimension {
108      // Page width.
109      float width = 1;
110
111      // Page height.
112      float height = 2;
113
114      // Unit of the page dimensions.
115      string unit = 3;
116    }
117
118    // Rendered image contents for this page.
119    message Image {
120      // Raw byte content of the image.
121      bytes content = 1;
122
123      // Encoding [media type (MIME
124      // type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
125      // for the image.
126      string mime_type = 2;
127
128      // Width of the image in pixels.
129      int32 width = 3;
130
131      // Height of the image in pixels.
132      int32 height = 4;
133    }
134
135    // Representation for transformation matrix, intended to be compatible and
136    // used with OpenCV format for image manipulation.
137    message Matrix {
138      // Number of rows in the matrix.
139      int32 rows = 1;
140
141      // Number of columns in the matrix.
142      int32 cols = 2;
143
144      // This encodes information about what data type the matrix uses.
145      // For example, 0 (CV_8U) is an unsigned 8-bit image. For the full list
146      // of OpenCV primitive data types, please refer to
147      // https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html
148      int32 type = 3;
149
150      // The matrix data.
151      bytes data = 4;
152    }
153
154    // Visual element describing a layout unit on a page.
155    message Layout {
156      // Detected human reading orientation.
157      enum Orientation {
158        // Unspecified orientation.
159        ORIENTATION_UNSPECIFIED = 0;
160
161        // Orientation is aligned with page up.
162        PAGE_UP = 1;
163
164        // Orientation is aligned with page right.
165        // Turn the head 90 degrees clockwise from upright to read.
166        PAGE_RIGHT = 2;
167
168        // Orientation is aligned with page down.
169        // Turn the head 180 degrees from upright to read.
170        PAGE_DOWN = 3;
171
172        // Orientation is aligned with page left.
173        // Turn the head 90 degrees counterclockwise from upright to read.
174        PAGE_LEFT = 4;
175      }
176
177      // Text anchor indexing into the
178      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
179      TextAnchor text_anchor = 1;
180
181      // Confidence of the current
182      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] within
183      // context of the object this layout is for, e.g. confidence can be for a
184      // single token, a table, a visual element, etc., depending on context.
185      // Range `[0, 1]`.
186      float confidence = 2;
187
188      // The bounding polygon for the
189      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
190      BoundingPoly bounding_poly = 3;
191
192      // Detected orientation for the
193      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
194      Orientation orientation = 4;
195    }
196
197    // A block has a set of lines (collected into paragraphs) that have a
198    // common line-spacing and orientation.
199    message Block {
200      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
201      // [Block][google.cloud.documentai.v1beta3.Document.Page.Block].
202      Layout layout = 1;
203
204      // A list of detected languages together with confidence.
205      repeated DetectedLanguage detected_languages = 2;
206
207      // The history of this annotation. This field is deprecated.
208      Provenance provenance = 3 [deprecated = true];
209    }
210
211    // A collection of lines that a human would perceive as a paragraph.
212    message Paragraph {
213      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
214      // [Paragraph][google.cloud.documentai.v1beta3.Document.Page.Paragraph].
215      Layout layout = 1;
216
217      // A list of detected languages together with confidence.
218      repeated DetectedLanguage detected_languages = 2;
219
220      // The history of this annotation. This field is deprecated.
221      Provenance provenance = 3 [deprecated = true];
222    }
223
224    // A collection of tokens that a human would perceive as a line.
225    // Does not cross column boundaries; can be horizontal, vertical, etc.
226    message Line {
227      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
228      // [Line][google.cloud.documentai.v1beta3.Document.Page.Line].
229      Layout layout = 1;
230
231      // A list of detected languages together with confidence.
232      repeated DetectedLanguage detected_languages = 2;
233
234      // The history of this annotation. This field is deprecated.
235      Provenance provenance = 3 [deprecated = true];
236    }
237
238    // A detected token.
239    message Token {
240      // Detected break at the end of a
241      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
242      message DetectedBreak {
243        // Enum to denote the type of break found.
244        enum Type {
245          // Unspecified break type.
246          TYPE_UNSPECIFIED = 0;
247
248          // A single whitespace.
249          SPACE = 1;
250
251          // A wider whitespace.
252          WIDE_SPACE = 2;
253
254          // A hyphen that indicates that a token has been split across lines.
255          HYPHEN = 3;
256        }
257
258        // Detected break type.
259        Type type = 1;
260      }
261
262      // Font and other text style attributes.
263      message StyleInfo {
264        // Font size in points (`1` point is `¹⁄₇₂` inches).
265        int32 font_size = 1;
266
267        // Font size in pixels, equal to _unrounded
268        // [font_size][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_size]_
269        // * _resolution_ ÷ `72.0`.
270        double pixel_font_size = 2;
271
272        // Letter spacing in points.
273        double letter_spacing = 3;
274
275        // Name or style of the font.
276        string font_type = 4;
277
278        // Whether the text is bold (equivalent to
279        // [font_weight][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_weight]
280        // being at least `700`).
281        bool bold = 5;
282
283        // Whether the text is italic.
284        bool italic = 6;
285
286        // Whether the text is underlined.
287        bool underlined = 7;
288
289        // Whether the text is strikethrough. This feature is not supported yet.
290        bool strikeout = 8;
291
292        // Whether the text is a subscript. This feature is not supported yet.
293        bool subscript = 9;
294
295        // Whether the text is a superscript. This feature is not supported yet.
296        bool superscript = 10;
297
298        // Whether the text is in small caps. This feature is not supported yet.
299        bool smallcaps = 11;
300
301        // TrueType weight on a scale from `100` (thin) to `1000` (ultra-heavy).
302        // Normal is `400`, bold is `700`.
303        int32 font_weight = 12;
304
305        // Whether the text is handwritten.
306        bool handwritten = 13;
307
308        // Color of the text.
309        google.type.Color text_color = 14;
310
311        // Color of the background.
312        google.type.Color background_color = 15;
313      }
314
315      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
316      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
317      Layout layout = 1;
318
319      // Detected break at the end of a
320      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
321      DetectedBreak detected_break = 2;
322
323      // A list of detected languages together with confidence.
324      repeated DetectedLanguage detected_languages = 3;
325
326      // The history of this annotation. This field is deprecated.
327      Provenance provenance = 4 [deprecated = true];
328
329      // Text style attributes.
330      StyleInfo style_info = 5;
331    }
332
333    // A detected symbol.
334    message Symbol {
335      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
336      // [Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol].
337      Layout layout = 1;
338
339      // A list of detected languages together with confidence.
340      repeated DetectedLanguage detected_languages = 2;
341    }
342
343    // Detected non-text visual elements, e.g. checkbox, signature, etc., on
344    // the page.
345    message VisualElement {
346      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
347      // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
348      Layout layout = 1;
349
350      // Type of the
351      // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
352      string type = 2;
353
354      // A list of detected languages together with confidence.
355      repeated DetectedLanguage detected_languages = 3;
356    }
357
358    // A table representation similar to HTML table structure.
359    message Table {
360      // A row of table cells.
361      message TableRow {
362        // Cells that make up this row.
363        repeated TableCell cells = 1;
364      }
365
366      // A cell representation inside the table.
367      message TableCell {
368        // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
369        // [TableCell][google.cloud.documentai.v1beta3.Document.Page.Table.TableCell].
370        Layout layout = 1;
371
372        // How many rows this cell spans.
373        int32 row_span = 2;
374
375        // How many columns this cell spans.
376        int32 col_span = 3;
377
378        // A list of detected languages together with confidence.
379        repeated DetectedLanguage detected_languages = 4;
380      }
381
382      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
383      // [Table][google.cloud.documentai.v1beta3.Document.Page.Table].
384      Layout layout = 1;
385
386      // Header rows of the table.
387      repeated TableRow header_rows = 2;
388
389      // Body rows of the table.
390      repeated TableRow body_rows = 3;
391
392      // A list of detected languages together with confidence.
393      repeated DetectedLanguage detected_languages = 4;
394
395      // The history of this table. This field is deprecated.
396      Provenance provenance = 5 [deprecated = true];
397    }
398
399    // A form field detected on the page.
400    message FormField {
401      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
402      // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
403      // name, e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
404      Layout field_name = 1;
405
406      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
407      // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
408      // value.
409      Layout field_value = 2;
410
411      // A list of detected languages for name together with confidence.
412      repeated DetectedLanguage name_detected_languages = 3;
413
414      // A list of detected languages for value together with confidence.
415      repeated DetectedLanguage value_detected_languages = 4;
416
417      // If the value is non-textual, this field represents the type. Current
418      // valid values are:
419      //
420      // - blank (this indicates the `field_value` is normal text)
421      // - `unfilled_checkbox`
422      // - `filled_checkbox`
423      string value_type = 5;
424
425      // Created for Labeling UI to export key text.
426      // If corrections were made to the text identified by the
427      // `field_name.text_anchor`, this field will contain the correction.
428      string corrected_key_text = 6;
429
430      // Created for Labeling UI to export value text.
431      // If corrections were made to the text identified by the
432      // `field_value.text_anchor`, this field will contain the correction.
433      string corrected_value_text = 7;
434
435      // The history of this annotation.
436      Provenance provenance = 8;
437    }
438
439    // A detected barcode.
440    message DetectedBarcode {
441      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
442      // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
443      Layout layout = 1;
444
445      // Detailed barcode information of the
446      // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
447      Barcode barcode = 2;
448    }
449
450    // Detected language for a structural component.
451    message DetectedLanguage {
452      // The [BCP-47 language
453      // code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
454      // such as `en-US` or `sr-Latn`.
455      string language_code = 1;
456
457      // Confidence of detected language. Range `[0, 1]`.
458      float confidence = 2;
459    }
460
461    // Image quality scores for the page image.
462    message ImageQualityScores {
463      // An image quality defect detected on the page, with its confidence.
464      message DetectedDefect {
465        // Name of the defect type. Supported values are:
466        //
467        // - `quality/defect_blurry`
468        // - `quality/defect_noisy`
469        // - `quality/defect_dark`
470        // - `quality/defect_faint`
471        // - `quality/defect_text_too_small`
472        // - `quality/defect_document_cutoff`
473        // - `quality/defect_text_cutoff`
474        // - `quality/defect_glare`
475        string type = 1;
476
477        // Confidence of detected defect. Range `[0, 1]` where `1` indicates
478        // strong confidence that the defect exists.
479        float confidence = 2;
480      }
481
482      // The overall quality score. Range `[0, 1]` where `1` is perfect quality.
483      float quality_score = 1;
484
485      // A list of detected defects.
486      repeated DetectedDefect detected_defects = 2;
487    }
488
489    // 1-based index for current
490    // [Page][google.cloud.documentai.v1beta3.Document.Page] in a parent
491    // [Document][google.cloud.documentai.v1beta3.Document]. Useful when a page
492    // is taken out of a [Document][google.cloud.documentai.v1beta3.Document]
493    // for individual processing.
494    int32 page_number = 1;
495
496    // Rendered image for this page. This image is preprocessed to remove any
497    // skew, rotation, and distortions such that the annotation bounding boxes
498    // can be upright and axis-aligned.
499    Image image = 13;
500
501    // Transformation matrices that were applied to the original document image
502    // to produce
503    // [Page.image][google.cloud.documentai.v1beta3.Document.Page.image].
504    repeated Matrix transforms = 14;
505
506    // Physical dimension of the page.
507    Dimension dimension = 2;
508
509    // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
510    // page.
511    Layout layout = 3;
512
513    // A list of detected languages together with confidence.
514    repeated DetectedLanguage detected_languages = 4;
515
516    // A list of visually detected text blocks on the page.
517    // A block has a set of lines (collected into paragraphs) that have a common
518    // line-spacing and orientation.
519    repeated Block blocks = 5;
520
521    // A list of visually detected text paragraphs on the page.
522    // A collection of lines that a human would perceive as a paragraph.
523    repeated Paragraph paragraphs = 6;
524
525    // A list of visually detected text lines on the page.
526    // A collection of tokens that a human would perceive as a line.
527    repeated Line lines = 7;
528
529    // A list of visually detected tokens on the page.
530    repeated Token tokens = 8;
531
532    // A list of detected non-text visual elements, e.g. checkbox,
533    // signature, etc., on the page.
534    repeated VisualElement visual_elements = 9;
535
536    // A list of visually detected tables on the page.
537    repeated Table tables = 10;
538
539    // A list of visually detected form fields on the page.
540    repeated FormField form_fields = 11;
541
542    // A list of visually detected symbols on the page.
543    repeated Symbol symbols = 12;
544
545    // A list of detected barcodes.
546    repeated DetectedBarcode detected_barcodes = 15;
547
548    // Image quality scores.
549    ImageQualityScores image_quality_scores = 17;
550
551    // The history of this page. This field is deprecated.
552    Provenance provenance = 16 [deprecated = true];
553  }
554
555  // An entity that could be a phrase in the text or a property that belongs to
556  // the document. It is a known entity type, such as a person, an organization,
557  // or location.
558  message Entity {
559    // Parsed and normalized entity value.
560    message NormalizedValue {
561      // An optional structured entity value.
562      // Must match the entity type defined in the schema, if
563      // known. If this field is present, the `text` field could also be
564      // populated.
565      oneof structured_value {
566        // Money value. See also:
567        // https://github.com/googleapis/googleapis/blob/master/google/type/money.proto
568        google.type.Money money_value = 2;
569
570        // Date value. Includes year, month, day. See also:
571        // https://github.com/googleapis/googleapis/blob/master/google/type/date.proto
572        google.type.Date date_value = 3;
573
574        // DateTime value. Includes date, time, and timezone. See also:
575        // https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto
576        google.type.DateTime datetime_value = 4;
577
578        // Postal address. See also:
579        // https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto
580        google.type.PostalAddress address_value = 5;
581
582        // Boolean value. Can be used for entities with binary values, or for
583        // checkboxes.
584        bool boolean_value = 6;
585
586        // Integer value.
587        int32 integer_value = 7;
588
589        // Float value.
590        float float_value = 8;
591      }
592
593      // Optional. A normalized string form of the entity value.
594      // For some entity types, one of the respective `structured_value` fields
595      // may also be populated. Not all the types of `structured_value` will be
596      // normalized. For example, some processors may not generate `float`
597      // or `integer` normalized text by default.
598      //
599      // Below are sample formats mapped to structured values.
600      //
601      // - Money/Currency type (`money_value`) is in the ISO 4217 text format.
602      // - Date type (`date_value`) is in the ISO 8601 text format.
603      // - Datetime type (`datetime_value`) is in the ISO 8601 text format.
604      string text = 1 [(google.api.field_behavior) = OPTIONAL];
605    }
606
607    // Optional. Provenance of the entity.
608    // Text anchor indexing into the
609    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
610    TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL];
611
612    // Required. Entity type from a schema, e.g. `Address`.
613    string type = 2 [(google.api.field_behavior) = REQUIRED];
614
615    // Optional. Text value of the entity, e.g. `1600 Amphitheatre Pkwy`.
616    string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];
617
618    // Optional. Deprecated. Use the `id` field instead.
619    string mention_id = 4 [(google.api.field_behavior) = OPTIONAL];
620
621    // Optional. Confidence of detected Schema entity. Range `[0, 1]`.
622    float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
623
624    // Optional. Represents the provenance of this entity with respect to the
625    // location on the page where it was found.
626    PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];
627
628    // Optional. Canonical id. This will be a unique value in the entity list
629    // for this document.
630    string id = 7 [(google.api.field_behavior) = OPTIONAL];
631
632    // Optional. Normalized entity value. Absent if the extracted value could
633    // not be converted or the type (e.g. address) is not supported for certain
634    // parsers. This field is also only populated for certain supported document
635    // types.
636    NormalizedValue normalized_value = 9
637        [(google.api.field_behavior) = OPTIONAL];
638
639    // Optional. Entities can be nested to form a hierarchical data structure
640    // representing the content in the document.
641    repeated Entity properties = 10 [(google.api.field_behavior) = OPTIONAL];
642
643    // Optional. The history of this annotation.
644    Provenance provenance = 11 [(google.api.field_behavior) = OPTIONAL];
645
646    // Optional. Whether the entity will be redacted for de-identification
647    // purposes.
648    bool redacted = 12 [(google.api.field_behavior) = OPTIONAL];
649  }
650
651  // Relationship between two
652  // [Entities][google.cloud.documentai.v1beta3.Document.Entity].
653  message EntityRelation {
654    // Id of the subject entity of the relation.
655    string subject_id = 1;
656
657    // Id of the object entity of the relation.
658    string object_id = 2;
659
660    // Description of the relationship between the subject and the object.
661    string relation = 3;
662  }
663
664  // Text reference indexing into the
665  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
666  message TextAnchor {
667    // A text segment in the
668    // [Document.text][google.cloud.documentai.v1beta3.Document.text]. The
669    // indices may be out of bounds, which indicates that the text extends into
670    // another document shard for large sharded documents. See
671    // [ShardInfo.text_offset][google.cloud.documentai.v1beta3.Document.ShardInfo.text_offset].
672    message TextSegment {
673      // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment]
674      // start UTF-8 char index in the
675      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
676      int64 start_index = 1;
677
678      // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment]
679      // half-open end UTF-8 char index in the
680      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
681      int64 end_index = 2;
682    }
683
684    // The text segments from the
685    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
686    repeated TextSegment text_segments = 1;
687
688    // Contains the content of the text span so that users do
689    // not have to look it up in the `text_segments`. It is always
690    // populated for form fields.
691    string content = 2;
692  }
693
694  // References the visual context of the entity in the
695  // [Document.pages][google.cloud.documentai.v1beta3.Document.pages]. Page
696  // anchors can be cross-page, consist of multiple bounding polygons and
697  // optionally reference specific layout element types.
698  message PageAnchor {
699    // Represents a weak reference to a page element within a document.
700    message PageRef {
701      // The type of layout that is being referenced.
702      enum LayoutType {
703        // Layout Unspecified.
704        LAYOUT_TYPE_UNSPECIFIED = 0;
705
706        // References a
707        // [Page.blocks][google.cloud.documentai.v1beta3.Document.Page.blocks]
708        // element.
709        BLOCK = 1;
710
711        // References a
712        // [Page.paragraphs][google.cloud.documentai.v1beta3.Document.Page.paragraphs]
713        // element.
714        PARAGRAPH = 2;
715
716        // References a
717        // [Page.lines][google.cloud.documentai.v1beta3.Document.Page.lines]
718        // element.
719        LINE = 3;
720
721        // References a
722        // [Page.tokens][google.cloud.documentai.v1beta3.Document.Page.tokens]
723        // element.
724        TOKEN = 4;
725
726        // References a
727        // [Page.visual_elements][google.cloud.documentai.v1beta3.Document.Page.visual_elements]
728        // element.
729        VISUAL_ELEMENT = 5;
730
731        // References a
732        // [Page.tables][google.cloud.documentai.v1beta3.Document.Page.tables]
733        // element.
734        TABLE = 6;
735
736        // References a
737        // [Page.form_fields][google.cloud.documentai.v1beta3.Document.Page.form_fields]
738        // element.
739        FORM_FIELD = 7;
740      }
741
742      // Required. Index into the
743      // [Document.pages][google.cloud.documentai.v1beta3.Document.pages]
744      // element, for example using
745      // `[Document.pages][page_refs.page]` to locate the related page element.
746      // This field is skipped when its value is the default `0`. See
747      // https://developers.google.com/protocol-buffers/docs/proto3#json.
748      int64 page = 1 [(google.api.field_behavior) = REQUIRED];
749
750      // Optional. The type of the layout element that is being referenced, if
751      // any.
752      LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];
753
754      // Optional. Deprecated. Use
755      // [PageRef.bounding_poly][google.cloud.documentai.v1beta3.Document.PageAnchor.PageRef.bounding_poly]
756      // instead.
757      string layout_id = 3
758          [deprecated = true, (google.api.field_behavior) = OPTIONAL];
759
760      // Optional. Identifies the bounding polygon of a layout element on the
761      // page. If `layout_type` is set, the bounding polygon must be exactly the
762      // same as that of the layout element it's referring to.
763      BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];
764
765      // Optional. Confidence of detected page element, if applicable. Range
766      // `[0, 1]`.
767      float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
768    }
769
770    // One or more references to visual page elements.
771    repeated PageRef page_refs = 1;
772  }
773
774  // Structure to identify provenance relationships between annotations in
775  // different revisions.
776  message Provenance {
777    // The parent element the current element is based on. Used for
778    // referencing/aligning, removal and replacement operations.
779    message Parent {
780      // The index into the current revision's `parent_ids` list.
781      int32 revision = 1;
782
783      // The index of the parent item in the corresponding item list (e.g. list
784      // of entities, properties within entities, etc.) in the parent revision.
785      int32 index = 3;
786
787      // The id of the parent provenance. This field is deprecated.
788      int32 id = 2 [deprecated = true];
789    }
790
791    // If a processor or agent does an explicit operation on existing elements.
792    enum OperationType {
793      // Operation type unspecified. If no operation is specified a provenance
794      // entry is simply used to match against a `parent`.
795      OPERATION_TYPE_UNSPECIFIED = 0;
796
797      // Add an element.
798      ADD = 1;
799
800      // Remove an element identified by `parent`.
801      REMOVE = 2;
802
803      // Updates any fields within the given provenance scope of the message. It
804      // overwrites the fields rather than replacing them. Use this when you
805      // want to update a field value of an entity without also updating all the
806      // child properties.
807      UPDATE = 7;
808
809      // Currently unused. Replace an element identified by `parent`.
810      REPLACE = 3;
811
812      // Deprecated. Request human review for the element identified by
813      // `parent`.
814      EVAL_REQUESTED = 4 [deprecated = true];
815
816      // Deprecated. Element is reviewed and approved at human review,
817      // confidence will be set to 1.0.
818      EVAL_APPROVED = 5 [deprecated = true];
819
820      // Deprecated. Element is skipped in the validation process.
821      EVAL_SKIPPED = 6 [deprecated = true];
822    }
823
824    // The index of the revision that produced this element. Deprecated.
825    int32 revision = 1 [deprecated = true];
826
827    // The id of this operation. Needs to be unique within the scope of the
828    // revision. This field is deprecated.
829    int32 id = 2 [deprecated = true];
830
831    // References to the original elements that are replaced.
832    repeated Parent parents = 3;
833
834    // The type of provenance operation.
835    OperationType type = 4;
836  }
837
838  // Contains past or forward revisions of this document.
839  message Revision {
840    // Human Review information of the document.
841    message HumanReview {
842      // Human review state, e.g. `requested`, `succeeded`, `rejected`.
843      string state = 1;
844
845      // A message providing more details about the current state of processing.
846      // For example, the rejection reason when the state is `rejected`.
847      string state_message = 2;
848    }
849
850    // Who or what made the change.
851    oneof source {
852      // If the change was made by a person, specify the name or ID of that
853      // person.
854      string agent = 4;
855
856      // If the annotation was made by a processor, identify the processor by
857      // its resource name.
858      string processor = 5;
859    }
860
861    // ID of the revision, internally generated by doc proto storage.
862    // Unique within the context of the document.
863    string id = 1;
864
865    // Deprecated. The revisions that this revision is based on.  This can
866    // include one or more parents (when documents are merged).  This field
867    // represents the index into the `revisions` field.
868    repeated int32 parent = 2 [deprecated = true];
869
870    // The revisions that this revision is based on. Must include all the IDs
871    // that have anything to do with this revision - e.g. there are
872    // `provenance.parent.revision` fields that index into this field.
873    repeated string parent_ids = 7;
874
875    // The time that the revision was created, internally generated by
876    // doc proto storage at creation time.
877    google.protobuf.Timestamp create_time = 3;
878
879    // Human Review information of this revision.
880    HumanReview human_review = 6;
881  }
882
883  // This message is used for text changes aka. OCR corrections.
884  message TextChange {
885    // The span of text that this correction applies to.
886    // Text anchor indexing into the
887    // [Document.text][google.cloud.documentai.v1beta3.Document.text].  There
888    // can only be a single `TextAnchor.text_segments` element.  If the start
889    // and end index of the text segment are the same, the text change is
890    // inserted before that index.
891    TextAnchor text_anchor = 1;
892
893    // The text that replaces the text identified in the `text_anchor`.
894    string changed_text = 2;
895
896    // Deprecated. The history of this annotation.
897    repeated Provenance provenance = 3 [deprecated = true];
898  }
899
900  // Represents the parsed layout of a document as a collection of blocks that
901  // the document is divided into.
902  message DocumentLayout {
903    // Represents a block. A block can be one of the supported types (text,
904    // table, list).
905    message DocumentLayoutBlock {
906      // Represents where the block starts and ends in the document.
907      message LayoutPageSpan {
908        // Page where the block starts in the document.
909        int32 page_start = 1;
910
911        // Page where the block ends in the document.
912        int32 page_end = 2;
913      }
914
915      // Represents a text type block.
916      message LayoutTextBlock {
917        // Text content stored in the block.
918        string text = 1;
919
920        // Type of the text in the block. Available options are: `paragraph`,
921        // `subtitle`, `heading-1`, `heading-2`, `heading-3`, `heading-4`,
922        // `heading-5`, `header`, `footer`.
923        string type = 2;
924
925        // A text block can itself contain child blocks.
926        // Repeated blocks support further hierarchies and nested blocks.
927        repeated DocumentLayoutBlock blocks = 3;
928      }
929
930      // Represents a table type block.
931      message LayoutTableBlock {
932        // Header rows at the top of the table.
933        repeated LayoutTableRow header_rows = 1;
934
935        // Body rows containing the main table content.
936        repeated LayoutTableRow body_rows = 2;
937
938        // Table caption/title.
939        string caption = 3;
940      }
941
942      // Represents a row in a table.
943      message LayoutTableRow {
944        // A table row is a list of table cells.
945        repeated LayoutTableCell cells = 1;
946      }
947
948      // Represents a cell in a table row.
949      message LayoutTableCell {
950        // A table cell is a list of blocks.
951        // Repeated blocks support further hierarchies and nested blocks.
952        repeated DocumentLayoutBlock blocks = 1;
953
954        // How many rows this cell spans.
955        int32 row_span = 2;
956
957        // How many columns this cell spans.
958        int32 col_span = 3;
959      }
960
961      // Represents a list type block.
962      message LayoutListBlock {
963        // List entries that constitute a list block.
964        repeated LayoutListEntry list_entries = 1;
965
966        // Type of the `list_entries` (if any). Available options are `ordered`
967        // and `unordered`.
968        string type = 2;
969      }
970
971      // Represents an entry in the list.
972      message LayoutListEntry {
973        // A list entry is a list of blocks.
974        // Repeated blocks support further hierarchies and nested blocks.
975        repeated DocumentLayoutBlock blocks = 1;
976      }
977
978      oneof block {
979        // Block consisting of text content.
980        LayoutTextBlock text_block = 2;
981
982        // Block consisting of table content/structure.
983        LayoutTableBlock table_block = 3;
984
985        // Block consisting of list content/structure.
986        LayoutListBlock list_block = 4;
987      }
988
989      // ID of the block.
990      string block_id = 1;
991
992      // Page span of the block.
993      LayoutPageSpan page_span = 5;
994    }
995
996    // List of blocks in the document.
997    repeated DocumentLayoutBlock blocks = 1;
998  }
999
1000  // Represents the chunks that the document is divided into.
1001  message ChunkedDocument {
1002    // Represents a chunk.
1003    message Chunk {
1004      // Represents where the chunk starts and ends in the document.
1005      message ChunkPageSpan {
1006        // Page where the chunk starts in the document.
1007        int32 page_start = 1;
1008
1009        // Page where the chunk ends in the document.
1010        int32 page_end = 2;
1011      }
1012
1013      // Represents the page header associated with the chunk.
1014      message ChunkPageHeader {
1015        // Header in text format.
1016        string text = 1;
1017
1018        // Page span of the header.
1019        ChunkPageSpan page_span = 2;
1020      }
1021
1022      // Represents the page footer associated with the chunk.
1023      message ChunkPageFooter {
1024        // Footer in text format.
1025        string text = 1;
1026
1027        // Page span of the footer.
1028        ChunkPageSpan page_span = 2;
1029      }
1030
1031      // ID of the chunk.
1032      string chunk_id = 1;
1033
1034      // DO NOT USE.
1035      // List of all parsed document layout source blocks used to generate the
1036      // chunk.
1037      repeated string source_block_ids = 2;
1038
1039      // Text content of the chunk.
1040      string content = 3;
1041
1042      // Page span of the chunk.
1043      ChunkPageSpan page_span = 4;
1044
1045      // Page headers associated with the chunk.
1046      repeated ChunkPageHeader page_headers = 5;
1047
1048      // Page footers associated with the chunk.
1049      repeated ChunkPageFooter page_footers = 6;
1050    }
1051
1052    // List of chunks.
1053    repeated Chunk chunks = 1;
1054  }
1055
1056  // Original source document from the user.
1057  oneof source {
1058    // Optional. Currently supports a Google Cloud Storage URI of the form
1059    // `gs://bucket_name/object_name`. Object versioning is not supported.
1060    // For more information, refer to [Google Cloud Storage Request
1061    // URIs](https://cloud.google.com/storage/docs/reference-uris).
1062    string uri = 1 [(google.api.field_behavior) = OPTIONAL];
1063
1064    // Optional. Inline document content, represented as a stream of bytes.
1065    // Note: As with all `bytes` fields, protocol buffers use a pure binary
1066    // representation, whereas JSON representations use base64.
1067    bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
1068  }
1069
1070  // An IANA-published [media type (MIME
1071  // type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
1072  string mime_type = 3;
1073
1074  // Optional. UTF-8 encoded text in reading order from the document.
1075  string text = 4 [(google.api.field_behavior) = OPTIONAL];
1076
1077  // Deprecated. Styles for the
1078  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
1079  repeated Style text_styles = 5 [deprecated = true];
1080
1081  // Visual page layout for the
1082  // [Document][google.cloud.documentai.v1beta3.Document].
1083  repeated Page pages = 6;
1084
1085  // A list of entities detected on
1086  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. For
1087  // document shards, entities in this list may cross shard boundaries.
1088  repeated Entity entities = 7;
1089
1090  // Placeholder.  Relationship among
1091  // [Document.entities][google.cloud.documentai.v1beta3.Document.entities].
1092  repeated EntityRelation entity_relations = 8;
1093
1094  // Placeholder.  A list of text corrections made to
1095  // [Document.text][google.cloud.documentai.v1beta3.Document.text].  This is
1096  // usually used for annotating corrections to OCR mistakes.  Text changes for
1097  // a given revision may not overlap with each other.
1098  repeated TextChange text_changes = 14;
1099
1100  // Sharding information if this document is a sharded part of a larger
1101  // document. If the document is not sharded, this message is not specified.
1102  ShardInfo shard_info = 9;
1103
1104  // Any error that occurred while processing this document.
1105  google.rpc.Status error = 10;
1106
1107  // Placeholder. Revision history of this document.
1108  repeated Revision revisions = 13;
1109
1110  // Parsed layout of the document.
1111  DocumentLayout document_layout = 17;
1112
1113  // Document chunked based on the chunking config.
1114  ChunkedDocument chunked_document = 18;
1115}
1116
1117// The revision reference specifies which revision on the document to read.
1118message RevisionRef {
1119  // Some predefined revision cases.
1120  enum RevisionCase {
1121    // Unspecified case; falls back to reading `LATEST_HUMAN_REVIEW`.
1122    REVISION_CASE_UNSPECIFIED = 0;
1123
1124    // The latest revision made by a human.
1125    LATEST_HUMAN_REVIEW = 1;
1126
1127    // The latest revision based on timestamp.
1128    LATEST_TIMESTAMP = 2;
1129
1130    // The first (OCR) revision.
1131    BASE_OCR_REVISION = 3;
1132  }
1133
1134  // Specifies which revision to read.
1135  oneof source {
1136    // Reads the revision by the predefined case.
1137    RevisionCase revision_case = 1;
1138
1139    // Reads the revision given by the ID.
1140    string revision_id = 2;
1141
1142    // Reads the revision generated by the processor version.
1143    // The format takes the full resource name of the processor version.
1144    // `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
1145    string latest_processor_version = 3;
1146  }
1147}
1148