xref: /aosp_15_r20/external/googleapis/google/cloud/documentai/v1beta3/document.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1*d5c09012SAndroid Build Coastguard Worker// Copyright 2023 Google LLC
2*d5c09012SAndroid Build Coastguard Worker//
3*d5c09012SAndroid Build Coastguard Worker// Licensed under the Apache License, Version 2.0 (the "License");
4*d5c09012SAndroid Build Coastguard Worker// you may not use this file except in compliance with the License.
5*d5c09012SAndroid Build Coastguard Worker// You may obtain a copy of the License at
6*d5c09012SAndroid Build Coastguard Worker//
7*d5c09012SAndroid Build Coastguard Worker//     http://www.apache.org/licenses/LICENSE-2.0
8*d5c09012SAndroid Build Coastguard Worker//
9*d5c09012SAndroid Build Coastguard Worker// Unless required by applicable law or agreed to in writing, software
10*d5c09012SAndroid Build Coastguard Worker// distributed under the License is distributed on an "AS IS" BASIS,
11*d5c09012SAndroid Build Coastguard Worker// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*d5c09012SAndroid Build Coastguard Worker// See the License for the specific language governing permissions and
13*d5c09012SAndroid Build Coastguard Worker// limitations under the License.
14*d5c09012SAndroid Build Coastguard Worker
15*d5c09012SAndroid Build Coastguard Workersyntax = "proto3";
16*d5c09012SAndroid Build Coastguard Worker
17*d5c09012SAndroid Build Coastguard Workerpackage google.cloud.documentai.v1beta3;
18*d5c09012SAndroid Build Coastguard Worker
19*d5c09012SAndroid Build Coastguard Workerimport "google/api/field_behavior.proto";
20*d5c09012SAndroid Build Coastguard Workerimport "google/cloud/documentai/v1beta3/barcode.proto";
21*d5c09012SAndroid Build Coastguard Workerimport "google/cloud/documentai/v1beta3/geometry.proto";
22*d5c09012SAndroid Build Coastguard Workerimport "google/protobuf/timestamp.proto";
23*d5c09012SAndroid Build Coastguard Workerimport "google/rpc/status.proto";
24*d5c09012SAndroid Build Coastguard Workerimport "google/type/color.proto";
25*d5c09012SAndroid Build Coastguard Workerimport "google/type/date.proto";
26*d5c09012SAndroid Build Coastguard Workerimport "google/type/datetime.proto";
27*d5c09012SAndroid Build Coastguard Workerimport "google/type/money.proto";
28*d5c09012SAndroid Build Coastguard Workerimport "google/type/postal_address.proto";
29*d5c09012SAndroid Build Coastguard Worker
30*d5c09012SAndroid Build Coastguard Workeroption csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
31*d5c09012SAndroid Build Coastguard Workeroption go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
32*d5c09012SAndroid Build Coastguard Workeroption java_multiple_files = true;
33*d5c09012SAndroid Build Coastguard Workeroption java_outer_classname = "DocumentProto";
34*d5c09012SAndroid Build Coastguard Workeroption java_package = "com.google.cloud.documentai.v1beta3";
35*d5c09012SAndroid Build Coastguard Workeroption php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
36*d5c09012SAndroid Build Coastguard Workeroption ruby_package = "Google::Cloud::DocumentAI::V1beta3";
37*d5c09012SAndroid Build Coastguard Worker
38*d5c09012SAndroid Build Coastguard Worker// Document represents the canonical document resource in Document AI. It is an
39*d5c09012SAndroid Build Coastguard Worker// interchange format that provides insights into documents and allows for
40*d5c09012SAndroid Build Coastguard Worker// collaboration between users and Document AI to iterate and optimize for
41*d5c09012SAndroid Build Coastguard Worker// quality.
42*d5c09012SAndroid Build Coastguard Workermessage Document {
43*d5c09012SAndroid Build Coastguard Worker  // For a large document, sharding may be performed to produce several
44*d5c09012SAndroid Build Coastguard Worker  // document shards. Each document shard contains this field to detail which
45*d5c09012SAndroid Build Coastguard Worker  // shard it is.
46*d5c09012SAndroid Build Coastguard Worker  message ShardInfo {
47*d5c09012SAndroid Build Coastguard Worker    // The 0-based index of this shard among the document's shards.
48*d5c09012SAndroid Build Coastguard Worker    int64 shard_index = 1;
49*d5c09012SAndroid Build Coastguard Worker
50*d5c09012SAndroid Build Coastguard Worker    // Total number of shards produced for the document.
51*d5c09012SAndroid Build Coastguard Worker    int64 shard_count = 2;
52*d5c09012SAndroid Build Coastguard Worker
53*d5c09012SAndroid Build Coastguard Worker    // The index of the first character in this shard's
54*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta3.Document.text] within the
55*d5c09012SAndroid Build Coastguard Worker    // overall document global text.
56*d5c09012SAndroid Build Coastguard Worker    int64 text_offset = 3;
57*d5c09012SAndroid Build Coastguard Worker  }
58*d5c09012SAndroid Build Coastguard Worker
59*d5c09012SAndroid Build Coastguard Worker  // Annotation for common text style attributes. This adheres to CSS
60*d5c09012SAndroid Build Coastguard Worker  // conventions as much as possible.
61*d5c09012SAndroid Build Coastguard Worker  message Style {
62*d5c09012SAndroid Build Coastguard Worker    // Font size with unit.
63*d5c09012SAndroid Build Coastguard Worker    message FontSize {
64*d5c09012SAndroid Build Coastguard Worker      // Numeric font size for the text, expressed in `unit`.
65*d5c09012SAndroid Build Coastguard Worker      float size = 1;
66*d5c09012SAndroid Build Coastguard Worker
67*d5c09012SAndroid Build Coastguard Worker      // Unit for the font size. Follows CSS naming (such as `in`, `px`, and
68*d5c09012SAndroid Build Coastguard Worker      // `pt`).
69*d5c09012SAndroid Build Coastguard Worker      string unit = 2;
70*d5c09012SAndroid Build Coastguard Worker    }
71*d5c09012SAndroid Build Coastguard Worker
72*d5c09012SAndroid Build Coastguard Worker    // Text anchor indexing into the
73*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
74*d5c09012SAndroid Build Coastguard Worker    TextAnchor text_anchor = 1;
75*d5c09012SAndroid Build Coastguard Worker
76*d5c09012SAndroid Build Coastguard Worker    // Text color.
77*d5c09012SAndroid Build Coastguard Worker    google.type.Color color = 2;
78*d5c09012SAndroid Build Coastguard Worker
79*d5c09012SAndroid Build Coastguard Worker    // Text background color.
80*d5c09012SAndroid Build Coastguard Worker    google.type.Color background_color = 3;
81*d5c09012SAndroid Build Coastguard Worker
82*d5c09012SAndroid Build Coastguard Worker    // [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
83*d5c09012SAndroid Build Coastguard Worker    // Possible values are `normal`, `bold`, `bolder`, and `lighter`.
84*d5c09012SAndroid Build Coastguard Worker    string font_weight = 4;
85*d5c09012SAndroid Build Coastguard Worker
86*d5c09012SAndroid Build Coastguard Worker    // [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
87*d5c09012SAndroid Build Coastguard Worker    // Possible values are `normal`, `italic`, and `oblique`.
88*d5c09012SAndroid Build Coastguard Worker    string text_style = 5;
89*d5c09012SAndroid Build Coastguard Worker
90*d5c09012SAndroid Build Coastguard Worker    // [Text
91*d5c09012SAndroid Build Coastguard Worker    // decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
92*d5c09012SAndroid Build Coastguard Worker    // Follows the CSS standard: `<text-decoration-line> <text-decoration-color>
93*d5c09012SAndroid Build Coastguard Worker    // <text-decoration-style>`.
94*d5c09012SAndroid Build Coastguard Worker    string text_decoration = 6;
95*d5c09012SAndroid Build Coastguard Worker
96*d5c09012SAndroid Build Coastguard Worker    // Font size, including its unit.
97*d5c09012SAndroid Build Coastguard Worker    FontSize font_size = 7;
98*d5c09012SAndroid Build Coastguard Worker
99*d5c09012SAndroid Build Coastguard Worker    // [Font family](https://www.w3schools.com/cssref/pr_font_font-family.asp),
100*d5c09012SAndroid Build Coastguard Worker    // such as `Arial` or `Times New Roman`.
101*d5c09012SAndroid Build Coastguard Worker    string font_family = 8;
102*d5c09012SAndroid Build Coastguard Worker  }
103*d5c09012SAndroid Build Coastguard Worker
104*d5c09012SAndroid Build Coastguard Worker  // A page in a [Document][google.cloud.documentai.v1beta3.Document].
105*d5c09012SAndroid Build Coastguard Worker  message Page {
106*d5c09012SAndroid Build Coastguard Worker    // Dimension for the page.
107*d5c09012SAndroid Build Coastguard Worker    message Dimension {
108*d5c09012SAndroid Build Coastguard Worker      // Width of the page.
109*d5c09012SAndroid Build Coastguard Worker      float width = 1;
110*d5c09012SAndroid Build Coastguard Worker
111*d5c09012SAndroid Build Coastguard Worker      // Height of the page.
112*d5c09012SAndroid Build Coastguard Worker      float height = 2;
113*d5c09012SAndroid Build Coastguard Worker
114*d5c09012SAndroid Build Coastguard Worker      // Unit for the page dimensions.
115*d5c09012SAndroid Build Coastguard Worker      string unit = 3;
116*d5c09012SAndroid Build Coastguard Worker    }
117*d5c09012SAndroid Build Coastguard Worker
118*d5c09012SAndroid Build Coastguard Worker    // Rendered image contents for this page.
119*d5c09012SAndroid Build Coastguard Worker    message Image {
120*d5c09012SAndroid Build Coastguard Worker      // Raw byte content of the image; see `mime_type` for its encoding.
121*d5c09012SAndroid Build Coastguard Worker      bytes content = 1;
122*d5c09012SAndroid Build Coastguard Worker
123*d5c09012SAndroid Build Coastguard Worker      // Encoding [media type (MIME
124*d5c09012SAndroid Build Coastguard Worker      // type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
125*d5c09012SAndroid Build Coastguard Worker      // for the image.
126*d5c09012SAndroid Build Coastguard Worker      string mime_type = 2;
127*d5c09012SAndroid Build Coastguard Worker
128*d5c09012SAndroid Build Coastguard Worker      // Width of the image, in pixels.
129*d5c09012SAndroid Build Coastguard Worker      int32 width = 3;
130*d5c09012SAndroid Build Coastguard Worker
131*d5c09012SAndroid Build Coastguard Worker      // Height of the image, in pixels.
132*d5c09012SAndroid Build Coastguard Worker      int32 height = 4;
133*d5c09012SAndroid Build Coastguard Worker    }
134*d5c09012SAndroid Build Coastguard Worker
135*d5c09012SAndroid Build Coastguard Worker    // Representation for transformation matrix, intended to be compatible and
136*d5c09012SAndroid Build Coastguard Worker    // used with OpenCV format for image manipulation.
137*d5c09012SAndroid Build Coastguard Worker    message Matrix {
138*d5c09012SAndroid Build Coastguard Worker      // Number of rows in the matrix.
139*d5c09012SAndroid Build Coastguard Worker      int32 rows = 1;
140*d5c09012SAndroid Build Coastguard Worker
141*d5c09012SAndroid Build Coastguard Worker      // Number of columns in the matrix.
142*d5c09012SAndroid Build Coastguard Worker      int32 cols = 2;
143*d5c09012SAndroid Build Coastguard Worker
144*d5c09012SAndroid Build Coastguard Worker      // Encodes which data type the matrix uses, as an OpenCV type code.
145*d5c09012SAndroid Build Coastguard Worker      // For example, 0 (CV_8U) is an unsigned 8-bit image. For the full list
146*d5c09012SAndroid Build Coastguard Worker      // of OpenCV primitive data types, see
147*d5c09012SAndroid Build Coastguard Worker      // https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html
148*d5c09012SAndroid Build Coastguard Worker      int32 type = 3;
149*d5c09012SAndroid Build Coastguard Worker
150*d5c09012SAndroid Build Coastguard Worker      // The matrix data, as raw bytes.
151*d5c09012SAndroid Build Coastguard Worker      bytes data = 4;
152*d5c09012SAndroid Build Coastguard Worker    }
153*d5c09012SAndroid Build Coastguard Worker
154*d5c09012SAndroid Build Coastguard Worker    // Visual element describing a layout unit on a page.
155*d5c09012SAndroid Build Coastguard Worker    message Layout {
156*d5c09012SAndroid Build Coastguard Worker      // Detected human reading orientation.
157*d5c09012SAndroid Build Coastguard Worker      enum Orientation {
158*d5c09012SAndroid Build Coastguard Worker        // Unspecified orientation.
159*d5c09012SAndroid Build Coastguard Worker        ORIENTATION_UNSPECIFIED = 0;
160*d5c09012SAndroid Build Coastguard Worker
161*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page up.
162*d5c09012SAndroid Build Coastguard Worker        PAGE_UP = 1;
163*d5c09012SAndroid Build Coastguard Worker
164*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page right.
165*d5c09012SAndroid Build Coastguard Worker        // Turn the head 90 degrees clockwise from upright to read.
166*d5c09012SAndroid Build Coastguard Worker        PAGE_RIGHT = 2;
167*d5c09012SAndroid Build Coastguard Worker
168*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page down.
169*d5c09012SAndroid Build Coastguard Worker        // Turn the head 180 degrees from upright to read.
170*d5c09012SAndroid Build Coastguard Worker        PAGE_DOWN = 3;
171*d5c09012SAndroid Build Coastguard Worker
172*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page left.
173*d5c09012SAndroid Build Coastguard Worker        // Turn the head 90 degrees counterclockwise from upright to read.
174*d5c09012SAndroid Build Coastguard Worker        PAGE_LEFT = 4;
175*d5c09012SAndroid Build Coastguard Worker      }
176*d5c09012SAndroid Build Coastguard Worker
177*d5c09012SAndroid Build Coastguard Worker      // Text anchor indexing into the
178*d5c09012SAndroid Build Coastguard Worker      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
179*d5c09012SAndroid Build Coastguard Worker      TextAnchor text_anchor = 1;
180*d5c09012SAndroid Build Coastguard Worker
181*d5c09012SAndroid Build Coastguard Worker      // Confidence of the current
182*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] within
183*d5c09012SAndroid Build Coastguard Worker      // context of the object this layout is for. For example, confidence can be
184*d5c09012SAndroid Build Coastguard Worker      // for a single token, a table, a visual element, etc., depending on context.
185*d5c09012SAndroid Build Coastguard Worker      // Range `[0, 1]`.
186*d5c09012SAndroid Build Coastguard Worker      float confidence = 2;
187*d5c09012SAndroid Build Coastguard Worker
188*d5c09012SAndroid Build Coastguard Worker      // The bounding polygon for the
189*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
190*d5c09012SAndroid Build Coastguard Worker      BoundingPoly bounding_poly = 3;
191*d5c09012SAndroid Build Coastguard Worker
192*d5c09012SAndroid Build Coastguard Worker      // Detected orientation for the
193*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
194*d5c09012SAndroid Build Coastguard Worker      Orientation orientation = 4;
195*d5c09012SAndroid Build Coastguard Worker    }
196*d5c09012SAndroid Build Coastguard Worker
197*d5c09012SAndroid Build Coastguard Worker    // A block has a set of lines (collected into paragraphs) that have a
198*d5c09012SAndroid Build Coastguard Worker    // common line-spacing and orientation.
199*d5c09012SAndroid Build Coastguard Worker    message Block {
200*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
201*d5c09012SAndroid Build Coastguard Worker      // [Block][google.cloud.documentai.v1beta3.Document.Page.Block].
202*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
203*d5c09012SAndroid Build Coastguard Worker
204*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with their confidence.
205*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
206*d5c09012SAndroid Build Coastguard Worker
207*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation. This field is marked deprecated.
208*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 3 [deprecated = true];
209*d5c09012SAndroid Build Coastguard Worker    }
210*d5c09012SAndroid Build Coastguard Worker
211*d5c09012SAndroid Build Coastguard Worker    // A collection of lines that a human would perceive as a paragraph.
212*d5c09012SAndroid Build Coastguard Worker    message Paragraph {
213*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
214*d5c09012SAndroid Build Coastguard Worker      // [Paragraph][google.cloud.documentai.v1beta3.Document.Page.Paragraph].
215*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
216*d5c09012SAndroid Build Coastguard Worker
217*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with their confidence.
218*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
219*d5c09012SAndroid Build Coastguard Worker
220*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation. This field is marked deprecated.
221*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 3 [deprecated = true];
222*d5c09012SAndroid Build Coastguard Worker    }
223*d5c09012SAndroid Build Coastguard Worker
224*d5c09012SAndroid Build Coastguard Worker    // A collection of tokens that a human would perceive as a line.
225*d5c09012SAndroid Build Coastguard Worker    // Does not cross column boundaries; can be horizontal, vertical, etc.
226*d5c09012SAndroid Build Coastguard Worker    message Line {
227*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
228*d5c09012SAndroid Build Coastguard Worker      // [Line][google.cloud.documentai.v1beta3.Document.Page.Line].
229*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
230*d5c09012SAndroid Build Coastguard Worker
231*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with their confidence.
232*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
233*d5c09012SAndroid Build Coastguard Worker
234*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation. This field is marked deprecated.
235*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 3 [deprecated = true];
236*d5c09012SAndroid Build Coastguard Worker    }
237*d5c09012SAndroid Build Coastguard Worker
238*d5c09012SAndroid Build Coastguard Worker    // A detected token.
239*d5c09012SAndroid Build Coastguard Worker    message Token {
240*d5c09012SAndroid Build Coastguard Worker      // Detected break at the end of a
241*d5c09012SAndroid Build Coastguard Worker      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
242*d5c09012SAndroid Build Coastguard Worker      message DetectedBreak {
243*d5c09012SAndroid Build Coastguard Worker        // Enum to denote the type of break found.
244*d5c09012SAndroid Build Coastguard Worker        enum Type {
245*d5c09012SAndroid Build Coastguard Worker          // Unspecified break type.
246*d5c09012SAndroid Build Coastguard Worker          TYPE_UNSPECIFIED = 0;
247*d5c09012SAndroid Build Coastguard Worker
248*d5c09012SAndroid Build Coastguard Worker          // A single whitespace.
249*d5c09012SAndroid Build Coastguard Worker          SPACE = 1;
250*d5c09012SAndroid Build Coastguard Worker
251*d5c09012SAndroid Build Coastguard Worker          // A wider whitespace.
252*d5c09012SAndroid Build Coastguard Worker          WIDE_SPACE = 2;
253*d5c09012SAndroid Build Coastguard Worker
254*d5c09012SAndroid Build Coastguard Worker          // A hyphen that indicates that a token has been split across lines.
255*d5c09012SAndroid Build Coastguard Worker          HYPHEN = 3;
256*d5c09012SAndroid Build Coastguard Worker        }
257*d5c09012SAndroid Build Coastguard Worker
258*d5c09012SAndroid Build Coastguard Worker        // Detected break type.
259*d5c09012SAndroid Build Coastguard Worker        Type type = 1;
260*d5c09012SAndroid Build Coastguard Worker      }
261*d5c09012SAndroid Build Coastguard Worker
262*d5c09012SAndroid Build Coastguard Worker      // Font and other text style attributes.
263*d5c09012SAndroid Build Coastguard Worker      message StyleInfo {
264*d5c09012SAndroid Build Coastguard Worker        // Font size in points (`1` point is `¹⁄₇₂` inches).
265*d5c09012SAndroid Build Coastguard Worker        int32 font_size = 1;
266*d5c09012SAndroid Build Coastguard Worker
267*d5c09012SAndroid Build Coastguard Worker        // Font size in pixels, equal to _unrounded
268*d5c09012SAndroid Build Coastguard Worker        // [font_size][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_size]_
269*d5c09012SAndroid Build Coastguard Worker        // * _resolution_ ÷ `72.0`.
270*d5c09012SAndroid Build Coastguard Worker        double pixel_font_size = 2;
271*d5c09012SAndroid Build Coastguard Worker
272*d5c09012SAndroid Build Coastguard Worker        // Letter spacing in points.
273*d5c09012SAndroid Build Coastguard Worker        double letter_spacing = 3;
274*d5c09012SAndroid Build Coastguard Worker
275*d5c09012SAndroid Build Coastguard Worker        // Name or style of the font.
276*d5c09012SAndroid Build Coastguard Worker        string font_type = 4;
277*d5c09012SAndroid Build Coastguard Worker
278*d5c09012SAndroid Build Coastguard Worker        // Whether the text is bold (equivalent to
279*d5c09012SAndroid Build Coastguard Worker        // [font_weight][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_weight]
280*d5c09012SAndroid Build Coastguard Worker        // being at least `700`).
281*d5c09012SAndroid Build Coastguard Worker        bool bold = 5;
282*d5c09012SAndroid Build Coastguard Worker
283*d5c09012SAndroid Build Coastguard Worker        // Whether the text is italic.
284*d5c09012SAndroid Build Coastguard Worker        bool italic = 6;
285*d5c09012SAndroid Build Coastguard Worker
286*d5c09012SAndroid Build Coastguard Worker        // Whether the text is underlined.
287*d5c09012SAndroid Build Coastguard Worker        bool underlined = 7;
288*d5c09012SAndroid Build Coastguard Worker
289*d5c09012SAndroid Build Coastguard Worker        // Whether the text is struck through. This feature is not supported yet.
290*d5c09012SAndroid Build Coastguard Worker        bool strikeout = 8;
291*d5c09012SAndroid Build Coastguard Worker
292*d5c09012SAndroid Build Coastguard Worker        // Whether the text is a subscript. This feature is not supported yet.
293*d5c09012SAndroid Build Coastguard Worker        bool subscript = 9;
294*d5c09012SAndroid Build Coastguard Worker
295*d5c09012SAndroid Build Coastguard Worker        // Whether the text is a superscript. This feature is not supported yet.
296*d5c09012SAndroid Build Coastguard Worker        bool superscript = 10;
297*d5c09012SAndroid Build Coastguard Worker
298*d5c09012SAndroid Build Coastguard Worker        // Whether the text is in small caps. This feature is not supported yet.
299*d5c09012SAndroid Build Coastguard Worker        bool smallcaps = 11;
300*d5c09012SAndroid Build Coastguard Worker
301*d5c09012SAndroid Build Coastguard Worker        // TrueType weight on a scale from `100` (thin) to `1000` (ultra-heavy).
302*d5c09012SAndroid Build Coastguard Worker        // Normal is `400`, bold is `700`.
303*d5c09012SAndroid Build Coastguard Worker        int32 font_weight = 12;
304*d5c09012SAndroid Build Coastguard Worker
305*d5c09012SAndroid Build Coastguard Worker        // Whether the text is handwritten.
306*d5c09012SAndroid Build Coastguard Worker        bool handwritten = 13;
307*d5c09012SAndroid Build Coastguard Worker
308*d5c09012SAndroid Build Coastguard Worker        // Color of the text.
309*d5c09012SAndroid Build Coastguard Worker        google.type.Color text_color = 14;
310*d5c09012SAndroid Build Coastguard Worker
311*d5c09012SAndroid Build Coastguard Worker        // Color of the background.
312*d5c09012SAndroid Build Coastguard Worker        google.type.Color background_color = 15;
313*d5c09012SAndroid Build Coastguard Worker      }
314*d5c09012SAndroid Build Coastguard Worker
315*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
316*d5c09012SAndroid Build Coastguard Worker      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
317*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
318*d5c09012SAndroid Build Coastguard Worker
319*d5c09012SAndroid Build Coastguard Worker      // Detected break at the end of a
320*d5c09012SAndroid Build Coastguard Worker      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
321*d5c09012SAndroid Build Coastguard Worker      DetectedBreak detected_break = 2;
322*d5c09012SAndroid Build Coastguard Worker
323*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with their confidence.
324*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 3;
325*d5c09012SAndroid Build Coastguard Worker
326*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation. This field is marked deprecated.
327*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 4 [deprecated = true];
328*d5c09012SAndroid Build Coastguard Worker
329*d5c09012SAndroid Build Coastguard Worker      // Text style attributes.
330*d5c09012SAndroid Build Coastguard Worker      StyleInfo style_info = 5;
331*d5c09012SAndroid Build Coastguard Worker    }
332*d5c09012SAndroid Build Coastguard Worker
333*d5c09012SAndroid Build Coastguard Worker    // A detected symbol.
334*d5c09012SAndroid Build Coastguard Worker    message Symbol {
335*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
336*d5c09012SAndroid Build Coastguard Worker      // [Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol].
337*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
338*d5c09012SAndroid Build Coastguard Worker
339*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with their confidence.
340*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
341*d5c09012SAndroid Build Coastguard Worker    }
342*d5c09012SAndroid Build Coastguard Worker
343*d5c09012SAndroid Build Coastguard Worker    // Detected non-text visual elements (for example, a checkbox or a signature)
344*d5c09012SAndroid Build Coastguard Worker    // on the page.
345*d5c09012SAndroid Build Coastguard Worker    message VisualElement {
346*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
347*d5c09012SAndroid Build Coastguard Worker      // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
348*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
349*d5c09012SAndroid Build Coastguard Worker
350*d5c09012SAndroid Build Coastguard Worker      // Type of the
351*d5c09012SAndroid Build Coastguard Worker      // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
352*d5c09012SAndroid Build Coastguard Worker      string type = 2;
353*d5c09012SAndroid Build Coastguard Worker
354*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with their confidence.
355*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 3;
356*d5c09012SAndroid Build Coastguard Worker    }
357*d5c09012SAndroid Build Coastguard Worker
358*d5c09012SAndroid Build Coastguard Worker    // A table representation similar to HTML table structure.
359*d5c09012SAndroid Build Coastguard Worker    message Table {
360*d5c09012SAndroid Build Coastguard Worker      // A row of table cells.
361*d5c09012SAndroid Build Coastguard Worker      message TableRow {
362*d5c09012SAndroid Build Coastguard Worker        // Cells that make up this row.
363*d5c09012SAndroid Build Coastguard Worker        repeated TableCell cells = 1;
364*d5c09012SAndroid Build Coastguard Worker      }
365*d5c09012SAndroid Build Coastguard Worker
366*d5c09012SAndroid Build Coastguard Worker      // A cell representation inside the table.
367*d5c09012SAndroid Build Coastguard Worker      message TableCell {
368*d5c09012SAndroid Build Coastguard Worker        // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
369*d5c09012SAndroid Build Coastguard Worker        // [TableCell][google.cloud.documentai.v1beta3.Document.Page.Table.TableCell].
370*d5c09012SAndroid Build Coastguard Worker        Layout layout = 1;
371*d5c09012SAndroid Build Coastguard Worker
372*d5c09012SAndroid Build Coastguard Worker        // Number of rows this cell spans.
373*d5c09012SAndroid Build Coastguard Worker        int32 row_span = 2;
374*d5c09012SAndroid Build Coastguard Worker
375*d5c09012SAndroid Build Coastguard Worker        // Number of columns this cell spans.
376*d5c09012SAndroid Build Coastguard Worker        int32 col_span = 3;
377*d5c09012SAndroid Build Coastguard Worker
378*d5c09012SAndroid Build Coastguard Worker        // A list of detected languages together with their confidence.
379*d5c09012SAndroid Build Coastguard Worker        repeated DetectedLanguage detected_languages = 4;
380*d5c09012SAndroid Build Coastguard Worker      }
381*d5c09012SAndroid Build Coastguard Worker
382*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
383*d5c09012SAndroid Build Coastguard Worker      // [Table][google.cloud.documentai.v1beta3.Document.Page.Table].
384*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
385*d5c09012SAndroid Build Coastguard Worker
386*d5c09012SAndroid Build Coastguard Worker      // Header rows of the table.
387*d5c09012SAndroid Build Coastguard Worker      repeated TableRow header_rows = 2;
388*d5c09012SAndroid Build Coastguard Worker
389*d5c09012SAndroid Build Coastguard Worker      // Body rows of the table.
390*d5c09012SAndroid Build Coastguard Worker      repeated TableRow body_rows = 3;
391*d5c09012SAndroid Build Coastguard Worker
392*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with their confidence.
393*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 4;
394*d5c09012SAndroid Build Coastguard Worker
395*d5c09012SAndroid Build Coastguard Worker      // The history of this table. This field is marked deprecated.
396*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 5 [deprecated = true];
397*d5c09012SAndroid Build Coastguard Worker    }
398*d5c09012SAndroid Build Coastguard Worker
399*d5c09012SAndroid Build Coastguard Worker    // A form field detected on the page.
400*d5c09012SAndroid Build Coastguard Worker    message FormField {
401*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
402*d5c09012SAndroid Build Coastguard Worker      // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
403*d5c09012SAndroid Build Coastguard Worker      // name, e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
404*d5c09012SAndroid Build Coastguard Worker      Layout field_name = 1;
405*d5c09012SAndroid Build Coastguard Worker
406*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
407*d5c09012SAndroid Build Coastguard Worker      // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
408*d5c09012SAndroid Build Coastguard Worker      // value.
409*d5c09012SAndroid Build Coastguard Worker      Layout field_value = 2;
410*d5c09012SAndroid Build Coastguard Worker
411*d5c09012SAndroid Build Coastguard Worker      // Languages detected in the field name, each with its confidence.
412*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage name_detected_languages = 3;
413*d5c09012SAndroid Build Coastguard Worker
414*d5c09012SAndroid Build Coastguard Worker      // Languages detected in the field value, each with its confidence.
415*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage value_detected_languages = 4;
416*d5c09012SAndroid Build Coastguard Worker
417*d5c09012SAndroid Build Coastguard Worker      // If the value is non-textual, this field represents the type. Current
418*d5c09012SAndroid Build Coastguard Worker      // valid values are:
419*d5c09012SAndroid Build Coastguard Worker      //
420*d5c09012SAndroid Build Coastguard Worker      // - blank (this indicates the `field_value` is normal text)
421*d5c09012SAndroid Build Coastguard Worker      // - `unfilled_checkbox`
422*d5c09012SAndroid Build Coastguard Worker      // - `filled_checkbox`
423*d5c09012SAndroid Build Coastguard Worker      string value_type = 5;
424*d5c09012SAndroid Build Coastguard Worker
425*d5c09012SAndroid Build Coastguard Worker      // Created for the Labeling UI to export key text.
426*d5c09012SAndroid Build Coastguard Worker      // If corrections were made to the text identified by the
427*d5c09012SAndroid Build Coastguard Worker      // `field_name.text_anchor`, this field will contain the correction.
428*d5c09012SAndroid Build Coastguard Worker      string corrected_key_text = 6;
429*d5c09012SAndroid Build Coastguard Worker
430*d5c09012SAndroid Build Coastguard Worker      // Created for the Labeling UI to export value text.
431*d5c09012SAndroid Build Coastguard Worker      // If corrections were made to the text identified by the
432*d5c09012SAndroid Build Coastguard Worker      // `field_value.text_anchor`, this field will contain the correction.
433*d5c09012SAndroid Build Coastguard Worker      string corrected_value_text = 7;
434*d5c09012SAndroid Build Coastguard Worker
435*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation.
436*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 8;
437*d5c09012SAndroid Build Coastguard Worker    }
438*d5c09012SAndroid Build Coastguard Worker
439*d5c09012SAndroid Build Coastguard Worker    // A barcode detected on the page.
440*d5c09012SAndroid Build Coastguard Worker    message DetectedBarcode {
441*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
442*d5c09012SAndroid Build Coastguard Worker      // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
443*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
444*d5c09012SAndroid Build Coastguard Worker
445*d5c09012SAndroid Build Coastguard Worker      // Detailed barcode information for the
446*d5c09012SAndroid Build Coastguard Worker      // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
447*d5c09012SAndroid Build Coastguard Worker      Barcode barcode = 2;
448*d5c09012SAndroid Build Coastguard Worker    }
449*d5c09012SAndroid Build Coastguard Worker
450*d5c09012SAndroid Build Coastguard Worker    // A language detected for a structural component, with its confidence.
451*d5c09012SAndroid Build Coastguard Worker    message DetectedLanguage {
452*d5c09012SAndroid Build Coastguard Worker      // The [BCP-47 language
453*d5c09012SAndroid Build Coastguard Worker      // code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
454*d5c09012SAndroid Build Coastguard Worker      // such as `en-US` or `sr-Latn`.
455*d5c09012SAndroid Build Coastguard Worker      string language_code = 1;
456*d5c09012SAndroid Build Coastguard Worker
457*d5c09012SAndroid Build Coastguard Worker      // Confidence of the detected language. Range `[0, 1]`.
458*d5c09012SAndroid Build Coastguard Worker      float confidence = 2;
459*d5c09012SAndroid Build Coastguard Worker    }
460*d5c09012SAndroid Build Coastguard Worker
461*d5c09012SAndroid Build Coastguard Worker    // Image quality scores for the page image.
462*d5c09012SAndroid Build Coastguard Worker    message ImageQualityScores {
463*d5c09012SAndroid Build Coastguard Worker      // An image quality defect detected in the page image, with its confidence.
464*d5c09012SAndroid Build Coastguard Worker      message DetectedDefect {
465*d5c09012SAndroid Build Coastguard Worker        // Name of the defect type. Supported values are:
466*d5c09012SAndroid Build Coastguard Worker        //
467*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_blurry`
468*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_noisy`
469*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_dark`
470*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_faint`
471*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_text_too_small`
472*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_document_cutoff`
473*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_text_cutoff`
474*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_glare`
475*d5c09012SAndroid Build Coastguard Worker        string type = 1;
476*d5c09012SAndroid Build Coastguard Worker
477*d5c09012SAndroid Build Coastguard Worker        // Confidence of the detected defect. Range `[0, 1]` where `1` indicates
478*d5c09012SAndroid Build Coastguard Worker        // strong confidence that the defect exists.
479*d5c09012SAndroid Build Coastguard Worker        float confidence = 2;
480*d5c09012SAndroid Build Coastguard Worker      }
481*d5c09012SAndroid Build Coastguard Worker
482*d5c09012SAndroid Build Coastguard Worker      // The overall quality score. Range `[0, 1]` where `1` is perfect quality.
483*d5c09012SAndroid Build Coastguard Worker      float quality_score = 1;
484*d5c09012SAndroid Build Coastguard Worker
485*d5c09012SAndroid Build Coastguard Worker      // A list of detected defects, each with its type and confidence.
486*d5c09012SAndroid Build Coastguard Worker      repeated DetectedDefect detected_defects = 2;
487*d5c09012SAndroid Build Coastguard Worker    }
488*d5c09012SAndroid Build Coastguard Worker
489*d5c09012SAndroid Build Coastguard Worker    // 1-based index for the current
490*d5c09012SAndroid Build Coastguard Worker    // [Page][google.cloud.documentai.v1beta3.Document.Page] in a parent
491*d5c09012SAndroid Build Coastguard Worker    // [Document][google.cloud.documentai.v1beta3.Document]. Useful when a page
492*d5c09012SAndroid Build Coastguard Worker    // is taken out of a [Document][google.cloud.documentai.v1beta3.Document]
493*d5c09012SAndroid Build Coastguard Worker    // for individual processing.
494*d5c09012SAndroid Build Coastguard Worker    int32 page_number = 1;
495*d5c09012SAndroid Build Coastguard Worker
496*d5c09012SAndroid Build Coastguard Worker    // Rendered image for this page. This image is preprocessed to remove any
497*d5c09012SAndroid Build Coastguard Worker    // skew, rotation, and distortions such that the annotation bounding boxes
498*d5c09012SAndroid Build Coastguard Worker    // can be upright and axis-aligned.
499*d5c09012SAndroid Build Coastguard Worker    Image image = 13;
500*d5c09012SAndroid Build Coastguard Worker
501*d5c09012SAndroid Build Coastguard Worker    // Transformation matrices that were applied to the original document image
502*d5c09012SAndroid Build Coastguard Worker    // to produce
503*d5c09012SAndroid Build Coastguard Worker    // [Page.image][google.cloud.documentai.v1beta3.Document.Page.image].
504*d5c09012SAndroid Build Coastguard Worker    repeated Matrix transforms = 14;
505*d5c09012SAndroid Build Coastguard Worker
506*d5c09012SAndroid Build Coastguard Worker    // Physical dimension of the page.
507*d5c09012SAndroid Build Coastguard Worker    Dimension dimension = 2;
508*d5c09012SAndroid Build Coastguard Worker
509*d5c09012SAndroid Build Coastguard Worker    // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
510*d5c09012SAndroid Build Coastguard Worker    // page.
511*d5c09012SAndroid Build Coastguard Worker    Layout layout = 3;
512*d5c09012SAndroid Build Coastguard Worker
513*d5c09012SAndroid Build Coastguard Worker    // A list of detected languages, each with its confidence.
514*d5c09012SAndroid Build Coastguard Worker    repeated DetectedLanguage detected_languages = 4;
515*d5c09012SAndroid Build Coastguard Worker
516*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected text blocks on the page.
517*d5c09012SAndroid Build Coastguard Worker    // A block has a set of lines (collected into paragraphs) that have a common
518*d5c09012SAndroid Build Coastguard Worker    // line-spacing and orientation.
519*d5c09012SAndroid Build Coastguard Worker    repeated Block blocks = 5;
520*d5c09012SAndroid Build Coastguard Worker
521*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected text paragraphs on the page.
522*d5c09012SAndroid Build Coastguard Worker    // A paragraph is a collection of lines that a human would perceive as one.
523*d5c09012SAndroid Build Coastguard Worker    repeated Paragraph paragraphs = 6;
524*d5c09012SAndroid Build Coastguard Worker
525*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected text lines on the page.
526*d5c09012SAndroid Build Coastguard Worker    // A line is a collection of tokens that a human would perceive as one.
527*d5c09012SAndroid Build Coastguard Worker    repeated Line lines = 7;
528*d5c09012SAndroid Build Coastguard Worker
529*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected tokens on the page.
530*d5c09012SAndroid Build Coastguard Worker    repeated Token tokens = 8;
531*d5c09012SAndroid Build Coastguard Worker
532*d5c09012SAndroid Build Coastguard Worker    // A list of detected non-text visual elements, e.g. checkbox,
533*d5c09012SAndroid Build Coastguard Worker    // signature, etc. on the page.
534*d5c09012SAndroid Build Coastguard Worker    repeated VisualElement visual_elements = 9;
535*d5c09012SAndroid Build Coastguard Worker
536*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected tables on the page.
537*d5c09012SAndroid Build Coastguard Worker    repeated Table tables = 10;
538*d5c09012SAndroid Build Coastguard Worker
539*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected form fields on the page.
540*d5c09012SAndroid Build Coastguard Worker    repeated FormField form_fields = 11;
541*d5c09012SAndroid Build Coastguard Worker
542*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected symbols on the page.
543*d5c09012SAndroid Build Coastguard Worker    repeated Symbol symbols = 12;
544*d5c09012SAndroid Build Coastguard Worker
545*d5c09012SAndroid Build Coastguard Worker    // A list of barcodes detected on the page.
546*d5c09012SAndroid Build Coastguard Worker    repeated DetectedBarcode detected_barcodes = 15;
547*d5c09012SAndroid Build Coastguard Worker
548*d5c09012SAndroid Build Coastguard Worker    // Image quality scores for the page image.
549*d5c09012SAndroid Build Coastguard Worker    ImageQualityScores image_quality_scores = 17;
550*d5c09012SAndroid Build Coastguard Worker
551*d5c09012SAndroid Build Coastguard Worker    // Deprecated. The history of this page.
552*d5c09012SAndroid Build Coastguard Worker    Provenance provenance = 16 [deprecated = true];
553*d5c09012SAndroid Build Coastguard Worker  }
554*d5c09012SAndroid Build Coastguard Worker
555*d5c09012SAndroid Build Coastguard Worker  // An entity that could be a phrase in the text or a property that belongs to
556*d5c09012SAndroid Build Coastguard Worker  // the document. It is a known entity type, such as a person, an organization,
557*d5c09012SAndroid Build Coastguard Worker  // or a location.
558*d5c09012SAndroid Build Coastguard Worker  message Entity {
559*d5c09012SAndroid Build Coastguard Worker    // Parsed and normalized entity value.
560*d5c09012SAndroid Build Coastguard Worker    message NormalizedValue {
561*d5c09012SAndroid Build Coastguard Worker      // An optional structured entity value.
562*d5c09012SAndroid Build Coastguard Worker      // Must match the entity type defined in the schema, if
563*d5c09012SAndroid Build Coastguard Worker      // known. If this field is present, the `text` field could also be
564*d5c09012SAndroid Build Coastguard Worker      // populated.
565*d5c09012SAndroid Build Coastguard Worker      oneof structured_value {
566*d5c09012SAndroid Build Coastguard Worker        // Money value. See also:
567*d5c09012SAndroid Build Coastguard Worker        // https://github.com/googleapis/googleapis/blob/master/google/type/money.proto
568*d5c09012SAndroid Build Coastguard Worker        google.type.Money money_value = 2;
569*d5c09012SAndroid Build Coastguard Worker
570*d5c09012SAndroid Build Coastguard Worker        // Date value. Includes year, month, day. See also:
571*d5c09012SAndroid Build Coastguard Worker        // https://github.com/googleapis/googleapis/blob/master/google/type/date.proto
572*d5c09012SAndroid Build Coastguard Worker        google.type.Date date_value = 3;
573*d5c09012SAndroid Build Coastguard Worker
574*d5c09012SAndroid Build Coastguard Worker        // DateTime value. Includes date, time, and timezone. See also:
575*d5c09012SAndroid Build Coastguard Worker        // https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto
576*d5c09012SAndroid Build Coastguard Worker        google.type.DateTime datetime_value = 4;
577*d5c09012SAndroid Build Coastguard Worker
578*d5c09012SAndroid Build Coastguard Worker        // Postal address. See also:
579*d5c09012SAndroid Build Coastguard Worker        // https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto
580*d5c09012SAndroid Build Coastguard Worker        google.type.PostalAddress address_value = 5;
581*d5c09012SAndroid Build Coastguard Worker
582*d5c09012SAndroid Build Coastguard Worker        // Boolean value. Can be used for entities with binary values, or for
583*d5c09012SAndroid Build Coastguard Worker        // checkboxes.
584*d5c09012SAndroid Build Coastguard Worker        bool boolean_value = 6;
585*d5c09012SAndroid Build Coastguard Worker
586*d5c09012SAndroid Build Coastguard Worker        // Integer value.
587*d5c09012SAndroid Build Coastguard Worker        int32 integer_value = 7;
588*d5c09012SAndroid Build Coastguard Worker
589*d5c09012SAndroid Build Coastguard Worker        // Float value.
590*d5c09012SAndroid Build Coastguard Worker        float float_value = 8;
591*d5c09012SAndroid Build Coastguard Worker      }
592*d5c09012SAndroid Build Coastguard Worker
593*d5c09012SAndroid Build Coastguard Worker      // Optional. An optional field to store a normalized string.
594*d5c09012SAndroid Build Coastguard Worker      // For some entity types, one of the respective `structured_value` fields
595*d5c09012SAndroid Build Coastguard Worker      // may also be populated. Also, not all the types of `structured_value` will
596*d5c09012SAndroid Build Coastguard Worker      // be normalized. For example, some processors may not generate `float`
597*d5c09012SAndroid Build Coastguard Worker      // or `integer` normalized text by default.
598*d5c09012SAndroid Build Coastguard Worker      //
599*d5c09012SAndroid Build Coastguard Worker      // Below are sample formats mapped to structured values.
600*d5c09012SAndroid Build Coastguard Worker      //
601*d5c09012SAndroid Build Coastguard Worker      // - Money/Currency type (`money_value`) is in the ISO 4217 text format.
602*d5c09012SAndroid Build Coastguard Worker      // - Date type (`date_value`) is in the ISO 8601 text format.
603*d5c09012SAndroid Build Coastguard Worker      // - Datetime type (`datetime_value`) is in the ISO 8601 text format.
604*d5c09012SAndroid Build Coastguard Worker      string text = 1 [(google.api.field_behavior) = OPTIONAL];
605*d5c09012SAndroid Build Coastguard Worker    }
606*d5c09012SAndroid Build Coastguard Worker
607*d5c09012SAndroid Build Coastguard Worker    // Optional. Provenance of the entity.
608*d5c09012SAndroid Build Coastguard Worker    // Text anchor indexing into the
609*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
610*d5c09012SAndroid Build Coastguard Worker    TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL];
611*d5c09012SAndroid Build Coastguard Worker
612*d5c09012SAndroid Build Coastguard Worker    // Required. Entity type from a schema, e.g. `Address`.
613*d5c09012SAndroid Build Coastguard Worker    string type = 2 [(google.api.field_behavior) = REQUIRED];
614*d5c09012SAndroid Build Coastguard Worker
615*d5c09012SAndroid Build Coastguard Worker    // Optional. Text value of the entity, e.g. `1600 Amphitheatre Pkwy`.
616*d5c09012SAndroid Build Coastguard Worker    string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];
617*d5c09012SAndroid Build Coastguard Worker
618*d5c09012SAndroid Build Coastguard Worker    // Optional. Deprecated. Use the `id` field instead.
619*d5c09012SAndroid Build Coastguard Worker    string mention_id = 4 [(google.api.field_behavior) = OPTIONAL];
620*d5c09012SAndroid Build Coastguard Worker
621*d5c09012SAndroid Build Coastguard Worker    // Optional. Confidence of the detected schema entity. Range `[0, 1]`.
622*d5c09012SAndroid Build Coastguard Worker    float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
623*d5c09012SAndroid Build Coastguard Worker
624*d5c09012SAndroid Build Coastguard Worker    // Optional. Represents the provenance of this entity with respect to the
625*d5c09012SAndroid Build Coastguard Worker    // location on the page where it was found.
626*d5c09012SAndroid Build Coastguard Worker    PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];
627*d5c09012SAndroid Build Coastguard Worker
628*d5c09012SAndroid Build Coastguard Worker    // Optional. Canonical id. This will be a unique value in the entity list
629*d5c09012SAndroid Build Coastguard Worker    // for this document.
630*d5c09012SAndroid Build Coastguard Worker    string id = 7 [(google.api.field_behavior) = OPTIONAL];
631*d5c09012SAndroid Build Coastguard Worker
632*d5c09012SAndroid Build Coastguard Worker    // Optional. Normalized entity value. Absent if the extracted value could
633*d5c09012SAndroid Build Coastguard Worker    // not be converted or the type (e.g. address) is not supported for certain
634*d5c09012SAndroid Build Coastguard Worker    // parsers. This field is also only populated for certain supported document
635*d5c09012SAndroid Build Coastguard Worker    // types.
636*d5c09012SAndroid Build Coastguard Worker    NormalizedValue normalized_value = 9
637*d5c09012SAndroid Build Coastguard Worker        [(google.api.field_behavior) = OPTIONAL];
638*d5c09012SAndroid Build Coastguard Worker
639*d5c09012SAndroid Build Coastguard Worker    // Optional. Entities can be nested to form a hierarchical data structure
640*d5c09012SAndroid Build Coastguard Worker    // representing the content in the document.
641*d5c09012SAndroid Build Coastguard Worker    repeated Entity properties = 10 [(google.api.field_behavior) = OPTIONAL];
642*d5c09012SAndroid Build Coastguard Worker
643*d5c09012SAndroid Build Coastguard Worker    // Optional. The history of this annotation.
644*d5c09012SAndroid Build Coastguard Worker    Provenance provenance = 11 [(google.api.field_behavior) = OPTIONAL];
645*d5c09012SAndroid Build Coastguard Worker
646*d5c09012SAndroid Build Coastguard Worker    // Optional. Whether the entity will be redacted for de-identification
647*d5c09012SAndroid Build Coastguard Worker    // purposes.
648*d5c09012SAndroid Build Coastguard Worker    bool redacted = 12 [(google.api.field_behavior) = OPTIONAL];
649*d5c09012SAndroid Build Coastguard Worker  }
650*d5c09012SAndroid Build Coastguard Worker
651*d5c09012SAndroid Build Coastguard Worker  // A relationship between a subject and an object among the
652*d5c09012SAndroid Build Coastguard Worker  // [Entities][google.cloud.documentai.v1beta3.Document.Entity].
653*d5c09012SAndroid Build Coastguard Worker  message EntityRelation {
654*d5c09012SAndroid Build Coastguard Worker    // Subject entity id.
655*d5c09012SAndroid Build Coastguard Worker    string subject_id = 1;
656*d5c09012SAndroid Build Coastguard Worker
657*d5c09012SAndroid Build Coastguard Worker    // Object entity id.
658*d5c09012SAndroid Build Coastguard Worker    string object_id = 2;
659*d5c09012SAndroid Build Coastguard Worker
660*d5c09012SAndroid Build Coastguard Worker    // Description of the relationship.
661*d5c09012SAndroid Build Coastguard Worker    string relation = 3;
662*d5c09012SAndroid Build Coastguard Worker  }
663*d5c09012SAndroid Build Coastguard Worker
664*d5c09012SAndroid Build Coastguard Worker  // Text reference indexing into the
665*d5c09012SAndroid Build Coastguard Worker  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
666*d5c09012SAndroid Build Coastguard Worker  message TextAnchor {
667*d5c09012SAndroid Build Coastguard Worker    // A text segment in the
668*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta3.Document.text]. The
669*d5c09012SAndroid Build Coastguard Worker    // indices may be out of bounds, which indicates that the text extends into
670*d5c09012SAndroid Build Coastguard Worker    // another document shard for large sharded documents. See
671*d5c09012SAndroid Build Coastguard Worker    // [ShardInfo.text_offset][google.cloud.documentai.v1beta3.Document.ShardInfo.text_offset].
672*d5c09012SAndroid Build Coastguard Worker    message TextSegment {
673*d5c09012SAndroid Build Coastguard Worker      // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment]
674*d5c09012SAndroid Build Coastguard Worker      // start UTF-8 char index in the
675*d5c09012SAndroid Build Coastguard Worker      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
676*d5c09012SAndroid Build Coastguard Worker      int64 start_index = 1;
677*d5c09012SAndroid Build Coastguard Worker
678*d5c09012SAndroid Build Coastguard Worker      // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment]
679*d5c09012SAndroid Build Coastguard Worker      // half-open end UTF-8 char index in the
680*d5c09012SAndroid Build Coastguard Worker      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
681*d5c09012SAndroid Build Coastguard Worker      int64 end_index = 2;
682*d5c09012SAndroid Build Coastguard Worker    }
683*d5c09012SAndroid Build Coastguard Worker
684*d5c09012SAndroid Build Coastguard Worker    // The text segments from the
685*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
686*d5c09012SAndroid Build Coastguard Worker    repeated TextSegment text_segments = 1;
687*d5c09012SAndroid Build Coastguard Worker
688*d5c09012SAndroid Build Coastguard Worker    // Contains the content of the text span so that users do
689*d5c09012SAndroid Build Coastguard Worker    // not have to look it up in the `text_segments`. It is always
690*d5c09012SAndroid Build Coastguard Worker    // populated for formFields.
691*d5c09012SAndroid Build Coastguard Worker    string content = 2;
692*d5c09012SAndroid Build Coastguard Worker  }
693*d5c09012SAndroid Build Coastguard Worker
694*d5c09012SAndroid Build Coastguard Worker  // Referencing the visual context of the entity in the
695*d5c09012SAndroid Build Coastguard Worker  // [Document.pages][google.cloud.documentai.v1beta3.Document.pages]. Page
696*d5c09012SAndroid Build Coastguard Worker  // anchors can be cross-page, consist of multiple bounding polygons and
697*d5c09012SAndroid Build Coastguard Worker  // optionally reference specific layout element types.
698*d5c09012SAndroid Build Coastguard Worker  message PageAnchor {
699*d5c09012SAndroid Build Coastguard Worker    // Represents a weak reference to a page element within a document.
700*d5c09012SAndroid Build Coastguard Worker    message PageRef {
701*d5c09012SAndroid Build Coastguard Worker      // The type of layout that is being referenced.
702*d5c09012SAndroid Build Coastguard Worker      enum LayoutType {
703*d5c09012SAndroid Build Coastguard Worker        // Layout type unspecified.
704*d5c09012SAndroid Build Coastguard Worker        LAYOUT_TYPE_UNSPECIFIED = 0;
705*d5c09012SAndroid Build Coastguard Worker
706*d5c09012SAndroid Build Coastguard Worker        // References a
707*d5c09012SAndroid Build Coastguard Worker        // [Page.blocks][google.cloud.documentai.v1beta3.Document.Page.blocks]
708*d5c09012SAndroid Build Coastguard Worker        // element.
709*d5c09012SAndroid Build Coastguard Worker        BLOCK = 1;
710*d5c09012SAndroid Build Coastguard Worker
711*d5c09012SAndroid Build Coastguard Worker        // References a
712*d5c09012SAndroid Build Coastguard Worker        // [Page.paragraphs][google.cloud.documentai.v1beta3.Document.Page.paragraphs]
713*d5c09012SAndroid Build Coastguard Worker        // element.
714*d5c09012SAndroid Build Coastguard Worker        PARAGRAPH = 2;
715*d5c09012SAndroid Build Coastguard Worker
716*d5c09012SAndroid Build Coastguard Worker        // References a
717*d5c09012SAndroid Build Coastguard Worker        // [Page.lines][google.cloud.documentai.v1beta3.Document.Page.lines]
718*d5c09012SAndroid Build Coastguard Worker        // element.
719*d5c09012SAndroid Build Coastguard Worker        LINE = 3;
720*d5c09012SAndroid Build Coastguard Worker
721*d5c09012SAndroid Build Coastguard Worker        // References a
722*d5c09012SAndroid Build Coastguard Worker        // [Page.tokens][google.cloud.documentai.v1beta3.Document.Page.tokens]
723*d5c09012SAndroid Build Coastguard Worker        // element.
724*d5c09012SAndroid Build Coastguard Worker        TOKEN = 4;
725*d5c09012SAndroid Build Coastguard Worker
726*d5c09012SAndroid Build Coastguard Worker        // References a
727*d5c09012SAndroid Build Coastguard Worker        // [Page.visual_elements][google.cloud.documentai.v1beta3.Document.Page.visual_elements]
728*d5c09012SAndroid Build Coastguard Worker        // element.
729*d5c09012SAndroid Build Coastguard Worker        VISUAL_ELEMENT = 5;
730*d5c09012SAndroid Build Coastguard Worker
731*d5c09012SAndroid Build Coastguard Worker        // References a
732*d5c09012SAndroid Build Coastguard Worker        // [Page.tables][google.cloud.documentai.v1beta3.Document.Page.tables]
733*d5c09012SAndroid Build Coastguard Worker        // element.
734*d5c09012SAndroid Build Coastguard Worker        TABLE = 6;
735*d5c09012SAndroid Build Coastguard Worker
736*d5c09012SAndroid Build Coastguard Worker        // References a
737*d5c09012SAndroid Build Coastguard Worker        // [Page.form_fields][google.cloud.documentai.v1beta3.Document.Page.form_fields]
738*d5c09012SAndroid Build Coastguard Worker        // element.
739*d5c09012SAndroid Build Coastguard Worker        FORM_FIELD = 7;
740*d5c09012SAndroid Build Coastguard Worker      }
741*d5c09012SAndroid Build Coastguard Worker
742*d5c09012SAndroid Build Coastguard Worker      // Required. Index into the
743*d5c09012SAndroid Build Coastguard Worker      // [Document.pages][google.cloud.documentai.v1beta3.Document.pages]
744*d5c09012SAndroid Build Coastguard Worker      // element, for example using
745*d5c09012SAndroid Build Coastguard Worker      // `[Document.pages][page_refs.page]` to locate the related page element.
746*d5c09012SAndroid Build Coastguard Worker      // This field is skipped when its value is the default `0`. See
747*d5c09012SAndroid Build Coastguard Worker      // https://developers.google.com/protocol-buffers/docs/proto3#json.
748*d5c09012SAndroid Build Coastguard Worker      int64 page = 1 [(google.api.field_behavior) = REQUIRED];
749*d5c09012SAndroid Build Coastguard Worker
750*d5c09012SAndroid Build Coastguard Worker      // Optional. The type of the layout element that is being referenced, if
751*d5c09012SAndroid Build Coastguard Worker      // any.
752*d5c09012SAndroid Build Coastguard Worker      LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];
753*d5c09012SAndroid Build Coastguard Worker
754*d5c09012SAndroid Build Coastguard Worker      // Optional. Deprecated. Use
755*d5c09012SAndroid Build Coastguard Worker      // [PageRef.bounding_poly][google.cloud.documentai.v1beta3.Document.PageAnchor.PageRef.bounding_poly]
756*d5c09012SAndroid Build Coastguard Worker      // instead.
757*d5c09012SAndroid Build Coastguard Worker      string layout_id = 3
758*d5c09012SAndroid Build Coastguard Worker          [deprecated = true, (google.api.field_behavior) = OPTIONAL];
759*d5c09012SAndroid Build Coastguard Worker
760*d5c09012SAndroid Build Coastguard Worker      // Optional. Identifies the bounding polygon of a layout element on the
761*d5c09012SAndroid Build Coastguard Worker      // page. If `layout_type` is set, the bounding polygon must be exactly the
762*d5c09012SAndroid Build Coastguard Worker      // same as that of the layout element it's referring to.
763*d5c09012SAndroid Build Coastguard Worker      BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];
764*d5c09012SAndroid Build Coastguard Worker
765*d5c09012SAndroid Build Coastguard Worker      // Optional. Confidence of the detected page element, if applicable. Range
766*d5c09012SAndroid Build Coastguard Worker      // `[0, 1]`.
767*d5c09012SAndroid Build Coastguard Worker      float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
768*d5c09012SAndroid Build Coastguard Worker    }
769*d5c09012SAndroid Build Coastguard Worker
770*d5c09012SAndroid Build Coastguard Worker    // One or more references to visual page elements.
771*d5c09012SAndroid Build Coastguard Worker    repeated PageRef page_refs = 1;
772*d5c09012SAndroid Build Coastguard Worker  }
773*d5c09012SAndroid Build Coastguard Worker
774*d5c09012SAndroid Build Coastguard Worker  // Structure to identify provenance relationships between annotations in
775*d5c09012SAndroid Build Coastguard Worker  // different revisions.
776*d5c09012SAndroid Build Coastguard Worker  message Provenance {
777*d5c09012SAndroid Build Coastguard Worker    // The parent element the current element is based on. Used for
778*d5c09012SAndroid Build Coastguard Worker    // referencing/aligning, removal and replacement operations.
779*d5c09012SAndroid Build Coastguard Worker    message Parent {
780*d5c09012SAndroid Build Coastguard Worker      // The index into the current revision's parent_ids list.
781*d5c09012SAndroid Build Coastguard Worker      int32 revision = 1;
782*d5c09012SAndroid Build Coastguard Worker
783*d5c09012SAndroid Build Coastguard Worker      // The index of the parent item in the corresponding item list (e.g. list
784*d5c09012SAndroid Build Coastguard Worker      // of entities, properties within entities, etc.) in the parent revision.
785*d5c09012SAndroid Build Coastguard Worker      int32 index = 3;
786*d5c09012SAndroid Build Coastguard Worker
787*d5c09012SAndroid Build Coastguard Worker      // Deprecated. The id of the parent provenance.
788*d5c09012SAndroid Build Coastguard Worker      int32 id = 2 [deprecated = true];
789*d5c09012SAndroid Build Coastguard Worker    }
790*d5c09012SAndroid Build Coastguard Worker
791*d5c09012SAndroid Build Coastguard Worker    // If a processor or agent does an explicit operation on existing elements.
792*d5c09012SAndroid Build Coastguard Worker    enum OperationType {
793*d5c09012SAndroid Build Coastguard Worker      // Operation type unspecified. If no operation is specified, a provenance
794*d5c09012SAndroid Build Coastguard Worker      // entry is simply used to match against a `parent`.
795*d5c09012SAndroid Build Coastguard Worker      OPERATION_TYPE_UNSPECIFIED = 0;
796*d5c09012SAndroid Build Coastguard Worker
797*d5c09012SAndroid Build Coastguard Worker      // Add an element.
798*d5c09012SAndroid Build Coastguard Worker      ADD = 1;
799*d5c09012SAndroid Build Coastguard Worker
800*d5c09012SAndroid Build Coastguard Worker      // Remove an element identified by `parent`.
801*d5c09012SAndroid Build Coastguard Worker      REMOVE = 2;
802*d5c09012SAndroid Build Coastguard Worker
803*d5c09012SAndroid Build Coastguard Worker      // Updates any fields within the given provenance scope of the message. It
804*d5c09012SAndroid Build Coastguard Worker      // overwrites the fields rather than replacing them. Use this when you
805*d5c09012SAndroid Build Coastguard Worker      // want to update a field value of an entity without also updating all the
806*d5c09012SAndroid Build Coastguard Worker      // child properties.
807*d5c09012SAndroid Build Coastguard Worker      UPDATE = 7;
808*d5c09012SAndroid Build Coastguard Worker
809*d5c09012SAndroid Build Coastguard Worker      // Currently unused. Replace an element identified by `parent`.
810*d5c09012SAndroid Build Coastguard Worker      REPLACE = 3;
811*d5c09012SAndroid Build Coastguard Worker
812*d5c09012SAndroid Build Coastguard Worker      // Deprecated. Request human review for the element identified by
813*d5c09012SAndroid Build Coastguard Worker      // `parent`.
814*d5c09012SAndroid Build Coastguard Worker      EVAL_REQUESTED = 4 [deprecated = true];
815*d5c09012SAndroid Build Coastguard Worker
816*d5c09012SAndroid Build Coastguard Worker      // Deprecated. Element is reviewed and approved at human review;
817*d5c09012SAndroid Build Coastguard Worker      // confidence will be set to 1.0.
818*d5c09012SAndroid Build Coastguard Worker      EVAL_APPROVED = 5 [deprecated = true];
819*d5c09012SAndroid Build Coastguard Worker
820*d5c09012SAndroid Build Coastguard Worker      // Deprecated. Element is skipped in the validation process.
821*d5c09012SAndroid Build Coastguard Worker      EVAL_SKIPPED = 6 [deprecated = true];
822*d5c09012SAndroid Build Coastguard Worker    }
823*d5c09012SAndroid Build Coastguard Worker
824*d5c09012SAndroid Build Coastguard Worker    // The index of the revision that produced this element.
825*d5c09012SAndroid Build Coastguard Worker    int32 revision = 1 [deprecated = true];
826*d5c09012SAndroid Build Coastguard Worker
827*d5c09012SAndroid Build Coastguard Worker    // The Id of this operation.  Needs to be unique within the scope of the
828*d5c09012SAndroid Build Coastguard Worker    // revision.
829*d5c09012SAndroid Build Coastguard Worker    int32 id = 2 [deprecated = true];
830*d5c09012SAndroid Build Coastguard Worker
831*d5c09012SAndroid Build Coastguard Worker    // References to the original elements that are replaced.
832*d5c09012SAndroid Build Coastguard Worker    repeated Parent parents = 3;
833*d5c09012SAndroid Build Coastguard Worker
834*d5c09012SAndroid Build Coastguard Worker    // The type of provenance operation.
835*d5c09012SAndroid Build Coastguard Worker    OperationType type = 4;
836*d5c09012SAndroid Build Coastguard Worker  }
837*d5c09012SAndroid Build Coastguard Worker
838*d5c09012SAndroid Build Coastguard Worker  // Contains past or forward revisions of this document.
839*d5c09012SAndroid Build Coastguard Worker  message Revision {
840*d5c09012SAndroid Build Coastguard Worker    // Human Review information of the document.
841*d5c09012SAndroid Build Coastguard Worker    message HumanReview {
842*d5c09012SAndroid Build Coastguard Worker      // Human review state, e.g. `requested`, `succeeded`, `rejected`.
843*d5c09012SAndroid Build Coastguard Worker      string state = 1;
844*d5c09012SAndroid Build Coastguard Worker
845*d5c09012SAndroid Build Coastguard Worker      // A message providing more details about the current state of processing.
846*d5c09012SAndroid Build Coastguard Worker      // For example, the rejection reason when the state is `rejected`.
847*d5c09012SAndroid Build Coastguard Worker      string state_message = 2;
848*d5c09012SAndroid Build Coastguard Worker    }
849*d5c09012SAndroid Build Coastguard Worker
850*d5c09012SAndroid Build Coastguard Worker    // Who/what made the change.
851*d5c09012SAndroid Build Coastguard Worker    oneof source {
852*d5c09012SAndroid Build Coastguard Worker      // If the change was made by a person, specify the name or id of that
853*d5c09012SAndroid Build Coastguard Worker      // person.
854*d5c09012SAndroid Build Coastguard Worker      string agent = 4;
855*d5c09012SAndroid Build Coastguard Worker
856*d5c09012SAndroid Build Coastguard Worker      // If the annotation was made by a processor, identify the processor by
857*d5c09012SAndroid Build Coastguard Worker      // its resource name.
858*d5c09012SAndroid Build Coastguard Worker      string processor = 5;
859*d5c09012SAndroid Build Coastguard Worker    }
860*d5c09012SAndroid Build Coastguard Worker
861*d5c09012SAndroid Build Coastguard Worker    // Id of the revision, internally generated by doc proto storage.
862*d5c09012SAndroid Build Coastguard Worker    // Unique within the context of the document.
863*d5c09012SAndroid Build Coastguard Worker    string id = 1;
864*d5c09012SAndroid Build Coastguard Worker
865*d5c09012SAndroid Build Coastguard Worker    // The revisions that this revision is based on.  This can include one or
866*d5c09012SAndroid Build Coastguard Worker    // more parents (when documents are merged).  This field represents the
867*d5c09012SAndroid Build Coastguard Worker    // index into the `revisions` field.
868*d5c09012SAndroid Build Coastguard Worker    repeated int32 parent = 2 [deprecated = true];
869*d5c09012SAndroid Build Coastguard Worker
870*d5c09012SAndroid Build Coastguard Worker    // The revisions that this revision is based on. Must include all the ids
871*d5c09012SAndroid Build Coastguard Worker    // that have anything to do with this revision - e.g. there are
872*d5c09012SAndroid Build Coastguard Worker    // `provenance.parent.revision` fields that index into this field.
873*d5c09012SAndroid Build Coastguard Worker    repeated string parent_ids = 7;
874*d5c09012SAndroid Build Coastguard Worker
875*d5c09012SAndroid Build Coastguard Worker    // The time that the revision was created, internally generated by
876*d5c09012SAndroid Build Coastguard Worker    // doc proto storage at the time of create.
877*d5c09012SAndroid Build Coastguard Worker    google.protobuf.Timestamp create_time = 3;
878*d5c09012SAndroid Build Coastguard Worker
879*d5c09012SAndroid Build Coastguard Worker    // Human Review information of this revision.
880*d5c09012SAndroid Build Coastguard Worker    HumanReview human_review = 6;
881*d5c09012SAndroid Build Coastguard Worker  }
882*d5c09012SAndroid Build Coastguard Worker
883*d5c09012SAndroid Build Coastguard Worker  // This message is used for text changes, a.k.a. OCR corrections.
884*d5c09012SAndroid Build Coastguard Worker  message TextChange {
885*d5c09012SAndroid Build Coastguard Worker    // Identifies the text that `changed_text` replaces.
886*d5c09012SAndroid Build Coastguard Worker    // Text anchor indexing into the
887*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta3.Document.text].  There
888*d5c09012SAndroid Build Coastguard Worker    // can only be a single `TextAnchor.text_segments` element.  If the start
889*d5c09012SAndroid Build Coastguard Worker    // and end index of the text segment are the same, the text change is
890*d5c09012SAndroid Build Coastguard Worker    // inserted before that index.
891*d5c09012SAndroid Build Coastguard Worker    TextAnchor text_anchor = 1;
892*d5c09012SAndroid Build Coastguard Worker
893*d5c09012SAndroid Build Coastguard Worker    // The text that replaces the text identified in the `text_anchor`.
894*d5c09012SAndroid Build Coastguard Worker    string changed_text = 2;
895*d5c09012SAndroid Build Coastguard Worker
896*d5c09012SAndroid Build Coastguard Worker    // The history of this annotation.
897*d5c09012SAndroid Build Coastguard Worker    repeated Provenance provenance = 3 [deprecated = true];
898*d5c09012SAndroid Build Coastguard Worker  }
899*d5c09012SAndroid Build Coastguard Worker
900*d5c09012SAndroid Build Coastguard Worker  // Represents the parsed layout of a document as a collection of blocks that
901*d5c09012SAndroid Build Coastguard Worker  // the document is divided into.
902*d5c09012SAndroid Build Coastguard Worker  message DocumentLayout {
903*d5c09012SAndroid Build Coastguard Worker    // Represents a block. A block could be one of the various types (text,
904*d5c09012SAndroid Build Coastguard Worker    // table, list) supported.
905*d5c09012SAndroid Build Coastguard Worker    message DocumentLayoutBlock {
906*d5c09012SAndroid Build Coastguard Worker      // Represents where the block starts and ends in the document.
907*d5c09012SAndroid Build Coastguard Worker      message LayoutPageSpan {
908*d5c09012SAndroid Build Coastguard Worker        // Page where block starts in the document.
909*d5c09012SAndroid Build Coastguard Worker        int32 page_start = 1;
910*d5c09012SAndroid Build Coastguard Worker
911*d5c09012SAndroid Build Coastguard Worker        // Page where block ends in the document.
912*d5c09012SAndroid Build Coastguard Worker        int32 page_end = 2;
913*d5c09012SAndroid Build Coastguard Worker      }
914*d5c09012SAndroid Build Coastguard Worker
915*d5c09012SAndroid Build Coastguard Worker      // Represents a text type block.
916*d5c09012SAndroid Build Coastguard Worker      message LayoutTextBlock {
917*d5c09012SAndroid Build Coastguard Worker        // Text content stored in the block.
918*d5c09012SAndroid Build Coastguard Worker        string text = 1;
919*d5c09012SAndroid Build Coastguard Worker
920*d5c09012SAndroid Build Coastguard Worker        // Type of the text in the block. Available options are: `paragraph`,
921*d5c09012SAndroid Build Coastguard Worker        // `subtitle`, `heading-1`, `heading-2`, `heading-3`, `heading-4`,
922*d5c09012SAndroid Build Coastguard Worker        // `heading-5`, `header`, `footer`.
923*d5c09012SAndroid Build Coastguard Worker        string type = 2;
924*d5c09012SAndroid Build Coastguard Worker
925*d5c09012SAndroid Build Coastguard Worker        // A text block could further have child blocks.
926*d5c09012SAndroid Build Coastguard Worker        // Repeated blocks support further hierarchies and nested blocks.
927*d5c09012SAndroid Build Coastguard Worker        repeated DocumentLayoutBlock blocks = 3;
928*d5c09012SAndroid Build Coastguard Worker      }
929*d5c09012SAndroid Build Coastguard Worker
930*d5c09012SAndroid Build Coastguard Worker      // Represents a table type block.
931*d5c09012SAndroid Build Coastguard Worker      message LayoutTableBlock {
932*d5c09012SAndroid Build Coastguard Worker        // Header rows at the top of the table.
933*d5c09012SAndroid Build Coastguard Worker        repeated LayoutTableRow header_rows = 1;
934*d5c09012SAndroid Build Coastguard Worker
935*d5c09012SAndroid Build Coastguard Worker        // Body rows containing main table content.
936*d5c09012SAndroid Build Coastguard Worker        repeated LayoutTableRow body_rows = 2;
937*d5c09012SAndroid Build Coastguard Worker
938*d5c09012SAndroid Build Coastguard Worker        // Table caption/title.
939*d5c09012SAndroid Build Coastguard Worker        string caption = 3;
940*d5c09012SAndroid Build Coastguard Worker      }
941*d5c09012SAndroid Build Coastguard Worker
942*d5c09012SAndroid Build Coastguard Worker      // Represents a row in a table.
943*d5c09012SAndroid Build Coastguard Worker      message LayoutTableRow {
944*d5c09012SAndroid Build Coastguard Worker        // A table row is a list of table cells.
945*d5c09012SAndroid Build Coastguard Worker        repeated LayoutTableCell cells = 1;
946*d5c09012SAndroid Build Coastguard Worker      }
947*d5c09012SAndroid Build Coastguard Worker
948*d5c09012SAndroid Build Coastguard Worker      // Represents a cell in a table row.
949*d5c09012SAndroid Build Coastguard Worker      message LayoutTableCell {
950*d5c09012SAndroid Build Coastguard Worker        // A table cell is a list of blocks.
951*d5c09012SAndroid Build Coastguard Worker        // Repeated blocks support further hierarchies and nested blocks.
952*d5c09012SAndroid Build Coastguard Worker        repeated DocumentLayoutBlock blocks = 1;
953*d5c09012SAndroid Build Coastguard Worker
954*d5c09012SAndroid Build Coastguard Worker        // How many rows this cell spans.
955*d5c09012SAndroid Build Coastguard Worker        int32 row_span = 2;
956*d5c09012SAndroid Build Coastguard Worker
957*d5c09012SAndroid Build Coastguard Worker        // How many columns this cell spans.
958*d5c09012SAndroid Build Coastguard Worker        int32 col_span = 3;
959*d5c09012SAndroid Build Coastguard Worker      }
960*d5c09012SAndroid Build Coastguard Worker
961*d5c09012SAndroid Build Coastguard Worker      // Represents a list type block.
962*d5c09012SAndroid Build Coastguard Worker      message LayoutListBlock {
963*d5c09012SAndroid Build Coastguard Worker        // List entries that constitute a list block.
964*d5c09012SAndroid Build Coastguard Worker        repeated LayoutListEntry list_entries = 1;
965*d5c09012SAndroid Build Coastguard Worker
966*d5c09012SAndroid Build Coastguard Worker        // Type of the list_entries (if any exist). Available options are
967*d5c09012SAndroid Build Coastguard Worker        // `ordered` and `unordered`.
968*d5c09012SAndroid Build Coastguard Worker        string type = 2;
969*d5c09012SAndroid Build Coastguard Worker      }
970*d5c09012SAndroid Build Coastguard Worker
971*d5c09012SAndroid Build Coastguard Worker      // Represents an entry in the list.
972*d5c09012SAndroid Build Coastguard Worker      message LayoutListEntry {
973*d5c09012SAndroid Build Coastguard Worker        // A list entry is a list of blocks.
974*d5c09012SAndroid Build Coastguard Worker        // Repeated blocks support further hierarchies and nested blocks.
975*d5c09012SAndroid Build Coastguard Worker        repeated DocumentLayoutBlock blocks = 1;
976*d5c09012SAndroid Build Coastguard Worker      }
977*d5c09012SAndroid Build Coastguard Worker
978*d5c09012SAndroid Build Coastguard Worker      oneof block {
979*d5c09012SAndroid Build Coastguard Worker        // Block consisting of text content.
980*d5c09012SAndroid Build Coastguard Worker        LayoutTextBlock text_block = 2;
981*d5c09012SAndroid Build Coastguard Worker
982*d5c09012SAndroid Build Coastguard Worker        // Block consisting of table content/structure.
983*d5c09012SAndroid Build Coastguard Worker        LayoutTableBlock table_block = 3;
984*d5c09012SAndroid Build Coastguard Worker
985*d5c09012SAndroid Build Coastguard Worker        // Block consisting of list content/structure.
986*d5c09012SAndroid Build Coastguard Worker        LayoutListBlock list_block = 4;
987*d5c09012SAndroid Build Coastguard Worker      }
988*d5c09012SAndroid Build Coastguard Worker
989*d5c09012SAndroid Build Coastguard Worker      // ID of the block.
990*d5c09012SAndroid Build Coastguard Worker      string block_id = 1;
991*d5c09012SAndroid Build Coastguard Worker
992*d5c09012SAndroid Build Coastguard Worker      // Page span of the block.
993*d5c09012SAndroid Build Coastguard Worker      LayoutPageSpan page_span = 5;
994*d5c09012SAndroid Build Coastguard Worker    }
995*d5c09012SAndroid Build Coastguard Worker
996*d5c09012SAndroid Build Coastguard Worker    // List of blocks in the document.
997*d5c09012SAndroid Build Coastguard Worker    repeated DocumentLayoutBlock blocks = 1;
998*d5c09012SAndroid Build Coastguard Worker  }
999*d5c09012SAndroid Build Coastguard Worker
1000*d5c09012SAndroid Build Coastguard Worker  // Represents the chunks that the document is divided into.
1001*d5c09012SAndroid Build Coastguard Worker  message ChunkedDocument {
1002*d5c09012SAndroid Build Coastguard Worker    // Represents a chunk.
1003*d5c09012SAndroid Build Coastguard Worker    message Chunk {
1004*d5c09012SAndroid Build Coastguard Worker      // Represents where the chunk starts and ends in the document.
1005*d5c09012SAndroid Build Coastguard Worker      message ChunkPageSpan {
1006*d5c09012SAndroid Build Coastguard Worker        // Page where chunk starts in the document.
1007*d5c09012SAndroid Build Coastguard Worker        int32 page_start = 1;
1008*d5c09012SAndroid Build Coastguard Worker
1009*d5c09012SAndroid Build Coastguard Worker        // Page where chunk ends in the document.
1010*d5c09012SAndroid Build Coastguard Worker        int32 page_end = 2;
1011*d5c09012SAndroid Build Coastguard Worker      }
1012*d5c09012SAndroid Build Coastguard Worker
1013*d5c09012SAndroid Build Coastguard Worker      // Represents the page header associated with the chunk.
1014*d5c09012SAndroid Build Coastguard Worker      message ChunkPageHeader {
1015*d5c09012SAndroid Build Coastguard Worker        // Header in text format.
1016*d5c09012SAndroid Build Coastguard Worker        string text = 1;
1017*d5c09012SAndroid Build Coastguard Worker
1018*d5c09012SAndroid Build Coastguard Worker        // Page span of the header.
1019*d5c09012SAndroid Build Coastguard Worker        ChunkPageSpan page_span = 2;
1020*d5c09012SAndroid Build Coastguard Worker      }
1021*d5c09012SAndroid Build Coastguard Worker
1022*d5c09012SAndroid Build Coastguard Worker      // Represents the page footer associated with the chunk.
1023*d5c09012SAndroid Build Coastguard Worker      message ChunkPageFooter {
1024*d5c09012SAndroid Build Coastguard Worker        // Footer in text format.
1025*d5c09012SAndroid Build Coastguard Worker        string text = 1;
1026*d5c09012SAndroid Build Coastguard Worker
1027*d5c09012SAndroid Build Coastguard Worker        // Page span of the footer.
1028*d5c09012SAndroid Build Coastguard Worker        ChunkPageSpan page_span = 2;
1029*d5c09012SAndroid Build Coastguard Worker      }
1030*d5c09012SAndroid Build Coastguard Worker
1031*d5c09012SAndroid Build Coastguard Worker      // ID of the chunk.
1032*d5c09012SAndroid Build Coastguard Worker      string chunk_id = 1;
1033*d5c09012SAndroid Build Coastguard Worker
1034*d5c09012SAndroid Build Coastguard Worker      // DO NOT USE.
1035*d5c09012SAndroid Build Coastguard Worker      // List of all parsed document layout source blocks used to generate the
1036*d5c09012SAndroid Build Coastguard Worker      // chunk.
1037*d5c09012SAndroid Build Coastguard Worker      repeated string source_block_ids = 2;
1038*d5c09012SAndroid Build Coastguard Worker
1039*d5c09012SAndroid Build Coastguard Worker      // Text content of the chunk.
1040*d5c09012SAndroid Build Coastguard Worker      string content = 3;
1041*d5c09012SAndroid Build Coastguard Worker
1042*d5c09012SAndroid Build Coastguard Worker      // Page span of the chunk.
1043*d5c09012SAndroid Build Coastguard Worker      ChunkPageSpan page_span = 4;
1044*d5c09012SAndroid Build Coastguard Worker
1045*d5c09012SAndroid Build Coastguard Worker      // Page headers associated with the chunk.
1046*d5c09012SAndroid Build Coastguard Worker      repeated ChunkPageHeader page_headers = 5;
1047*d5c09012SAndroid Build Coastguard Worker
1048*d5c09012SAndroid Build Coastguard Worker      // Page footers associated with the chunk.
1049*d5c09012SAndroid Build Coastguard Worker      repeated ChunkPageFooter page_footers = 6;
1050*d5c09012SAndroid Build Coastguard Worker    }
1051*d5c09012SAndroid Build Coastguard Worker
1052*d5c09012SAndroid Build Coastguard Worker    // List of chunks.
1053*d5c09012SAndroid Build Coastguard Worker    repeated Chunk chunks = 1;
1054*d5c09012SAndroid Build Coastguard Worker  }
1055*d5c09012SAndroid Build Coastguard Worker
1056*d5c09012SAndroid Build Coastguard Worker  // Original source document from the user.
1057*d5c09012SAndroid Build Coastguard Worker  oneof source {
1058*d5c09012SAndroid Build Coastguard Worker    // Optional. Currently supports Google Cloud Storage URI of the form
1059*d5c09012SAndroid Build Coastguard Worker    // `gs://bucket_name/object_name`. Object versioning is not supported.
1060*d5c09012SAndroid Build Coastguard Worker    // For more information, refer to [Google Cloud Storage Request
1061*d5c09012SAndroid Build Coastguard Worker    // URIs](https://cloud.google.com/storage/docs/reference-uris).
1062*d5c09012SAndroid Build Coastguard Worker    string uri = 1 [(google.api.field_behavior) = OPTIONAL];
1063*d5c09012SAndroid Build Coastguard Worker
1064*d5c09012SAndroid Build Coastguard Worker    // Optional. Inline document content, represented as a stream of bytes.
1065*d5c09012SAndroid Build Coastguard Worker    // Note: As with all `bytes` fields, protocol buffers use a pure binary
1066*d5c09012SAndroid Build Coastguard Worker    // representation, whereas JSON representations use base64.
1067*d5c09012SAndroid Build Coastguard Worker    bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
1068*d5c09012SAndroid Build Coastguard Worker  }
1069*d5c09012SAndroid Build Coastguard Worker
1070*d5c09012SAndroid Build Coastguard Worker  // An IANA published [media type (MIME
1071*d5c09012SAndroid Build Coastguard Worker  // type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
1072*d5c09012SAndroid Build Coastguard Worker  string mime_type = 3;
1073*d5c09012SAndroid Build Coastguard Worker
1074*d5c09012SAndroid Build Coastguard Worker  // Optional. UTF-8 encoded text in reading order from the document.
1075*d5c09012SAndroid Build Coastguard Worker  string text = 4 [(google.api.field_behavior) = OPTIONAL];
1076*d5c09012SAndroid Build Coastguard Worker
1077*d5c09012SAndroid Build Coastguard Worker  // Styles for the
1078*d5c09012SAndroid Build Coastguard Worker  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
1079*d5c09012SAndroid Build Coastguard Worker  repeated Style text_styles = 5 [deprecated = true];
1080*d5c09012SAndroid Build Coastguard Worker
1081*d5c09012SAndroid Build Coastguard Worker  // Visual page layout for the
1082*d5c09012SAndroid Build Coastguard Worker  // [Document][google.cloud.documentai.v1beta3.Document].
1083*d5c09012SAndroid Build Coastguard Worker  repeated Page pages = 6;
1084*d5c09012SAndroid Build Coastguard Worker
1085*d5c09012SAndroid Build Coastguard Worker  // A list of entities detected on
1086*d5c09012SAndroid Build Coastguard Worker  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. For
1087*d5c09012SAndroid Build Coastguard Worker  // document shards, entities in this list may cross shard boundaries.
1088*d5c09012SAndroid Build Coastguard Worker  repeated Entity entities = 7;
1089*d5c09012SAndroid Build Coastguard Worker
1090*d5c09012SAndroid Build Coastguard Worker  // Placeholder.  Relationship among
1091*d5c09012SAndroid Build Coastguard Worker  // [Document.entities][google.cloud.documentai.v1beta3.Document.entities].
1092*d5c09012SAndroid Build Coastguard Worker  repeated EntityRelation entity_relations = 8;
1093*d5c09012SAndroid Build Coastguard Worker
1094*d5c09012SAndroid Build Coastguard Worker  // Placeholder.  A list of text corrections made to
1095*d5c09012SAndroid Build Coastguard Worker  // [Document.text][google.cloud.documentai.v1beta3.Document.text].  This is
1096*d5c09012SAndroid Build Coastguard Worker  // usually used for annotating corrections to OCR mistakes.  Text changes for
1097*d5c09012SAndroid Build Coastguard Worker  // a given revision may not overlap with each other.
1098*d5c09012SAndroid Build Coastguard Worker  repeated TextChange text_changes = 14;
1099*d5c09012SAndroid Build Coastguard Worker
1100*d5c09012SAndroid Build Coastguard Worker  // Information about the sharding if this document is a shard of a larger
1101*d5c09012SAndroid Build Coastguard Worker  // document. If the document is not sharded, this message is not specified.
1102*d5c09012SAndroid Build Coastguard Worker  ShardInfo shard_info = 9;
1103*d5c09012SAndroid Build Coastguard Worker
1104*d5c09012SAndroid Build Coastguard Worker  // Any error that occurred while processing this document.
1105*d5c09012SAndroid Build Coastguard Worker  google.rpc.Status error = 10;
1106*d5c09012SAndroid Build Coastguard Worker
1107*d5c09012SAndroid Build Coastguard Worker  // Placeholder. Revision history of this document.
1108*d5c09012SAndroid Build Coastguard Worker  repeated Revision revisions = 13;
1109*d5c09012SAndroid Build Coastguard Worker
1110*d5c09012SAndroid Build Coastguard Worker  // Parsed layout of the document.
1111*d5c09012SAndroid Build Coastguard Worker  DocumentLayout document_layout = 17;
1112*d5c09012SAndroid Build Coastguard Worker
1113*d5c09012SAndroid Build Coastguard Worker  // Document chunked based on chunking config.
1114*d5c09012SAndroid Build Coastguard Worker  ChunkedDocument chunked_document = 18;
1115*d5c09012SAndroid Build Coastguard Worker}
1116*d5c09012SAndroid Build Coastguard Worker
1117*d5c09012SAndroid Build Coastguard Worker// The revision reference specifies which revision on the document to read.
1118*d5c09012SAndroid Build Coastguard Workermessage RevisionRef {
1119*d5c09012SAndroid Build Coastguard Worker  // Some predefined revision cases.
1120*d5c09012SAndroid Build Coastguard Worker  enum RevisionCase {
1121*d5c09012SAndroid Build Coastguard Worker    // Unspecified case; falls back to reading `LATEST_HUMAN_REVIEW`.
1122*d5c09012SAndroid Build Coastguard Worker    REVISION_CASE_UNSPECIFIED = 0;
1123*d5c09012SAndroid Build Coastguard Worker
1124*d5c09012SAndroid Build Coastguard Worker    // The latest revision made by a human.
1125*d5c09012SAndroid Build Coastguard Worker    LATEST_HUMAN_REVIEW = 1;
1126*d5c09012SAndroid Build Coastguard Worker
1127*d5c09012SAndroid Build Coastguard Worker    // The latest revision based on timestamp.
1128*d5c09012SAndroid Build Coastguard Worker    LATEST_TIMESTAMP = 2;
1129*d5c09012SAndroid Build Coastguard Worker
1130*d5c09012SAndroid Build Coastguard Worker    // The first (OCR) revision.
1131*d5c09012SAndroid Build Coastguard Worker    BASE_OCR_REVISION = 3;
1132*d5c09012SAndroid Build Coastguard Worker  }
1133*d5c09012SAndroid Build Coastguard Worker
1134*d5c09012SAndroid Build Coastguard Worker  // Specifies which revision to read.
1135*d5c09012SAndroid Build Coastguard Worker  oneof source {
1136*d5c09012SAndroid Build Coastguard Worker    // Reads the revision by the predefined case.
1137*d5c09012SAndroid Build Coastguard Worker    RevisionCase revision_case = 1;
1138*d5c09012SAndroid Build Coastguard Worker
1139*d5c09012SAndroid Build Coastguard Worker    // Reads the revision given by the id.
1140*d5c09012SAndroid Build Coastguard Worker    string revision_id = 2;
1141*d5c09012SAndroid Build Coastguard Worker
1142*d5c09012SAndroid Build Coastguard Worker    // Reads the revision generated by the processor version.
1143*d5c09012SAndroid Build Coastguard Worker    // The format is the full resource name of the processor version:
1144*d5c09012SAndroid Build Coastguard Worker    // `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
1145*d5c09012SAndroid Build Coastguard Worker    string latest_processor_version = 3;
1146*d5c09012SAndroid Build Coastguard Worker  }
1147*d5c09012SAndroid Build Coastguard Worker}
1148