xref: /aosp_15_r20/external/googleapis/google/cloud/documentai/v1/document.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1*d5c09012SAndroid Build Coastguard Worker// Copyright 2023 Google LLC
2*d5c09012SAndroid Build Coastguard Worker//
3*d5c09012SAndroid Build Coastguard Worker// Licensed under the Apache License, Version 2.0 (the "License");
4*d5c09012SAndroid Build Coastguard Worker// you may not use this file except in compliance with the License.
5*d5c09012SAndroid Build Coastguard Worker// You may obtain a copy of the License at
6*d5c09012SAndroid Build Coastguard Worker//
7*d5c09012SAndroid Build Coastguard Worker//     http://www.apache.org/licenses/LICENSE-2.0
8*d5c09012SAndroid Build Coastguard Worker//
9*d5c09012SAndroid Build Coastguard Worker// Unless required by applicable law or agreed to in writing, software
10*d5c09012SAndroid Build Coastguard Worker// distributed under the License is distributed on an "AS IS" BASIS,
11*d5c09012SAndroid Build Coastguard Worker// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*d5c09012SAndroid Build Coastguard Worker// See the License for the specific language governing permissions and
13*d5c09012SAndroid Build Coastguard Worker// limitations under the License.
14*d5c09012SAndroid Build Coastguard Worker
15*d5c09012SAndroid Build Coastguard Workersyntax = "proto3";
16*d5c09012SAndroid Build Coastguard Worker
17*d5c09012SAndroid Build Coastguard Workerpackage google.cloud.documentai.v1;
18*d5c09012SAndroid Build Coastguard Worker
19*d5c09012SAndroid Build Coastguard Workerimport "google/api/field_behavior.proto";
20*d5c09012SAndroid Build Coastguard Workerimport "google/cloud/documentai/v1/barcode.proto";
21*d5c09012SAndroid Build Coastguard Workerimport "google/cloud/documentai/v1/geometry.proto";
22*d5c09012SAndroid Build Coastguard Workerimport "google/protobuf/timestamp.proto";
23*d5c09012SAndroid Build Coastguard Workerimport "google/rpc/status.proto";
24*d5c09012SAndroid Build Coastguard Workerimport "google/type/color.proto";
25*d5c09012SAndroid Build Coastguard Workerimport "google/type/date.proto";
26*d5c09012SAndroid Build Coastguard Workerimport "google/type/datetime.proto";
27*d5c09012SAndroid Build Coastguard Workerimport "google/type/money.proto";
28*d5c09012SAndroid Build Coastguard Workerimport "google/type/postal_address.proto";
29*d5c09012SAndroid Build Coastguard Worker
30*d5c09012SAndroid Build Coastguard Workeroption csharp_namespace = "Google.Cloud.DocumentAI.V1";
31*d5c09012SAndroid Build Coastguard Workeroption go_package = "cloud.google.com/go/documentai/apiv1/documentaipb;documentaipb";
32*d5c09012SAndroid Build Coastguard Workeroption java_multiple_files = true;
33*d5c09012SAndroid Build Coastguard Workeroption java_outer_classname = "DocumentProto";
34*d5c09012SAndroid Build Coastguard Workeroption java_package = "com.google.cloud.documentai.v1";
35*d5c09012SAndroid Build Coastguard Workeroption php_namespace = "Google\\Cloud\\DocumentAI\\V1";
36*d5c09012SAndroid Build Coastguard Workeroption ruby_package = "Google::Cloud::DocumentAI::V1";
37*d5c09012SAndroid Build Coastguard Worker
38*d5c09012SAndroid Build Coastguard Worker// Document represents the canonical document resource in Document AI. It is an
39*d5c09012SAndroid Build Coastguard Worker// interchange format that provides insights into documents and allows for
40*d5c09012SAndroid Build Coastguard Worker// collaboration between users and Document AI to iterate and optimize for
41*d5c09012SAndroid Build Coastguard Worker// quality.
42*d5c09012SAndroid Build Coastguard Workermessage Document {
43*d5c09012SAndroid Build Coastguard Worker  // For a large document, sharding may be performed to produce several
44*d5c09012SAndroid Build Coastguard Worker  // document shards. Each document shard carries this message to identify
45*d5c09012SAndroid Build Coastguard Worker  // which shard it is.
46*d5c09012SAndroid Build Coastguard Worker  message ShardInfo {
47*d5c09012SAndroid Build Coastguard Worker    // The 0-based index of this shard.
48*d5c09012SAndroid Build Coastguard Worker    int64 shard_index = 1;
49*d5c09012SAndroid Build Coastguard Worker
50*d5c09012SAndroid Build Coastguard Worker    // Total number of shards produced for the document.
51*d5c09012SAndroid Build Coastguard Worker    int64 shard_count = 2;
52*d5c09012SAndroid Build Coastguard Worker
53*d5c09012SAndroid Build Coastguard Worker    // Index, within the overall document global text, of the first character
54*d5c09012SAndroid Build Coastguard Worker    // in [Document.text][google.cloud.documentai.v1.Document.text].
55*d5c09012SAndroid Build Coastguard Worker    int64 text_offset = 3;
56*d5c09012SAndroid Build Coastguard Worker  }
57*d5c09012SAndroid Build Coastguard Worker
58*d5c09012SAndroid Build Coastguard Worker  // Annotation for common text style attributes. This adheres to CSS
59*d5c09012SAndroid Build Coastguard Worker  // conventions as much as possible.
60*d5c09012SAndroid Build Coastguard Worker  message Style {
61*d5c09012SAndroid Build Coastguard Worker    // Font size together with its unit.
62*d5c09012SAndroid Build Coastguard Worker    message FontSize {
63*d5c09012SAndroid Build Coastguard Worker      // Font size for the text, expressed in `unit`.
64*d5c09012SAndroid Build Coastguard Worker      float size = 1;
65*d5c09012SAndroid Build Coastguard Worker
66*d5c09012SAndroid Build Coastguard Worker      // Unit for the font size. Follows CSS naming (such as `in`, `px`, and
67*d5c09012SAndroid Build Coastguard Worker      // `pt`).
68*d5c09012SAndroid Build Coastguard Worker      string unit = 2;
69*d5c09012SAndroid Build Coastguard Worker    }
70*d5c09012SAndroid Build Coastguard Worker
71*d5c09012SAndroid Build Coastguard Worker    // Text anchor indexing into the [Document.text][google.cloud.documentai.v1.Document.text].
72*d5c09012SAndroid Build Coastguard Worker    TextAnchor text_anchor = 1;
73*d5c09012SAndroid Build Coastguard Worker
74*d5c09012SAndroid Build Coastguard Worker    // Text color.
75*d5c09012SAndroid Build Coastguard Worker    google.type.Color color = 2;
76*d5c09012SAndroid Build Coastguard Worker
77*d5c09012SAndroid Build Coastguard Worker    // Text background color.
78*d5c09012SAndroid Build Coastguard Worker    google.type.Color background_color = 3;
79*d5c09012SAndroid Build Coastguard Worker
80*d5c09012SAndroid Build Coastguard Worker    // [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
81*d5c09012SAndroid Build Coastguard Worker    // Possible values are `normal`, `bold`, `bolder`, and `lighter`.
82*d5c09012SAndroid Build Coastguard Worker    string font_weight = 4;
83*d5c09012SAndroid Build Coastguard Worker
84*d5c09012SAndroid Build Coastguard Worker    // [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
85*d5c09012SAndroid Build Coastguard Worker    // Possible values are `normal`, `italic`, and `oblique`.
86*d5c09012SAndroid Build Coastguard Worker    string text_style = 5;
87*d5c09012SAndroid Build Coastguard Worker
88*d5c09012SAndroid Build Coastguard Worker    // [Text
89*d5c09012SAndroid Build Coastguard Worker    // decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
90*d5c09012SAndroid Build Coastguard Worker    // Follows the CSS standard: `<text-decoration-line>` `<text-decoration-color>`
91*d5c09012SAndroid Build Coastguard Worker    // `<text-decoration-style>`.
92*d5c09012SAndroid Build Coastguard Worker    string text_decoration = 6;
93*d5c09012SAndroid Build Coastguard Worker
94*d5c09012SAndroid Build Coastguard Worker    // Font size.
95*d5c09012SAndroid Build Coastguard Worker    FontSize font_size = 7;
96*d5c09012SAndroid Build Coastguard Worker
97*d5c09012SAndroid Build Coastguard Worker    // Font family such as `Arial`, `Times New Roman`.
98*d5c09012SAndroid Build Coastguard Worker    // See https://www.w3schools.com/cssref/pr_font_font-family.asp
99*d5c09012SAndroid Build Coastguard Worker    string font_family = 8;
100*d5c09012SAndroid Build Coastguard Worker  }
101*d5c09012SAndroid Build Coastguard Worker
102*d5c09012SAndroid Build Coastguard Worker  // A page in a [Document][google.cloud.documentai.v1.Document].
103*d5c09012SAndroid Build Coastguard Worker  message Page {
104*d5c09012SAndroid Build Coastguard Worker    // Dimension for the page: its width and height together with their unit.
105*d5c09012SAndroid Build Coastguard Worker    message Dimension {
106*d5c09012SAndroid Build Coastguard Worker      // Page width.
107*d5c09012SAndroid Build Coastguard Worker      float width = 1;
108*d5c09012SAndroid Build Coastguard Worker
109*d5c09012SAndroid Build Coastguard Worker      // Page height.
110*d5c09012SAndroid Build Coastguard Worker      float height = 2;
111*d5c09012SAndroid Build Coastguard Worker
112*d5c09012SAndroid Build Coastguard Worker      // Dimension unit, i.e. the unit in which `width` and `height` are given.
113*d5c09012SAndroid Build Coastguard Worker      string unit = 3;
114*d5c09012SAndroid Build Coastguard Worker    }
115*d5c09012SAndroid Build Coastguard Worker
116*d5c09012SAndroid Build Coastguard Worker    // Rendered image contents for this page.
117*d5c09012SAndroid Build Coastguard Worker    message Image {
118*d5c09012SAndroid Build Coastguard Worker      // Raw byte content of the image; its encoding is given by `mime_type`.
119*d5c09012SAndroid Build Coastguard Worker      bytes content = 1;
120*d5c09012SAndroid Build Coastguard Worker
121*d5c09012SAndroid Build Coastguard Worker      // Encoding [media type (MIME
122*d5c09012SAndroid Build Coastguard Worker      // type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
123*d5c09012SAndroid Build Coastguard Worker      // for the image.
124*d5c09012SAndroid Build Coastguard Worker      string mime_type = 2;
125*d5c09012SAndroid Build Coastguard Worker
126*d5c09012SAndroid Build Coastguard Worker      // Width of the image, in pixels.
127*d5c09012SAndroid Build Coastguard Worker      int32 width = 3;
128*d5c09012SAndroid Build Coastguard Worker
129*d5c09012SAndroid Build Coastguard Worker      // Height of the image, in pixels.
130*d5c09012SAndroid Build Coastguard Worker      int32 height = 4;
131*d5c09012SAndroid Build Coastguard Worker    }
132*d5c09012SAndroid Build Coastguard Worker
133*d5c09012SAndroid Build Coastguard Worker    // Representation for transformation matrix, intended to be compatible and
134*d5c09012SAndroid Build Coastguard Worker    // used with OpenCV format for image manipulation.
135*d5c09012SAndroid Build Coastguard Worker    message Matrix {
136*d5c09012SAndroid Build Coastguard Worker      // Number of rows in the matrix.
137*d5c09012SAndroid Build Coastguard Worker      int32 rows = 1;
138*d5c09012SAndroid Build Coastguard Worker
139*d5c09012SAndroid Build Coastguard Worker      // Number of columns in the matrix.
140*d5c09012SAndroid Build Coastguard Worker      int32 cols = 2;
141*d5c09012SAndroid Build Coastguard Worker
142*d5c09012SAndroid Build Coastguard Worker      // This encodes information about what data type the matrix uses.
143*d5c09012SAndroid Build Coastguard Worker      // For example, 0 (CV_8U) is an unsigned 8-bit image. For the full list
144*d5c09012SAndroid Build Coastguard Worker      // of OpenCV primitive data types, please refer to
145*d5c09012SAndroid Build Coastguard Worker      // https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html
146*d5c09012SAndroid Build Coastguard Worker      int32 type = 3;
147*d5c09012SAndroid Build Coastguard Worker
148*d5c09012SAndroid Build Coastguard Worker      // The matrix data, as raw bytes; the element data type is given by `type`.
149*d5c09012SAndroid Build Coastguard Worker      bytes data = 4;
150*d5c09012SAndroid Build Coastguard Worker    }
151*d5c09012SAndroid Build Coastguard Worker
152*d5c09012SAndroid Build Coastguard Worker    // Visual element describing a layout unit on a page.
153*d5c09012SAndroid Build Coastguard Worker    message Layout {
154*d5c09012SAndroid Build Coastguard Worker      // Detected human reading orientation.
155*d5c09012SAndroid Build Coastguard Worker      enum Orientation {
156*d5c09012SAndroid Build Coastguard Worker        // Unspecified orientation.
157*d5c09012SAndroid Build Coastguard Worker        ORIENTATION_UNSPECIFIED = 0;
158*d5c09012SAndroid Build Coastguard Worker
159*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page up.
160*d5c09012SAndroid Build Coastguard Worker        PAGE_UP = 1;
161*d5c09012SAndroid Build Coastguard Worker
162*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page right.
163*d5c09012SAndroid Build Coastguard Worker        // Turn the head 90 degrees clockwise from upright to read.
164*d5c09012SAndroid Build Coastguard Worker        PAGE_RIGHT = 2;
165*d5c09012SAndroid Build Coastguard Worker
166*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page down.
167*d5c09012SAndroid Build Coastguard Worker        // Turn the head 180 degrees from upright to read.
168*d5c09012SAndroid Build Coastguard Worker        PAGE_DOWN = 3;
169*d5c09012SAndroid Build Coastguard Worker
170*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page left.
171*d5c09012SAndroid Build Coastguard Worker        // Turn the head 90 degrees counterclockwise from upright to read.
172*d5c09012SAndroid Build Coastguard Worker        PAGE_LEFT = 4;
173*d5c09012SAndroid Build Coastguard Worker      }
174*d5c09012SAndroid Build Coastguard Worker
175*d5c09012SAndroid Build Coastguard Worker      // Text anchor indexing into the [Document.text][google.cloud.documentai.v1.Document.text].
176*d5c09012SAndroid Build Coastguard Worker      TextAnchor text_anchor = 1;
177*d5c09012SAndroid Build Coastguard Worker
178*d5c09012SAndroid Build Coastguard Worker      // Confidence of the current [Layout][google.cloud.documentai.v1.Document.Page.Layout] within context of the object this
179*d5c09012SAndroid Build Coastguard Worker      // layout is for. For example, confidence can be for a single token, a
180*d5c09012SAndroid Build Coastguard Worker      // table, a visual element, etc. depending on context. Range `[0, 1]`.
181*d5c09012SAndroid Build Coastguard Worker      float confidence = 2;
182*d5c09012SAndroid Build Coastguard Worker
183*d5c09012SAndroid Build Coastguard Worker      // The bounding polygon for the [Layout][google.cloud.documentai.v1.Document.Page.Layout].
184*d5c09012SAndroid Build Coastguard Worker      BoundingPoly bounding_poly = 3;
185*d5c09012SAndroid Build Coastguard Worker
186*d5c09012SAndroid Build Coastguard Worker      // Detected orientation for the [Layout][google.cloud.documentai.v1.Document.Page.Layout].
187*d5c09012SAndroid Build Coastguard Worker      Orientation orientation = 4;
188*d5c09012SAndroid Build Coastguard Worker    }
189*d5c09012SAndroid Build Coastguard Worker
190*d5c09012SAndroid Build Coastguard Worker    // A block has a set of lines (collected into paragraphs) that have a
191*d5c09012SAndroid Build Coastguard Worker    // common line-spacing and orientation.
192*d5c09012SAndroid Build Coastguard Worker    message Block {
193*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Block][google.cloud.documentai.v1.Document.Page.Block].
194*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
195*d5c09012SAndroid Build Coastguard Worker
196*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with confidence.
197*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
198*d5c09012SAndroid Build Coastguard Worker
199*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation. This field is deprecated.
200*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 3 [deprecated = true];
201*d5c09012SAndroid Build Coastguard Worker    }
202*d5c09012SAndroid Build Coastguard Worker
203*d5c09012SAndroid Build Coastguard Worker    // A collection of lines that a human would perceive as a paragraph.
204*d5c09012SAndroid Build Coastguard Worker    message Paragraph {
205*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Paragraph][google.cloud.documentai.v1.Document.Page.Paragraph].
206*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
207*d5c09012SAndroid Build Coastguard Worker
208*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with confidence.
209*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
210*d5c09012SAndroid Build Coastguard Worker
211*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation. This field is deprecated.
212*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 3 [deprecated = true];
213*d5c09012SAndroid Build Coastguard Worker    }
214*d5c09012SAndroid Build Coastguard Worker
215*d5c09012SAndroid Build Coastguard Worker    // A collection of tokens that a human would perceive as a line.
216*d5c09012SAndroid Build Coastguard Worker    // Does not cross column boundaries; can be horizontal, vertical, etc.
217*d5c09012SAndroid Build Coastguard Worker    message Line {
218*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Line][google.cloud.documentai.v1.Document.Page.Line].
219*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
220*d5c09012SAndroid Build Coastguard Worker
221*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with confidence.
222*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
223*d5c09012SAndroid Build Coastguard Worker
224*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation. This field is deprecated.
225*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 3 [deprecated = true];
226*d5c09012SAndroid Build Coastguard Worker    }
227*d5c09012SAndroid Build Coastguard Worker
228*d5c09012SAndroid Build Coastguard Worker    // A detected token.
229*d5c09012SAndroid Build Coastguard Worker    message Token {
230*d5c09012SAndroid Build Coastguard Worker      // Detected break at the end of a [Token][google.cloud.documentai.v1.Document.Page.Token].
231*d5c09012SAndroid Build Coastguard Worker      message DetectedBreak {
232*d5c09012SAndroid Build Coastguard Worker        // Enum to denote the type of break found.
233*d5c09012SAndroid Build Coastguard Worker        enum Type {
234*d5c09012SAndroid Build Coastguard Worker          // Unspecified break type.
235*d5c09012SAndroid Build Coastguard Worker          TYPE_UNSPECIFIED = 0;
236*d5c09012SAndroid Build Coastguard Worker
237*d5c09012SAndroid Build Coastguard Worker          // A single whitespace.
238*d5c09012SAndroid Build Coastguard Worker          SPACE = 1;
239*d5c09012SAndroid Build Coastguard Worker
240*d5c09012SAndroid Build Coastguard Worker          // A wider whitespace.
241*d5c09012SAndroid Build Coastguard Worker          WIDE_SPACE = 2;
242*d5c09012SAndroid Build Coastguard Worker
243*d5c09012SAndroid Build Coastguard Worker          // A hyphen that indicates that a token has been split across lines.
244*d5c09012SAndroid Build Coastguard Worker          HYPHEN = 3;
245*d5c09012SAndroid Build Coastguard Worker        }
246*d5c09012SAndroid Build Coastguard Worker
247*d5c09012SAndroid Build Coastguard Worker        // Detected break type.
248*d5c09012SAndroid Build Coastguard Worker        Type type = 1;
249*d5c09012SAndroid Build Coastguard Worker      }
250*d5c09012SAndroid Build Coastguard Worker
251*d5c09012SAndroid Build Coastguard Worker      // Font and other text style attributes.
252*d5c09012SAndroid Build Coastguard Worker      message StyleInfo {
253*d5c09012SAndroid Build Coastguard Worker        // Font size in points (`1` point is `¹⁄₇₂` inches).
254*d5c09012SAndroid Build Coastguard Worker        int32 font_size = 1;
255*d5c09012SAndroid Build Coastguard Worker
256*d5c09012SAndroid Build Coastguard Worker        // Font size in pixels, equal to _unrounded [font_size][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_size]_ *
257*d5c09012SAndroid Build Coastguard Worker        // _resolution_ ÷ `72.0`.
258*d5c09012SAndroid Build Coastguard Worker        double pixel_font_size = 2;
259*d5c09012SAndroid Build Coastguard Worker
260*d5c09012SAndroid Build Coastguard Worker        // Letter spacing in points.
261*d5c09012SAndroid Build Coastguard Worker        double letter_spacing = 3;
262*d5c09012SAndroid Build Coastguard Worker
263*d5c09012SAndroid Build Coastguard Worker        // Name or style of the font.
264*d5c09012SAndroid Build Coastguard Worker        string font_type = 4;
265*d5c09012SAndroid Build Coastguard Worker
266*d5c09012SAndroid Build Coastguard Worker        // Whether the text is bold (equivalent to [font_weight][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_weight] being at least
267*d5c09012SAndroid Build Coastguard Worker        // `700`).
268*d5c09012SAndroid Build Coastguard Worker        bool bold = 5;
269*d5c09012SAndroid Build Coastguard Worker
270*d5c09012SAndroid Build Coastguard Worker        // Whether the text is italic.
271*d5c09012SAndroid Build Coastguard Worker        bool italic = 6;
272*d5c09012SAndroid Build Coastguard Worker
273*d5c09012SAndroid Build Coastguard Worker        // Whether the text is underlined.
274*d5c09012SAndroid Build Coastguard Worker        bool underlined = 7;
275*d5c09012SAndroid Build Coastguard Worker
276*d5c09012SAndroid Build Coastguard Worker        // Whether the text is struck through.
277*d5c09012SAndroid Build Coastguard Worker        bool strikeout = 8;
278*d5c09012SAndroid Build Coastguard Worker
279*d5c09012SAndroid Build Coastguard Worker        // Whether the text is a subscript.
280*d5c09012SAndroid Build Coastguard Worker        bool subscript = 9;
281*d5c09012SAndroid Build Coastguard Worker
282*d5c09012SAndroid Build Coastguard Worker        // Whether the text is a superscript.
283*d5c09012SAndroid Build Coastguard Worker        bool superscript = 10;
284*d5c09012SAndroid Build Coastguard Worker
285*d5c09012SAndroid Build Coastguard Worker        // Whether the text is in small caps.
286*d5c09012SAndroid Build Coastguard Worker        bool smallcaps = 11;
287*d5c09012SAndroid Build Coastguard Worker
288*d5c09012SAndroid Build Coastguard Worker        // TrueType weight on a scale `100` (thin) to `1000` (ultra-heavy).
289*d5c09012SAndroid Build Coastguard Worker        // Normal is `400`, bold is `700`.
290*d5c09012SAndroid Build Coastguard Worker        int32 font_weight = 12;
291*d5c09012SAndroid Build Coastguard Worker
292*d5c09012SAndroid Build Coastguard Worker        // Whether the text is handwritten.
293*d5c09012SAndroid Build Coastguard Worker        bool handwritten = 13;
294*d5c09012SAndroid Build Coastguard Worker
295*d5c09012SAndroid Build Coastguard Worker        // Color of the text.
296*d5c09012SAndroid Build Coastguard Worker        google.type.Color text_color = 14;
297*d5c09012SAndroid Build Coastguard Worker
298*d5c09012SAndroid Build Coastguard Worker        // Color of the background.
299*d5c09012SAndroid Build Coastguard Worker        google.type.Color background_color = 15;
300*d5c09012SAndroid Build Coastguard Worker      }
301*d5c09012SAndroid Build Coastguard Worker
302*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Token][google.cloud.documentai.v1.Document.Page.Token].
303*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
304*d5c09012SAndroid Build Coastguard Worker
305*d5c09012SAndroid Build Coastguard Worker      // Detected break at the end of a [Token][google.cloud.documentai.v1.Document.Page.Token].
306*d5c09012SAndroid Build Coastguard Worker      DetectedBreak detected_break = 2;
307*d5c09012SAndroid Build Coastguard Worker
308*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with confidence.
309*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 3;
310*d5c09012SAndroid Build Coastguard Worker
311*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation. This field is deprecated.
312*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 4 [deprecated = true];
313*d5c09012SAndroid Build Coastguard Worker
314*d5c09012SAndroid Build Coastguard Worker      // Text style attributes.
315*d5c09012SAndroid Build Coastguard Worker      StyleInfo style_info = 5;
316*d5c09012SAndroid Build Coastguard Worker    }
317*d5c09012SAndroid Build Coastguard Worker
318*d5c09012SAndroid Build Coastguard Worker    // A detected symbol.
319*d5c09012SAndroid Build Coastguard Worker    message Symbol {
320*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Symbol][google.cloud.documentai.v1.Document.Page.Symbol].
321*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
322*d5c09012SAndroid Build Coastguard Worker
323*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages, each together with its confidence.
324*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
325*d5c09012SAndroid Build Coastguard Worker    }
326*d5c09012SAndroid Build Coastguard Worker
327*d5c09012SAndroid Build Coastguard Worker    // A detected non-text visual element on the page, e.g. a checkbox or a
328*d5c09012SAndroid Build Coastguard Worker    // signature.
329*d5c09012SAndroid Build Coastguard Worker    message VisualElement {
330*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [VisualElement][google.cloud.documentai.v1.Document.Page.VisualElement].
331*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
332*d5c09012SAndroid Build Coastguard Worker
333*d5c09012SAndroid Build Coastguard Worker      // Type of the [VisualElement][google.cloud.documentai.v1.Document.Page.VisualElement].
334*d5c09012SAndroid Build Coastguard Worker      string type = 2;
335*d5c09012SAndroid Build Coastguard Worker
336*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with confidence.
337*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 3;
338*d5c09012SAndroid Build Coastguard Worker    }
339*d5c09012SAndroid Build Coastguard Worker
340*d5c09012SAndroid Build Coastguard Worker    // A table representation similar to HTML table structure.
341*d5c09012SAndroid Build Coastguard Worker    message Table {
342*d5c09012SAndroid Build Coastguard Worker      // A row of table cells.
343*d5c09012SAndroid Build Coastguard Worker      message TableRow {
344*d5c09012SAndroid Build Coastguard Worker        // Cells that make up this row.
345*d5c09012SAndroid Build Coastguard Worker        repeated TableCell cells = 1;
346*d5c09012SAndroid Build Coastguard Worker      }
347*d5c09012SAndroid Build Coastguard Worker
348*d5c09012SAndroid Build Coastguard Worker      // A cell representation inside the table.
349*d5c09012SAndroid Build Coastguard Worker      message TableCell {
350*d5c09012SAndroid Build Coastguard Worker        // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [TableCell][google.cloud.documentai.v1.Document.Page.Table.TableCell].
351*d5c09012SAndroid Build Coastguard Worker        Layout layout = 1;
352*d5c09012SAndroid Build Coastguard Worker
353*d5c09012SAndroid Build Coastguard Worker        // How many rows this cell spans.
354*d5c09012SAndroid Build Coastguard Worker        int32 row_span = 2;
355*d5c09012SAndroid Build Coastguard Worker
356*d5c09012SAndroid Build Coastguard Worker        // How many columns this cell spans.
357*d5c09012SAndroid Build Coastguard Worker        int32 col_span = 3;
358*d5c09012SAndroid Build Coastguard Worker
359*d5c09012SAndroid Build Coastguard Worker        // A list of detected languages together with confidence.
360*d5c09012SAndroid Build Coastguard Worker        repeated DetectedLanguage detected_languages = 4;
361*d5c09012SAndroid Build Coastguard Worker      }
362*d5c09012SAndroid Build Coastguard Worker
363*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Table][google.cloud.documentai.v1.Document.Page.Table].
364*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
365*d5c09012SAndroid Build Coastguard Worker
366*d5c09012SAndroid Build Coastguard Worker      // Header rows of the table.
367*d5c09012SAndroid Build Coastguard Worker      repeated TableRow header_rows = 2;
368*d5c09012SAndroid Build Coastguard Worker
369*d5c09012SAndroid Build Coastguard Worker      // Body rows of the table.
370*d5c09012SAndroid Build Coastguard Worker      repeated TableRow body_rows = 3;
371*d5c09012SAndroid Build Coastguard Worker
372*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages together with confidence.
373*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 4;
374*d5c09012SAndroid Build Coastguard Worker
375*d5c09012SAndroid Build Coastguard Worker      // The history of this table. This field is deprecated.
376*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 5 [deprecated = true];
377*d5c09012SAndroid Build Coastguard Worker    }
378*d5c09012SAndroid Build Coastguard Worker
379*d5c09012SAndroid Build Coastguard Worker    // A form field detected on the page.
380*d5c09012SAndroid Build Coastguard Worker    message FormField {
381*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1.Document.Page.FormField] name, e.g. `Address`, `Email`,
382*d5c09012SAndroid Build Coastguard Worker      // `Grand total`, `Phone number`, etc.
383*d5c09012SAndroid Build Coastguard Worker      Layout field_name = 1;
384*d5c09012SAndroid Build Coastguard Worker
385*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1.Document.Page.FormField] value.
386*d5c09012SAndroid Build Coastguard Worker      Layout field_value = 2;
387*d5c09012SAndroid Build Coastguard Worker
388*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages for the name, together with confidence.
389*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage name_detected_languages = 3;
390*d5c09012SAndroid Build Coastguard Worker
391*d5c09012SAndroid Build Coastguard Worker      // A list of detected languages for the value, together with confidence.
392*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage value_detected_languages = 4;
393*d5c09012SAndroid Build Coastguard Worker
394*d5c09012SAndroid Build Coastguard Worker      // If the value is non-textual, this field represents the type. Current
395*d5c09012SAndroid Build Coastguard Worker      // valid values are:
396*d5c09012SAndroid Build Coastguard Worker      //
397*d5c09012SAndroid Build Coastguard Worker      // - blank (this indicates the `field_value` is normal text)
398*d5c09012SAndroid Build Coastguard Worker      // - `unfilled_checkbox`
399*d5c09012SAndroid Build Coastguard Worker      // - `filled_checkbox`
400*d5c09012SAndroid Build Coastguard Worker      string value_type = 5;
401*d5c09012SAndroid Build Coastguard Worker
402*d5c09012SAndroid Build Coastguard Worker      // Created for Labeling UI to export key text.
403*d5c09012SAndroid Build Coastguard Worker      // If corrections were made to the text identified by the
404*d5c09012SAndroid Build Coastguard Worker      // `field_name.text_anchor`, this field will contain the correction.
405*d5c09012SAndroid Build Coastguard Worker      string corrected_key_text = 6;
406*d5c09012SAndroid Build Coastguard Worker
407*d5c09012SAndroid Build Coastguard Worker      // Created for Labeling UI to export value text.
408*d5c09012SAndroid Build Coastguard Worker      // If corrections were made to the text identified by the
409*d5c09012SAndroid Build Coastguard Worker      // `field_value.text_anchor`, this field will contain the correction.
410*d5c09012SAndroid Build Coastguard Worker      string corrected_value_text = 7;
411*d5c09012SAndroid Build Coastguard Worker
412*d5c09012SAndroid Build Coastguard Worker      // The history of this annotation.
413*d5c09012SAndroid Build Coastguard Worker      Provenance provenance = 8;
414*d5c09012SAndroid Build Coastguard Worker    }
415*d5c09012SAndroid Build Coastguard Worker
416*d5c09012SAndroid Build Coastguard Worker    // A barcode detected on the page, with its layout and barcode details.
417*d5c09012SAndroid Build Coastguard Worker    message DetectedBarcode {
418*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [DetectedBarcode][google.cloud.documentai.v1.Document.Page.DetectedBarcode].
419*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
420*d5c09012SAndroid Build Coastguard Worker
421*d5c09012SAndroid Build Coastguard Worker      // Detailed barcode information of the [DetectedBarcode][google.cloud.documentai.v1.Document.Page.DetectedBarcode].
422*d5c09012SAndroid Build Coastguard Worker      Barcode barcode = 2;
423*d5c09012SAndroid Build Coastguard Worker    }
424*d5c09012SAndroid Build Coastguard Worker
425*d5c09012SAndroid Build Coastguard Worker    // A language detected for a structural component, with its confidence.
426*d5c09012SAndroid Build Coastguard Worker    message DetectedLanguage {
427*d5c09012SAndroid Build Coastguard Worker      // The [BCP-47 language
428*d5c09012SAndroid Build Coastguard Worker      // code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
429*d5c09012SAndroid Build Coastguard Worker      // such as `en-US` or `sr-Latn`.
430*d5c09012SAndroid Build Coastguard Worker      string language_code = 1;
431*d5c09012SAndroid Build Coastguard Worker
432*d5c09012SAndroid Build Coastguard Worker      // Confidence of the detected language. Range `[0, 1]`.
433*d5c09012SAndroid Build Coastguard Worker      float confidence = 2;
434*d5c09012SAndroid Build Coastguard Worker    }
435*d5c09012SAndroid Build Coastguard Worker
436*d5c09012SAndroid Build Coastguard Worker    // Image quality scores for the page image.
437*d5c09012SAndroid Build Coastguard Worker    message ImageQualityScores {
438*d5c09012SAndroid Build Coastguard Worker      // A single image quality defect detected in the page image.
439*d5c09012SAndroid Build Coastguard Worker      message DetectedDefect {
440*d5c09012SAndroid Build Coastguard Worker        // Name of the defect type. Supported values are:
441*d5c09012SAndroid Build Coastguard Worker        //
442*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_blurry`
443*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_noisy`
444*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_dark`
445*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_faint`
446*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_text_too_small`
447*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_document_cutoff`
448*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_text_cutoff`
449*d5c09012SAndroid Build Coastguard Worker        // - `quality/defect_glare`
450*d5c09012SAndroid Build Coastguard Worker        string type = 1;
451*d5c09012SAndroid Build Coastguard Worker
452*d5c09012SAndroid Build Coastguard Worker        // Confidence of the detected defect. Range `[0, 1]` where `1` indicates
453*d5c09012SAndroid Build Coastguard Worker        // strong confidence that the defect exists.
454*d5c09012SAndroid Build Coastguard Worker        float confidence = 2;
455*d5c09012SAndroid Build Coastguard Worker      }
456*d5c09012SAndroid Build Coastguard Worker
457*d5c09012SAndroid Build Coastguard Worker      // The overall quality score. Range `[0, 1]` where `1` is perfect quality.
458*d5c09012SAndroid Build Coastguard Worker      float quality_score = 1;
459*d5c09012SAndroid Build Coastguard Worker
460*d5c09012SAndroid Build Coastguard Worker      // A list of detected defects.
461*d5c09012SAndroid Build Coastguard Worker      repeated DetectedDefect detected_defects = 2;
462*d5c09012SAndroid Build Coastguard Worker    }
463*d5c09012SAndroid Build Coastguard Worker
464*d5c09012SAndroid Build Coastguard Worker    // 1-based index for current [Page][google.cloud.documentai.v1.Document.Page] in a parent [Document][google.cloud.documentai.v1.Document].
465*d5c09012SAndroid Build Coastguard Worker    // Useful when a page is taken out of a [Document][google.cloud.documentai.v1.Document] for individual
466*d5c09012SAndroid Build Coastguard Worker    // processing.
467*d5c09012SAndroid Build Coastguard Worker    int32 page_number = 1;
468*d5c09012SAndroid Build Coastguard Worker
469*d5c09012SAndroid Build Coastguard Worker    // Rendered image for this page. This image is preprocessed to remove any
470*d5c09012SAndroid Build Coastguard Worker    // skew, rotation, and distortions such that the annotation bounding boxes
471*d5c09012SAndroid Build Coastguard Worker    // can be upright and axis-aligned.
472*d5c09012SAndroid Build Coastguard Worker    Image image = 13;
473*d5c09012SAndroid Build Coastguard Worker
474*d5c09012SAndroid Build Coastguard Worker    // Transformation matrices that were applied to the original document image
475*d5c09012SAndroid Build Coastguard Worker    // to produce [Page.image][google.cloud.documentai.v1.Document.Page.image].
476*d5c09012SAndroid Build Coastguard Worker    repeated Matrix transforms = 14;
477*d5c09012SAndroid Build Coastguard Worker
478*d5c09012SAndroid Build Coastguard Worker    // Physical dimension of the page.
479*d5c09012SAndroid Build Coastguard Worker    Dimension dimension = 2;
480*d5c09012SAndroid Build Coastguard Worker
481*d5c09012SAndroid Build Coastguard Worker    // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the page.
482*d5c09012SAndroid Build Coastguard Worker    Layout layout = 3;
483*d5c09012SAndroid Build Coastguard Worker
484*d5c09012SAndroid Build Coastguard Worker    // A list of detected languages together with confidence.
485*d5c09012SAndroid Build Coastguard Worker    repeated DetectedLanguage detected_languages = 4;
486*d5c09012SAndroid Build Coastguard Worker
487*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected text blocks on the page.
488*d5c09012SAndroid Build Coastguard Worker    // A block has a set of lines (collected into paragraphs) that have a common
489*d5c09012SAndroid Build Coastguard Worker    // line-spacing and orientation.
490*d5c09012SAndroid Build Coastguard Worker    repeated Block blocks = 5;
491*d5c09012SAndroid Build Coastguard Worker
492*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected text paragraphs on the page.
493*d5c09012SAndroid Build Coastguard Worker    // A collection of lines that a human would perceive as a paragraph.
494*d5c09012SAndroid Build Coastguard Worker    repeated Paragraph paragraphs = 6;
495*d5c09012SAndroid Build Coastguard Worker
496*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected text lines on the page.
497*d5c09012SAndroid Build Coastguard Worker    // A collection of tokens that a human would perceive as a line.
498*d5c09012SAndroid Build Coastguard Worker    repeated Line lines = 7;
499*d5c09012SAndroid Build Coastguard Worker
500*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected tokens on the page.
501*d5c09012SAndroid Build Coastguard Worker    repeated Token tokens = 8;
502*d5c09012SAndroid Build Coastguard Worker
503*d5c09012SAndroid Build Coastguard Worker    // A list of detected non-text visual elements (e.g., checkbox,
504*d5c09012SAndroid Build Coastguard Worker    // signature, etc.) on the page.
505*d5c09012SAndroid Build Coastguard Worker    repeated VisualElement visual_elements = 9;
506*d5c09012SAndroid Build Coastguard Worker
507*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected tables on the page.
508*d5c09012SAndroid Build Coastguard Worker    repeated Table tables = 10;
509*d5c09012SAndroid Build Coastguard Worker
510*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected form fields on the page.
511*d5c09012SAndroid Build Coastguard Worker    repeated FormField form_fields = 11;
512*d5c09012SAndroid Build Coastguard Worker
513*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected symbols on the page.
514*d5c09012SAndroid Build Coastguard Worker    repeated Symbol symbols = 12;
515*d5c09012SAndroid Build Coastguard Worker
516*d5c09012SAndroid Build Coastguard Worker    // A list of detected barcodes.
517*d5c09012SAndroid Build Coastguard Worker    repeated DetectedBarcode detected_barcodes = 15;
518*d5c09012SAndroid Build Coastguard Worker
519*d5c09012SAndroid Build Coastguard Worker    // Image quality scores.
520*d5c09012SAndroid Build Coastguard Worker    ImageQualityScores image_quality_scores = 17;
521*d5c09012SAndroid Build Coastguard Worker
522*d5c09012SAndroid Build Coastguard Worker    // Deprecated. The history of this page.
523*d5c09012SAndroid Build Coastguard Worker    Provenance provenance = 16 [deprecated = true];
524*d5c09012SAndroid Build Coastguard Worker  }
525*d5c09012SAndroid Build Coastguard Worker
526*d5c09012SAndroid Build Coastguard Worker  // An entity that could be a phrase in the text or a property that belongs to
527*d5c09012SAndroid Build Coastguard Worker  // the document. It is a known entity type, such as a person, an organization,
528*d5c09012SAndroid Build Coastguard Worker  // or location.
529*d5c09012SAndroid Build Coastguard Worker  message Entity {
530*d5c09012SAndroid Build Coastguard Worker    // Parsed and normalized entity value.
531*d5c09012SAndroid Build Coastguard Worker    message NormalizedValue {
532*d5c09012SAndroid Build Coastguard Worker      // An optional structured entity value.
533*d5c09012SAndroid Build Coastguard Worker      // Must match the entity type defined in the schema, if
534*d5c09012SAndroid Build Coastguard Worker      // known. If this field is present, the `text` field could also be
535*d5c09012SAndroid Build Coastguard Worker      // populated.
536*d5c09012SAndroid Build Coastguard Worker      oneof structured_value {
537*d5c09012SAndroid Build Coastguard Worker        // Money value. See also:
538*d5c09012SAndroid Build Coastguard Worker        // https://github.com/googleapis/googleapis/blob/master/google/type/money.proto
539*d5c09012SAndroid Build Coastguard Worker        google.type.Money money_value = 2;
540*d5c09012SAndroid Build Coastguard Worker
541*d5c09012SAndroid Build Coastguard Worker        // Date value. Includes year, month, day. See also:
542*d5c09012SAndroid Build Coastguard Worker        // https://github.com/googleapis/googleapis/blob/master/google/type/date.proto
543*d5c09012SAndroid Build Coastguard Worker        google.type.Date date_value = 3;
544*d5c09012SAndroid Build Coastguard Worker
545*d5c09012SAndroid Build Coastguard Worker        // DateTime value. Includes date, time, and timezone. See also:
546*d5c09012SAndroid Build Coastguard Worker        // https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto
547*d5c09012SAndroid Build Coastguard Worker        google.type.DateTime datetime_value = 4;
548*d5c09012SAndroid Build Coastguard Worker
549*d5c09012SAndroid Build Coastguard Worker        // Postal address. See also:
550*d5c09012SAndroid Build Coastguard Worker        // https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto
551*d5c09012SAndroid Build Coastguard Worker        google.type.PostalAddress address_value = 5;
552*d5c09012SAndroid Build Coastguard Worker
553*d5c09012SAndroid Build Coastguard Worker        // Boolean value. Can be used for entities with binary values, or for
554*d5c09012SAndroid Build Coastguard Worker        // checkboxes.
555*d5c09012SAndroid Build Coastguard Worker        bool boolean_value = 6;
556*d5c09012SAndroid Build Coastguard Worker
557*d5c09012SAndroid Build Coastguard Worker        // Integer value.
558*d5c09012SAndroid Build Coastguard Worker        int32 integer_value = 7;
559*d5c09012SAndroid Build Coastguard Worker
560*d5c09012SAndroid Build Coastguard Worker        // Float value.
561*d5c09012SAndroid Build Coastguard Worker        float float_value = 8;
562*d5c09012SAndroid Build Coastguard Worker      }
563*d5c09012SAndroid Build Coastguard Worker
564*d5c09012SAndroid Build Coastguard Worker      // Optional. An optional field to store a normalized string.
565*d5c09012SAndroid Build Coastguard Worker      // For some entity types, one of the respective `structured_value` fields may
566*d5c09012SAndroid Build Coastguard Worker      // also be populated. Also, not all the types of `structured_value` will be
567*d5c09012SAndroid Build Coastguard Worker      // normalized. For example, some processors may not generate `float`
568*d5c09012SAndroid Build Coastguard Worker      // or `integer` normalized text by default.
569*d5c09012SAndroid Build Coastguard Worker      //
570*d5c09012SAndroid Build Coastguard Worker      // Below are sample formats mapped to structured values.
571*d5c09012SAndroid Build Coastguard Worker      //
572*d5c09012SAndroid Build Coastguard Worker      // - Money/Currency type (`money_value`) is in the ISO 4217 text format.
573*d5c09012SAndroid Build Coastguard Worker      // - Date type (`date_value`) is in the ISO 8601 text format.
574*d5c09012SAndroid Build Coastguard Worker      // - Datetime type (`datetime_value`) is in the ISO 8601 text format.
575*d5c09012SAndroid Build Coastguard Worker      string text = 1 [(google.api.field_behavior) = OPTIONAL];
576*d5c09012SAndroid Build Coastguard Worker    }
577*d5c09012SAndroid Build Coastguard Worker
578*d5c09012SAndroid Build Coastguard Worker    // Optional. Provenance of the entity.
579*d5c09012SAndroid Build Coastguard Worker    // Text anchor indexing into the [Document.text][google.cloud.documentai.v1.Document.text].
580*d5c09012SAndroid Build Coastguard Worker    TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL];
581*d5c09012SAndroid Build Coastguard Worker
582*d5c09012SAndroid Build Coastguard Worker    // Required. Entity type from a schema, e.g. `Address`.
583*d5c09012SAndroid Build Coastguard Worker    string type = 2 [(google.api.field_behavior) = REQUIRED];
584*d5c09012SAndroid Build Coastguard Worker
585*d5c09012SAndroid Build Coastguard Worker    // Optional. Text value of the entity, e.g. `1600 Amphitheatre Pkwy`.
586*d5c09012SAndroid Build Coastguard Worker    string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];
587*d5c09012SAndroid Build Coastguard Worker
588*d5c09012SAndroid Build Coastguard Worker    // Optional. Deprecated. Use the `id` field instead.
589*d5c09012SAndroid Build Coastguard Worker    string mention_id = 4 [(google.api.field_behavior) = OPTIONAL];
590*d5c09012SAndroid Build Coastguard Worker
591*d5c09012SAndroid Build Coastguard Worker    // Optional. Confidence of the detected schema entity. Range `[0, 1]`.
592*d5c09012SAndroid Build Coastguard Worker    float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
593*d5c09012SAndroid Build Coastguard Worker
594*d5c09012SAndroid Build Coastguard Worker    // Optional. Represents the provenance of this entity with respect to the
595*d5c09012SAndroid Build Coastguard Worker    // location on the page where it was found.
596*d5c09012SAndroid Build Coastguard Worker    PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];
597*d5c09012SAndroid Build Coastguard Worker
598*d5c09012SAndroid Build Coastguard Worker    // Optional. Canonical id. This will be a unique value in the entity list
599*d5c09012SAndroid Build Coastguard Worker    // for this document.
600*d5c09012SAndroid Build Coastguard Worker    string id = 7 [(google.api.field_behavior) = OPTIONAL];
601*d5c09012SAndroid Build Coastguard Worker
602*d5c09012SAndroid Build Coastguard Worker    // Optional. Normalized entity value. Absent if the extracted value could not be
603*d5c09012SAndroid Build Coastguard Worker    // converted or the type (e.g. address) is not supported for certain
604*d5c09012SAndroid Build Coastguard Worker    // parsers. This field is also only populated for certain supported document
605*d5c09012SAndroid Build Coastguard Worker    // types.
606*d5c09012SAndroid Build Coastguard Worker    NormalizedValue normalized_value = 9 [(google.api.field_behavior) = OPTIONAL];
607*d5c09012SAndroid Build Coastguard Worker
608*d5c09012SAndroid Build Coastguard Worker    // Optional. Entities can be nested to form a hierarchical data structure representing
609*d5c09012SAndroid Build Coastguard Worker    // the content in the document.
610*d5c09012SAndroid Build Coastguard Worker    repeated Entity properties = 10 [(google.api.field_behavior) = OPTIONAL];
611*d5c09012SAndroid Build Coastguard Worker
612*d5c09012SAndroid Build Coastguard Worker    // Optional. The history of this annotation.
613*d5c09012SAndroid Build Coastguard Worker    Provenance provenance = 11 [(google.api.field_behavior) = OPTIONAL];
614*d5c09012SAndroid Build Coastguard Worker
615*d5c09012SAndroid Build Coastguard Worker    // Optional. Whether the entity will be redacted for de-identification purposes.
616*d5c09012SAndroid Build Coastguard Worker    bool redacted = 12 [(google.api.field_behavior) = OPTIONAL];
617*d5c09012SAndroid Build Coastguard Worker  }
618*d5c09012SAndroid Build Coastguard Worker
619*d5c09012SAndroid Build Coastguard Worker  // Relationship between [Entities][google.cloud.documentai.v1.Document.Entity].
620*d5c09012SAndroid Build Coastguard Worker  message EntityRelation {
621*d5c09012SAndroid Build Coastguard Worker    // Id of the subject entity of the relation.
622*d5c09012SAndroid Build Coastguard Worker    string subject_id = 1;
623*d5c09012SAndroid Build Coastguard Worker
624*d5c09012SAndroid Build Coastguard Worker    // Id of the object entity of the relation.
625*d5c09012SAndroid Build Coastguard Worker    string object_id = 2;
626*d5c09012SAndroid Build Coastguard Worker
627*d5c09012SAndroid Build Coastguard Worker    // Description of the relationship.
628*d5c09012SAndroid Build Coastguard Worker    string relation = 3;
629*d5c09012SAndroid Build Coastguard Worker  }
630*d5c09012SAndroid Build Coastguard Worker
631*d5c09012SAndroid Build Coastguard Worker  // Text reference indexing into the [Document.text][google.cloud.documentai.v1.Document.text].
632*d5c09012SAndroid Build Coastguard Worker  message TextAnchor {
633*d5c09012SAndroid Build Coastguard Worker    // A text segment in the [Document.text][google.cloud.documentai.v1.Document.text]. The indices may be out of bounds,
634*d5c09012SAndroid Build Coastguard Worker    // which indicates that the text extends into another document shard for
635*d5c09012SAndroid Build Coastguard Worker    // large sharded documents. See [ShardInfo.text_offset][google.cloud.documentai.v1.Document.ShardInfo.text_offset].
636*d5c09012SAndroid Build Coastguard Worker    message TextSegment {
637*d5c09012SAndroid Build Coastguard Worker      // [TextSegment][google.cloud.documentai.v1.Document.TextAnchor.TextSegment] start UTF-8 char index in the [Document.text][google.cloud.documentai.v1.Document.text].
638*d5c09012SAndroid Build Coastguard Worker      int64 start_index = 1;
639*d5c09012SAndroid Build Coastguard Worker
640*d5c09012SAndroid Build Coastguard Worker      // [TextSegment][google.cloud.documentai.v1.Document.TextAnchor.TextSegment] half open end UTF-8 char index in the
641*d5c09012SAndroid Build Coastguard Worker      // [Document.text][google.cloud.documentai.v1.Document.text].
642*d5c09012SAndroid Build Coastguard Worker      int64 end_index = 2;
643*d5c09012SAndroid Build Coastguard Worker    }
644*d5c09012SAndroid Build Coastguard Worker
645*d5c09012SAndroid Build Coastguard Worker    // The text segments from the [Document.text][google.cloud.documentai.v1.Document.text].
646*d5c09012SAndroid Build Coastguard Worker    repeated TextSegment text_segments = 1;
647*d5c09012SAndroid Build Coastguard Worker
648*d5c09012SAndroid Build Coastguard Worker    // Contains the content of the text span so that users do
649*d5c09012SAndroid Build Coastguard Worker    // not have to look it up in the text_segments. It is always
650*d5c09012SAndroid Build Coastguard Worker    // populated for form fields.
651*d5c09012SAndroid Build Coastguard Worker    string content = 2;
652*d5c09012SAndroid Build Coastguard Worker  }
653*d5c09012SAndroid Build Coastguard Worker
654*d5c09012SAndroid Build Coastguard Worker  // Referencing the visual context of the entity in the [Document.pages][google.cloud.documentai.v1.Document.pages].
655*d5c09012SAndroid Build Coastguard Worker  // Page anchors can be cross-page, consist of multiple bounding polygons and
656*d5c09012SAndroid Build Coastguard Worker  // optionally reference specific layout element types.
657*d5c09012SAndroid Build Coastguard Worker  message PageAnchor {
658*d5c09012SAndroid Build Coastguard Worker    // Represents a weak reference to a page element within a document.
659*d5c09012SAndroid Build Coastguard Worker    message PageRef {
660*d5c09012SAndroid Build Coastguard Worker      // The type of layout that is being referenced.
661*d5c09012SAndroid Build Coastguard Worker      enum LayoutType {
662*d5c09012SAndroid Build Coastguard Worker        // Layout Unspecified.
663*d5c09012SAndroid Build Coastguard Worker        LAYOUT_TYPE_UNSPECIFIED = 0;
664*d5c09012SAndroid Build Coastguard Worker
665*d5c09012SAndroid Build Coastguard Worker        // References a [Page.blocks][google.cloud.documentai.v1.Document.Page.blocks] element.
666*d5c09012SAndroid Build Coastguard Worker        BLOCK = 1;
667*d5c09012SAndroid Build Coastguard Worker
668*d5c09012SAndroid Build Coastguard Worker        // References a [Page.paragraphs][google.cloud.documentai.v1.Document.Page.paragraphs] element.
669*d5c09012SAndroid Build Coastguard Worker        PARAGRAPH = 2;
670*d5c09012SAndroid Build Coastguard Worker
671*d5c09012SAndroid Build Coastguard Worker        // References a [Page.lines][google.cloud.documentai.v1.Document.Page.lines] element.
672*d5c09012SAndroid Build Coastguard Worker        LINE = 3;
673*d5c09012SAndroid Build Coastguard Worker
674*d5c09012SAndroid Build Coastguard Worker        // References a [Page.tokens][google.cloud.documentai.v1.Document.Page.tokens] element.
675*d5c09012SAndroid Build Coastguard Worker        TOKEN = 4;
676*d5c09012SAndroid Build Coastguard Worker
677*d5c09012SAndroid Build Coastguard Worker        // References a [Page.visual_elements][google.cloud.documentai.v1.Document.Page.visual_elements] element.
678*d5c09012SAndroid Build Coastguard Worker        VISUAL_ELEMENT = 5;
679*d5c09012SAndroid Build Coastguard Worker
680*d5c09012SAndroid Build Coastguard Worker        // References a [Page.tables][google.cloud.documentai.v1.Document.Page.tables] element.
681*d5c09012SAndroid Build Coastguard Worker        TABLE = 6;
682*d5c09012SAndroid Build Coastguard Worker
683*d5c09012SAndroid Build Coastguard Worker        // References a [Page.form_fields][google.cloud.documentai.v1.Document.Page.form_fields] element.
684*d5c09012SAndroid Build Coastguard Worker        FORM_FIELD = 7;
685*d5c09012SAndroid Build Coastguard Worker      }
686*d5c09012SAndroid Build Coastguard Worker
687*d5c09012SAndroid Build Coastguard Worker      // Required. Index into the [Document.pages][google.cloud.documentai.v1.Document.pages] element, for example using
688*d5c09012SAndroid Build Coastguard Worker      // `[Document.pages][page_refs.page]` to locate the related page element.
689*d5c09012SAndroid Build Coastguard Worker      // This field is skipped when its value is the default `0`. See
690*d5c09012SAndroid Build Coastguard Worker      // https://developers.google.com/protocol-buffers/docs/proto3#json.
691*d5c09012SAndroid Build Coastguard Worker      int64 page = 1 [(google.api.field_behavior) = REQUIRED];
692*d5c09012SAndroid Build Coastguard Worker
693*d5c09012SAndroid Build Coastguard Worker      // Optional. The type of the layout element that is being referenced, if any.
694*d5c09012SAndroid Build Coastguard Worker      LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];
695*d5c09012SAndroid Build Coastguard Worker
696*d5c09012SAndroid Build Coastguard Worker      // Optional. Deprecated. Use [PageRef.bounding_poly][google.cloud.documentai.v1.Document.PageAnchor.PageRef.bounding_poly] instead.
697*d5c09012SAndroid Build Coastguard Worker      string layout_id = 3 [
698*d5c09012SAndroid Build Coastguard Worker        deprecated = true,
699*d5c09012SAndroid Build Coastguard Worker        (google.api.field_behavior) = OPTIONAL
700*d5c09012SAndroid Build Coastguard Worker      ];
701*d5c09012SAndroid Build Coastguard Worker
702*d5c09012SAndroid Build Coastguard Worker      // Optional. Identifies the bounding polygon of a layout element on the page.
703*d5c09012SAndroid Build Coastguard Worker      // If `layout_type` is set, the bounding polygon must be exactly the same
704*d5c09012SAndroid Build Coastguard Worker      // as the layout element it's referring to.
705*d5c09012SAndroid Build Coastguard Worker      BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];
706*d5c09012SAndroid Build Coastguard Worker
707*d5c09012SAndroid Build Coastguard Worker      // Optional. Confidence of detected page element, if applicable. Range `[0, 1]`.
708*d5c09012SAndroid Build Coastguard Worker      float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
709*d5c09012SAndroid Build Coastguard Worker    }
710*d5c09012SAndroid Build Coastguard Worker
711*d5c09012SAndroid Build Coastguard Worker    // One or more references to visual page elements.
712*d5c09012SAndroid Build Coastguard Worker    repeated PageRef page_refs = 1;
713*d5c09012SAndroid Build Coastguard Worker  }
714*d5c09012SAndroid Build Coastguard Worker
715*d5c09012SAndroid Build Coastguard Worker  // Structure to identify provenance relationships between annotations in
716*d5c09012SAndroid Build Coastguard Worker  // different revisions.
717*d5c09012SAndroid Build Coastguard Worker  message Provenance {
718*d5c09012SAndroid Build Coastguard Worker    // The parent element the current element is based on. Used for
719*d5c09012SAndroid Build Coastguard Worker    // referencing/aligning, removal and replacement operations.
720*d5c09012SAndroid Build Coastguard Worker    message Parent {
721*d5c09012SAndroid Build Coastguard Worker      // The index into the current revision's parent_ids list.
722*d5c09012SAndroid Build Coastguard Worker      int32 revision = 1;
723*d5c09012SAndroid Build Coastguard Worker
724*d5c09012SAndroid Build Coastguard Worker      // The index of the parent item in the corresponding item list (e.g. list
725*d5c09012SAndroid Build Coastguard Worker      // of entities, properties within entities, etc.) in the parent revision.
726*d5c09012SAndroid Build Coastguard Worker      int32 index = 3;
727*d5c09012SAndroid Build Coastguard Worker
728*d5c09012SAndroid Build Coastguard Worker      // Deprecated. The id of the parent provenance.
729*d5c09012SAndroid Build Coastguard Worker      int32 id = 2 [deprecated = true];
730*d5c09012SAndroid Build Coastguard Worker    }
731*d5c09012SAndroid Build Coastguard Worker
732*d5c09012SAndroid Build Coastguard Worker    // If a processor or agent does an explicit operation on existing elements.
733*d5c09012SAndroid Build Coastguard Worker    enum OperationType {
734*d5c09012SAndroid Build Coastguard Worker      // Operation type unspecified. If no operation is specified a provenance
735*d5c09012SAndroid Build Coastguard Worker      // entry is simply used to match against a `parent`.
736*d5c09012SAndroid Build Coastguard Worker      OPERATION_TYPE_UNSPECIFIED = 0;
737*d5c09012SAndroid Build Coastguard Worker
738*d5c09012SAndroid Build Coastguard Worker      // Add an element.
739*d5c09012SAndroid Build Coastguard Worker      ADD = 1;
740*d5c09012SAndroid Build Coastguard Worker
741*d5c09012SAndroid Build Coastguard Worker      // Remove an element identified by `parent`.
742*d5c09012SAndroid Build Coastguard Worker      REMOVE = 2;
743*d5c09012SAndroid Build Coastguard Worker
744*d5c09012SAndroid Build Coastguard Worker      // Updates any fields within the given provenance scope of the message. It
745*d5c09012SAndroid Build Coastguard Worker      // overwrites the fields rather than replacing them. Use this when you
746*d5c09012SAndroid Build Coastguard Worker      // want to update a field value of an entity without also updating all the
747*d5c09012SAndroid Build Coastguard Worker      // child properties.
748*d5c09012SAndroid Build Coastguard Worker      UPDATE = 7;
749*d5c09012SAndroid Build Coastguard Worker
750*d5c09012SAndroid Build Coastguard Worker      // Currently unused. Replace an element identified by `parent`.
751*d5c09012SAndroid Build Coastguard Worker      REPLACE = 3;
752*d5c09012SAndroid Build Coastguard Worker
753*d5c09012SAndroid Build Coastguard Worker      // Deprecated. Request human review for the element identified by
754*d5c09012SAndroid Build Coastguard Worker      // `parent`.
755*d5c09012SAndroid Build Coastguard Worker      EVAL_REQUESTED = 4 [deprecated = true];
756*d5c09012SAndroid Build Coastguard Worker
757*d5c09012SAndroid Build Coastguard Worker      // Deprecated. Element is reviewed and approved at human review,
758*d5c09012SAndroid Build Coastguard Worker      // confidence will be set to 1.0.
759*d5c09012SAndroid Build Coastguard Worker      EVAL_APPROVED = 5 [deprecated = true];
760*d5c09012SAndroid Build Coastguard Worker
761*d5c09012SAndroid Build Coastguard Worker      // Deprecated. Element is skipped in the validation process.
762*d5c09012SAndroid Build Coastguard Worker      EVAL_SKIPPED = 6 [deprecated = true];
763*d5c09012SAndroid Build Coastguard Worker    }
764*d5c09012SAndroid Build Coastguard Worker
765*d5c09012SAndroid Build Coastguard Worker    // Deprecated. The index of the revision that produced this element.
766*d5c09012SAndroid Build Coastguard Worker    int32 revision = 1 [deprecated = true];
767*d5c09012SAndroid Build Coastguard Worker
768*d5c09012SAndroid Build Coastguard Worker    // Deprecated. The id of this operation. Needs to be unique within the
769*d5c09012SAndroid Build Coastguard Worker    // scope of the revision.
770*d5c09012SAndroid Build Coastguard Worker    int32 id = 2 [deprecated = true];
771*d5c09012SAndroid Build Coastguard Worker
772*d5c09012SAndroid Build Coastguard Worker    // References to the original elements that are replaced.
773*d5c09012SAndroid Build Coastguard Worker    repeated Parent parents = 3;
774*d5c09012SAndroid Build Coastguard Worker
775*d5c09012SAndroid Build Coastguard Worker    // The type of provenance operation.
776*d5c09012SAndroid Build Coastguard Worker    OperationType type = 4;
777*d5c09012SAndroid Build Coastguard Worker  }
778*d5c09012SAndroid Build Coastguard Worker
779*d5c09012SAndroid Build Coastguard Worker  // Contains past or forward revisions of this document.
780*d5c09012SAndroid Build Coastguard Worker  message Revision {
781*d5c09012SAndroid Build Coastguard Worker    // Human Review information of the document.
782*d5c09012SAndroid Build Coastguard Worker    message HumanReview {
783*d5c09012SAndroid Build Coastguard Worker      // Human review state, e.g. `requested`, `succeeded`, `rejected`.
784*d5c09012SAndroid Build Coastguard Worker      string state = 1;
785*d5c09012SAndroid Build Coastguard Worker
786*d5c09012SAndroid Build Coastguard Worker      // A message providing more details about the current state of processing.
787*d5c09012SAndroid Build Coastguard Worker      // For example, the rejection reason when the state is `rejected`.
788*d5c09012SAndroid Build Coastguard Worker      string state_message = 2;
789*d5c09012SAndroid Build Coastguard Worker    }
790*d5c09012SAndroid Build Coastguard Worker
791*d5c09012SAndroid Build Coastguard Worker    // Who/what made the change.  At most one of the fields below is set.
792*d5c09012SAndroid Build Coastguard Worker    oneof source {
793*d5c09012SAndroid Build Coastguard Worker      // If the change was made by a person, specify the name or id of that
794*d5c09012SAndroid Build Coastguard Worker      // person.
795*d5c09012SAndroid Build Coastguard Worker      string agent = 4;
796*d5c09012SAndroid Build Coastguard Worker
797*d5c09012SAndroid Build Coastguard Worker      // If the annotation was made by a processor, identify the processor by
798*d5c09012SAndroid Build Coastguard Worker      // its resource name.
799*d5c09012SAndroid Build Coastguard Worker      string processor = 5;
800*d5c09012SAndroid Build Coastguard Worker    }
801*d5c09012SAndroid Build Coastguard Worker
802*d5c09012SAndroid Build Coastguard Worker    // Id of the revision, internally generated by doc proto storage.
803*d5c09012SAndroid Build Coastguard Worker    // Unique within the context of the document.
804*d5c09012SAndroid Build Coastguard Worker    string id = 1;
805*d5c09012SAndroid Build Coastguard Worker
806*d5c09012SAndroid Build Coastguard Worker    // The revisions that this revision is based on.  This can include one or
807*d5c09012SAndroid Build Coastguard Worker    // more parents (when documents are merged).  This field represents the
808*d5c09012SAndroid Build Coastguard Worker    // index into the `revisions` field.
809*d5c09012SAndroid Build Coastguard Worker    repeated int32 parent = 2 [deprecated = true];
810*d5c09012SAndroid Build Coastguard Worker
811*d5c09012SAndroid Build Coastguard Worker    // The revisions that this revision is based on. Must include all the ids
812*d5c09012SAndroid Build Coastguard Worker    // that have anything to do with this revision - e.g. there are
813*d5c09012SAndroid Build Coastguard Worker    // `provenance.parent.revision` fields that index into this field.
814*d5c09012SAndroid Build Coastguard Worker    repeated string parent_ids = 7;
815*d5c09012SAndroid Build Coastguard Worker
816*d5c09012SAndroid Build Coastguard Worker    // The time that the revision was created, internally generated by
817*d5c09012SAndroid Build Coastguard Worker    // doc proto storage at the time of create.
818*d5c09012SAndroid Build Coastguard Worker    google.protobuf.Timestamp create_time = 3;
819*d5c09012SAndroid Build Coastguard Worker
820*d5c09012SAndroid Build Coastguard Worker    // Human Review information of this revision.
821*d5c09012SAndroid Build Coastguard Worker    HumanReview human_review = 6;
822*d5c09012SAndroid Build Coastguard Worker  }
823*d5c09012SAndroid Build Coastguard Worker
824*d5c09012SAndroid Build Coastguard Worker  // This message is used for text changes aka. OCR corrections.
825*d5c09012SAndroid Build Coastguard Worker  message TextChange {
826*d5c09012SAndroid Build Coastguard Worker    // Text anchor indexing into the
827*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1.Document.text].  There can
828*d5c09012SAndroid Build Coastguard Worker    // only be a single `TextAnchor.text_segments` element.  If the start and
829*d5c09012SAndroid Build Coastguard Worker    // end index of the text segment are the same, the text change is inserted
830*d5c09012SAndroid Build Coastguard Worker    // before that index.
831*d5c09012SAndroid Build Coastguard Worker    TextAnchor text_anchor = 1;
832*d5c09012SAndroid Build Coastguard Worker
833*d5c09012SAndroid Build Coastguard Worker    // The text that replaces the text identified in the `text_anchor`.
834*d5c09012SAndroid Build Coastguard Worker    string changed_text = 2;
835*d5c09012SAndroid Build Coastguard Worker
836*d5c09012SAndroid Build Coastguard Worker    // Provenance of the correction: the history of this annotation.
837*d5c09012SAndroid Build Coastguard Worker    repeated Provenance provenance = 3 [deprecated = true];
838*d5c09012SAndroid Build Coastguard Worker  }
839*d5c09012SAndroid Build Coastguard Worker
840*d5c09012SAndroid Build Coastguard Worker  // Original source document from the user; at most one member is set.
841*d5c09012SAndroid Build Coastguard Worker  oneof source {
842*d5c09012SAndroid Build Coastguard Worker    // Optional. Currently supports Google Cloud Storage URI of the form
843*d5c09012SAndroid Build Coastguard Worker    // `gs://bucket_name/object_name`. Object versioning is not supported.
844*d5c09012SAndroid Build Coastguard Worker    // For more information, refer to [Google Cloud Storage Request
845*d5c09012SAndroid Build Coastguard Worker    // URIs](https://cloud.google.com/storage/docs/reference-uris).
846*d5c09012SAndroid Build Coastguard Worker    string uri = 1 [(google.api.field_behavior) = OPTIONAL];
847*d5c09012SAndroid Build Coastguard Worker
848*d5c09012SAndroid Build Coastguard Worker    // Optional. Inline document content, represented as a stream of bytes.
849*d5c09012SAndroid Build Coastguard Worker    // Note: As with all `bytes` fields, protobuffers use a pure binary
850*d5c09012SAndroid Build Coastguard Worker    // representation, whereas JSON representations use base64.
851*d5c09012SAndroid Build Coastguard Worker    bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
852*d5c09012SAndroid Build Coastguard Worker  }
853*d5c09012SAndroid Build Coastguard Worker
854*d5c09012SAndroid Build Coastguard Worker  // An IANA published [media type (MIME
855*d5c09012SAndroid Build Coastguard Worker  // type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
856*d5c09012SAndroid Build Coastguard Worker  string mime_type = 3;
857*d5c09012SAndroid Build Coastguard Worker
858*d5c09012SAndroid Build Coastguard Worker  // Optional. UTF-8 encoded text in reading order from the document.
859*d5c09012SAndroid Build Coastguard Worker  string text = 4 [(google.api.field_behavior) = OPTIONAL];
860*d5c09012SAndroid Build Coastguard Worker
861*d5c09012SAndroid Build Coastguard Worker  // Styles for the [Document.text][google.cloud.documentai.v1.Document.text].
862*d5c09012SAndroid Build Coastguard Worker  repeated Style text_styles = 5 [deprecated = true];
863*d5c09012SAndroid Build Coastguard Worker
864*d5c09012SAndroid Build Coastguard Worker  // Visual page layout for the [Document][google.cloud.documentai.v1.Document].
865*d5c09012SAndroid Build Coastguard Worker  repeated Page pages = 6;
866*d5c09012SAndroid Build Coastguard Worker
867*d5c09012SAndroid Build Coastguard Worker  // A list of entities detected on [Document.text][google.cloud.documentai.v1.Document.text]. For document shards,
868*d5c09012SAndroid Build Coastguard Worker  // entities in this list may cross shard boundaries.
869*d5c09012SAndroid Build Coastguard Worker  repeated Entity entities = 7;
870*d5c09012SAndroid Build Coastguard Worker
871*d5c09012SAndroid Build Coastguard Worker  // Placeholder.  Relationship among [Document.entities][google.cloud.documentai.v1.Document.entities].
872*d5c09012SAndroid Build Coastguard Worker  repeated EntityRelation entity_relations = 8;
873*d5c09012SAndroid Build Coastguard Worker
874*d5c09012SAndroid Build Coastguard Worker  // Placeholder.  A list of text corrections made to [Document.text][google.cloud.documentai.v1.Document.text].  This
875*d5c09012SAndroid Build Coastguard Worker  // is usually used for annotating corrections to OCR mistakes.  Text changes
876*d5c09012SAndroid Build Coastguard Worker  // for a given revision may not overlap with each other.
877*d5c09012SAndroid Build Coastguard Worker  repeated TextChange text_changes = 14;
878*d5c09012SAndroid Build Coastguard Worker
879*d5c09012SAndroid Build Coastguard Worker  // Information about the sharding if this document is a sharded part of a
880*d5c09012SAndroid Build Coastguard Worker  // larger document. If the document is not sharded, this message is unset.
881*d5c09012SAndroid Build Coastguard Worker  ShardInfo shard_info = 9;
882*d5c09012SAndroid Build Coastguard Worker
883*d5c09012SAndroid Build Coastguard Worker  // Any error that occurred while processing this document.
884*d5c09012SAndroid Build Coastguard Worker  google.rpc.Status error = 10;
885*d5c09012SAndroid Build Coastguard Worker
886*d5c09012SAndroid Build Coastguard Worker  // Placeholder. Revision history of this document.
887*d5c09012SAndroid Build Coastguard Worker  repeated Revision revisions = 13;
888*d5c09012SAndroid Build Coastguard Worker}
889