xref: /aosp_15_r20/external/googleapis/google/cloud/documentai/v1beta1/document.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1*d5c09012SAndroid Build Coastguard Worker// Copyright 2019 Google LLC.
2*d5c09012SAndroid Build Coastguard Worker//
3*d5c09012SAndroid Build Coastguard Worker// Licensed under the Apache License, Version 2.0 (the "License");
4*d5c09012SAndroid Build Coastguard Worker// you may not use this file except in compliance with the License.
5*d5c09012SAndroid Build Coastguard Worker// You may obtain a copy of the License at
6*d5c09012SAndroid Build Coastguard Worker//
7*d5c09012SAndroid Build Coastguard Worker//     http://www.apache.org/licenses/LICENSE-2.0
8*d5c09012SAndroid Build Coastguard Worker//
9*d5c09012SAndroid Build Coastguard Worker// Unless required by applicable law or agreed to in writing, software
10*d5c09012SAndroid Build Coastguard Worker// distributed under the License is distributed on an "AS IS" BASIS,
11*d5c09012SAndroid Build Coastguard Worker// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*d5c09012SAndroid Build Coastguard Worker// See the License for the specific language governing permissions and
13*d5c09012SAndroid Build Coastguard Worker// limitations under the License.
14*d5c09012SAndroid Build Coastguard Worker//
15*d5c09012SAndroid Build Coastguard Worker
16*d5c09012SAndroid Build Coastguard Workersyntax = "proto3";
17*d5c09012SAndroid Build Coastguard Worker
18*d5c09012SAndroid Build Coastguard Workerpackage google.cloud.documentai.v1beta1;
19*d5c09012SAndroid Build Coastguard Worker
20*d5c09012SAndroid Build Coastguard Workerimport "google/api/field_behavior.proto";
21*d5c09012SAndroid Build Coastguard Workerimport "google/cloud/documentai/v1beta1/geometry.proto";
22*d5c09012SAndroid Build Coastguard Workerimport "google/rpc/status.proto";
23*d5c09012SAndroid Build Coastguard Workerimport "google/type/color.proto";
24*d5c09012SAndroid Build Coastguard Worker
25*d5c09012SAndroid Build Coastguard Workeroption csharp_namespace = "Google.Cloud.DocumentAI.V1Beta1";
26*d5c09012SAndroid Build Coastguard Workeroption go_package = "cloud.google.com/go/documentai/apiv1beta1/documentaipb;documentaipb";
27*d5c09012SAndroid Build Coastguard Workeroption java_multiple_files = true;
28*d5c09012SAndroid Build Coastguard Workeroption java_outer_classname = "DocumentProto";
29*d5c09012SAndroid Build Coastguard Workeroption java_package = "com.google.cloud.documentai.v1beta1";
30*d5c09012SAndroid Build Coastguard Workeroption php_namespace = "Google\\Cloud\\DocumentAI\\V1beta1";
31*d5c09012SAndroid Build Coastguard Workeroption ruby_package = "Google::Cloud::DocumentAI::V1beta1";
32*d5c09012SAndroid Build Coastguard Worker
33*d5c09012SAndroid Build Coastguard Worker// Document represents the canonical document resource in Document Understanding
34*d5c09012SAndroid Build Coastguard Worker// AI.
35*d5c09012SAndroid Build Coastguard Worker// It is an interchange format that provides insights into documents and allows
36*d5c09012SAndroid Build Coastguard Worker// for collaboration between users and Document Understanding AI to iterate and
37*d5c09012SAndroid Build Coastguard Worker// optimize for quality.
38*d5c09012SAndroid Build Coastguard Workermessage Document {
39*d5c09012SAndroid Build Coastguard Worker  // For a large document, sharding may be performed to produce several
40*d5c09012SAndroid Build Coastguard Worker  // document shards. Each document shard contains this message to detail
41*d5c09012SAndroid Build Coastguard Worker  // which shard it is.
42*d5c09012SAndroid Build Coastguard Worker  message ShardInfo {
43*d5c09012SAndroid Build Coastguard Worker    // The 0-based index of this shard.
44*d5c09012SAndroid Build Coastguard Worker    int64 shard_index = 1;
45*d5c09012SAndroid Build Coastguard Worker
46*d5c09012SAndroid Build Coastguard Worker    // Total number of shards the document was split into.
47*d5c09012SAndroid Build Coastguard Worker    int64 shard_count = 2;
48*d5c09012SAndroid Build Coastguard Worker
49*d5c09012SAndroid Build Coastguard Worker    // The index of the first character in
50*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta1.Document.text] in the
51*d5c09012SAndroid Build Coastguard Worker    // global text of the overall document.
52*d5c09012SAndroid Build Coastguard Worker    int64 text_offset = 3;
53*d5c09012SAndroid Build Coastguard Worker  }
54*d5c09012SAndroid Build Coastguard Worker
55*d5c09012SAndroid Build Coastguard Worker  // Annotation for common text style attributes. This adheres to CSS
56*d5c09012SAndroid Build Coastguard Worker  // conventions as much as possible.
57*d5c09012SAndroid Build Coastguard Worker  message Style {
58*d5c09012SAndroid Build Coastguard Worker    // Font size with unit.
59*d5c09012SAndroid Build Coastguard Worker    message FontSize {
60*d5c09012SAndroid Build Coastguard Worker      // Font size for the text, expressed in `unit`.
61*d5c09012SAndroid Build Coastguard Worker      float size = 1;
62*d5c09012SAndroid Build Coastguard Worker
63*d5c09012SAndroid Build Coastguard Worker      // Unit for the font size. Follows CSS naming (`in`, `px`, `pt`, etc.).
64*d5c09012SAndroid Build Coastguard Worker      string unit = 2;
65*d5c09012SAndroid Build Coastguard Worker    }
66*d5c09012SAndroid Build Coastguard Worker
67*d5c09012SAndroid Build Coastguard Worker    // Text anchor indexing into the
68*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
69*d5c09012SAndroid Build Coastguard Worker    TextAnchor text_anchor = 1;
70*d5c09012SAndroid Build Coastguard Worker
71*d5c09012SAndroid Build Coastguard Worker    // Text color.
72*d5c09012SAndroid Build Coastguard Worker    google.type.Color color = 2;
73*d5c09012SAndroid Build Coastguard Worker
74*d5c09012SAndroid Build Coastguard Worker    // Text background color.
75*d5c09012SAndroid Build Coastguard Worker    google.type.Color background_color = 3;
76*d5c09012SAndroid Build Coastguard Worker
77*d5c09012SAndroid Build Coastguard Worker    // Font weight. Possible values are normal, bold, bolder, and lighter.
78*d5c09012SAndroid Build Coastguard Worker    // https://www.w3schools.com/cssref/pr_font_weight.asp
79*d5c09012SAndroid Build Coastguard Worker    string font_weight = 4;
80*d5c09012SAndroid Build Coastguard Worker
81*d5c09012SAndroid Build Coastguard Worker    // Text style. Possible values are normal, italic, and oblique.
82*d5c09012SAndroid Build Coastguard Worker    // https://www.w3schools.com/cssref/pr_font_font-style.asp
83*d5c09012SAndroid Build Coastguard Worker    string text_style = 5;
84*d5c09012SAndroid Build Coastguard Worker
85*d5c09012SAndroid Build Coastguard Worker    // Text decoration. Follows the CSS standard format:
86*d5c09012SAndroid Build Coastguard Worker    // `<text-decoration-line> <text-decoration-color> <text-decoration-style>`
87*d5c09012SAndroid Build Coastguard Worker    // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
88*d5c09012SAndroid Build Coastguard Worker    string text_decoration = 6;
89*d5c09012SAndroid Build Coastguard Worker
90*d5c09012SAndroid Build Coastguard Worker    // Font size.
91*d5c09012SAndroid Build Coastguard Worker    FontSize font_size = 7;
92*d5c09012SAndroid Build Coastguard Worker  }
93*d5c09012SAndroid Build Coastguard Worker
94*d5c09012SAndroid Build Coastguard Worker  // A page in a [Document][google.cloud.documentai.v1beta1.Document].
95*d5c09012SAndroid Build Coastguard Worker  message Page {
96*d5c09012SAndroid Build Coastguard Worker    // Dimension for the page.
97*d5c09012SAndroid Build Coastguard Worker    message Dimension {
98*d5c09012SAndroid Build Coastguard Worker      // Page width, expressed in `unit`.
99*d5c09012SAndroid Build Coastguard Worker      float width = 1;
100*d5c09012SAndroid Build Coastguard Worker
101*d5c09012SAndroid Build Coastguard Worker      // Page height, expressed in `unit`.
102*d5c09012SAndroid Build Coastguard Worker      float height = 2;
103*d5c09012SAndroid Build Coastguard Worker
104*d5c09012SAndroid Build Coastguard Worker      // Unit in which `width` and `height` are expressed.
105*d5c09012SAndroid Build Coastguard Worker      string unit = 3;
106*d5c09012SAndroid Build Coastguard Worker    }
107*d5c09012SAndroid Build Coastguard Worker
108*d5c09012SAndroid Build Coastguard Worker    // Visual element describing a layout unit on a page.
109*d5c09012SAndroid Build Coastguard Worker    message Layout {
110*d5c09012SAndroid Build Coastguard Worker      // Detected human reading orientation.
111*d5c09012SAndroid Build Coastguard Worker      enum Orientation {
112*d5c09012SAndroid Build Coastguard Worker        // Unspecified orientation.
113*d5c09012SAndroid Build Coastguard Worker        ORIENTATION_UNSPECIFIED = 0;
114*d5c09012SAndroid Build Coastguard Worker
115*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page up.
116*d5c09012SAndroid Build Coastguard Worker        PAGE_UP = 1;
117*d5c09012SAndroid Build Coastguard Worker
118*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page right.
119*d5c09012SAndroid Build Coastguard Worker        // Turn the head 90 degrees clockwise from upright to read.
120*d5c09012SAndroid Build Coastguard Worker        PAGE_RIGHT = 2;
121*d5c09012SAndroid Build Coastguard Worker
122*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page down.
123*d5c09012SAndroid Build Coastguard Worker        // Turn the head 180 degrees from upright to read.
124*d5c09012SAndroid Build Coastguard Worker        PAGE_DOWN = 3;
125*d5c09012SAndroid Build Coastguard Worker
126*d5c09012SAndroid Build Coastguard Worker        // Orientation is aligned with page left.
127*d5c09012SAndroid Build Coastguard Worker        // Turn the head 90 degrees counterclockwise from upright to read.
128*d5c09012SAndroid Build Coastguard Worker        PAGE_LEFT = 4;
129*d5c09012SAndroid Build Coastguard Worker      }
130*d5c09012SAndroid Build Coastguard Worker
131*d5c09012SAndroid Build Coastguard Worker      // Text anchor indexing into the
132*d5c09012SAndroid Build Coastguard Worker      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
133*d5c09012SAndroid Build Coastguard Worker      TextAnchor text_anchor = 1;
134*d5c09012SAndroid Build Coastguard Worker
135*d5c09012SAndroid Build Coastguard Worker      // Confidence of the current
136*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] within
137*d5c09012SAndroid Build Coastguard Worker      // the context of the object this layout is for, e.g. a single token, a
138*d5c09012SAndroid Build Coastguard Worker      // table, or a visual element, depending on context.
139*d5c09012SAndroid Build Coastguard Worker      // Range [0, 1].
140*d5c09012SAndroid Build Coastguard Worker      float confidence = 2;
141*d5c09012SAndroid Build Coastguard Worker
142*d5c09012SAndroid Build Coastguard Worker      // The bounding polygon for the
143*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
144*d5c09012SAndroid Build Coastguard Worker      BoundingPoly bounding_poly = 3;
145*d5c09012SAndroid Build Coastguard Worker
146*d5c09012SAndroid Build Coastguard Worker      // Detected orientation for the
147*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
148*d5c09012SAndroid Build Coastguard Worker      Orientation orientation = 4;
149*d5c09012SAndroid Build Coastguard Worker    }
150*d5c09012SAndroid Build Coastguard Worker
151*d5c09012SAndroid Build Coastguard Worker    // A block has a set of lines (collected into paragraphs) that have a
152*d5c09012SAndroid Build Coastguard Worker    // common line-spacing and orientation.
153*d5c09012SAndroid Build Coastguard Worker    message Block {
154*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
155*d5c09012SAndroid Build Coastguard Worker      // [Block][google.cloud.documentai.v1beta1.Document.Page.Block].
156*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
157*d5c09012SAndroid Build Coastguard Worker
158*d5c09012SAndroid Build Coastguard Worker      // A list of languages detected for this block, each with a confidence.
159*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
160*d5c09012SAndroid Build Coastguard Worker    }
161*d5c09012SAndroid Build Coastguard Worker
162*d5c09012SAndroid Build Coastguard Worker    // A collection of lines that a human would perceive as a paragraph.
163*d5c09012SAndroid Build Coastguard Worker    message Paragraph {
164*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
165*d5c09012SAndroid Build Coastguard Worker      // [Paragraph][google.cloud.documentai.v1beta1.Document.Page.Paragraph].
166*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
167*d5c09012SAndroid Build Coastguard Worker
168*d5c09012SAndroid Build Coastguard Worker      // A list of languages detected for this paragraph, each with a confidence.
169*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
170*d5c09012SAndroid Build Coastguard Worker    }
171*d5c09012SAndroid Build Coastguard Worker
172*d5c09012SAndroid Build Coastguard Worker    // A collection of tokens that a human would perceive as a line.
173*d5c09012SAndroid Build Coastguard Worker    // Does not cross column boundaries; can be horizontal, vertical, etc.
174*d5c09012SAndroid Build Coastguard Worker    message Line {
175*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
176*d5c09012SAndroid Build Coastguard Worker      // [Line][google.cloud.documentai.v1beta1.Document.Page.Line].
177*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
178*d5c09012SAndroid Build Coastguard Worker
179*d5c09012SAndroid Build Coastguard Worker      // A list of languages detected for this line, each with a confidence.
180*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 2;
181*d5c09012SAndroid Build Coastguard Worker    }
182*d5c09012SAndroid Build Coastguard Worker
183*d5c09012SAndroid Build Coastguard Worker    // A detected token.
184*d5c09012SAndroid Build Coastguard Worker    message Token {
185*d5c09012SAndroid Build Coastguard Worker      // Detected break at the end of a
186*d5c09012SAndroid Build Coastguard Worker      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
187*d5c09012SAndroid Build Coastguard Worker      message DetectedBreak {
188*d5c09012SAndroid Build Coastguard Worker        // Enum to denote the type of break found.
189*d5c09012SAndroid Build Coastguard Worker        enum Type {
190*d5c09012SAndroid Build Coastguard Worker          // Unspecified break type.
191*d5c09012SAndroid Build Coastguard Worker          TYPE_UNSPECIFIED = 0;
192*d5c09012SAndroid Build Coastguard Worker
193*d5c09012SAndroid Build Coastguard Worker          // A single whitespace.
194*d5c09012SAndroid Build Coastguard Worker          SPACE = 1;
195*d5c09012SAndroid Build Coastguard Worker
196*d5c09012SAndroid Build Coastguard Worker          // A wider whitespace.
197*d5c09012SAndroid Build Coastguard Worker          WIDE_SPACE = 2;
198*d5c09012SAndroid Build Coastguard Worker
199*d5c09012SAndroid Build Coastguard Worker          // A hyphen that indicates that a token has been split across lines.
200*d5c09012SAndroid Build Coastguard Worker          HYPHEN = 3;
201*d5c09012SAndroid Build Coastguard Worker        }
202*d5c09012SAndroid Build Coastguard Worker
203*d5c09012SAndroid Build Coastguard Worker        // Detected break type.
204*d5c09012SAndroid Build Coastguard Worker        Type type = 1;
205*d5c09012SAndroid Build Coastguard Worker      }
206*d5c09012SAndroid Build Coastguard Worker
207*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
208*d5c09012SAndroid Build Coastguard Worker      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
209*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
210*d5c09012SAndroid Build Coastguard Worker
211*d5c09012SAndroid Build Coastguard Worker      // Detected break at the end of this
212*d5c09012SAndroid Build Coastguard Worker      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
213*d5c09012SAndroid Build Coastguard Worker      DetectedBreak detected_break = 2;
214*d5c09012SAndroid Build Coastguard Worker
215*d5c09012SAndroid Build Coastguard Worker      // A list of languages detected for this token, each with a confidence.
216*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 3;
217*d5c09012SAndroid Build Coastguard Worker    }
218*d5c09012SAndroid Build Coastguard Worker
219*d5c09012SAndroid Build Coastguard Worker    // Detected non-text visual elements (e.g. checkbox, signature) on the
220*d5c09012SAndroid Build Coastguard Worker    // page.
221*d5c09012SAndroid Build Coastguard Worker    message VisualElement {
222*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
223*d5c09012SAndroid Build Coastguard Worker      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
224*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
225*d5c09012SAndroid Build Coastguard Worker
226*d5c09012SAndroid Build Coastguard Worker      // Type of the
227*d5c09012SAndroid Build Coastguard Worker      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
228*d5c09012SAndroid Build Coastguard Worker      string type = 2;
229*d5c09012SAndroid Build Coastguard Worker
230*d5c09012SAndroid Build Coastguard Worker      // A list of languages detected for this element, each with a confidence.
231*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 3;
232*d5c09012SAndroid Build Coastguard Worker    }
233*d5c09012SAndroid Build Coastguard Worker
234*d5c09012SAndroid Build Coastguard Worker    // A table representation similar to HTML table structure.
235*d5c09012SAndroid Build Coastguard Worker    message Table {
236*d5c09012SAndroid Build Coastguard Worker      // A row of table cells.
237*d5c09012SAndroid Build Coastguard Worker      message TableRow {
238*d5c09012SAndroid Build Coastguard Worker        // Cells that make up this row.
239*d5c09012SAndroid Build Coastguard Worker        repeated TableCell cells = 1;
240*d5c09012SAndroid Build Coastguard Worker      }
241*d5c09012SAndroid Build Coastguard Worker
242*d5c09012SAndroid Build Coastguard Worker      // A cell representation inside the table.
243*d5c09012SAndroid Build Coastguard Worker      message TableCell {
244*d5c09012SAndroid Build Coastguard Worker        // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
245*d5c09012SAndroid Build Coastguard Worker        // [TableCell][google.cloud.documentai.v1beta1.Document.Page.Table.TableCell].
246*d5c09012SAndroid Build Coastguard Worker        Layout layout = 1;
247*d5c09012SAndroid Build Coastguard Worker
248*d5c09012SAndroid Build Coastguard Worker        // How many rows this cell spans.
249*d5c09012SAndroid Build Coastguard Worker        int32 row_span = 2;
250*d5c09012SAndroid Build Coastguard Worker
251*d5c09012SAndroid Build Coastguard Worker        // How many columns this cell spans.
252*d5c09012SAndroid Build Coastguard Worker        int32 col_span = 3;
253*d5c09012SAndroid Build Coastguard Worker
254*d5c09012SAndroid Build Coastguard Worker        // A list of languages detected for this cell, each with a confidence.
255*d5c09012SAndroid Build Coastguard Worker        repeated DetectedLanguage detected_languages = 4;
256*d5c09012SAndroid Build Coastguard Worker      }
257*d5c09012SAndroid Build Coastguard Worker
258*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
259*d5c09012SAndroid Build Coastguard Worker      // [Table][google.cloud.documentai.v1beta1.Document.Page.Table].
260*d5c09012SAndroid Build Coastguard Worker      Layout layout = 1;
261*d5c09012SAndroid Build Coastguard Worker
262*d5c09012SAndroid Build Coastguard Worker      // Header rows of the table.
263*d5c09012SAndroid Build Coastguard Worker      repeated TableRow header_rows = 2;
264*d5c09012SAndroid Build Coastguard Worker
265*d5c09012SAndroid Build Coastguard Worker      // Body rows of the table.
266*d5c09012SAndroid Build Coastguard Worker      repeated TableRow body_rows = 3;
267*d5c09012SAndroid Build Coastguard Worker
268*d5c09012SAndroid Build Coastguard Worker      // A list of languages detected for this table, each with a confidence.
269*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage detected_languages = 4;
270*d5c09012SAndroid Build Coastguard Worker    }
271*d5c09012SAndroid Build Coastguard Worker
272*d5c09012SAndroid Build Coastguard Worker    // A form field detected on the page.
273*d5c09012SAndroid Build Coastguard Worker    message FormField {
274*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
275*d5c09012SAndroid Build Coastguard Worker      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
276*d5c09012SAndroid Build Coastguard Worker      // name, e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
277*d5c09012SAndroid Build Coastguard Worker      Layout field_name = 1;
278*d5c09012SAndroid Build Coastguard Worker
279*d5c09012SAndroid Build Coastguard Worker      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
280*d5c09012SAndroid Build Coastguard Worker      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
281*d5c09012SAndroid Build Coastguard Worker      // value.
282*d5c09012SAndroid Build Coastguard Worker      Layout field_value = 2;
283*d5c09012SAndroid Build Coastguard Worker
284*d5c09012SAndroid Build Coastguard Worker      // A list of languages detected for the field name, with confidence.
285*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage name_detected_languages = 3;
286*d5c09012SAndroid Build Coastguard Worker
287*d5c09012SAndroid Build Coastguard Worker      // A list of languages detected for the field value, with confidence.
288*d5c09012SAndroid Build Coastguard Worker      repeated DetectedLanguage value_detected_languages = 4;
289*d5c09012SAndroid Build Coastguard Worker    }
290*d5c09012SAndroid Build Coastguard Worker
291*d5c09012SAndroid Build Coastguard Worker    // Detected language for a structural component.
292*d5c09012SAndroid Build Coastguard Worker    message DetectedLanguage {
293*d5c09012SAndroid Build Coastguard Worker      // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
294*d5c09012SAndroid Build Coastguard Worker      // information, see
295*d5c09012SAndroid Build Coastguard Worker      // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
296*d5c09012SAndroid Build Coastguard Worker      string language_code = 1;
297*d5c09012SAndroid Build Coastguard Worker
298*d5c09012SAndroid Build Coastguard Worker      // Confidence of the detected language. Range [0, 1].
299*d5c09012SAndroid Build Coastguard Worker      float confidence = 2;
300*d5c09012SAndroid Build Coastguard Worker    }
301*d5c09012SAndroid Build Coastguard Worker
302*d5c09012SAndroid Build Coastguard Worker    // 1-based index for the current
303*d5c09012SAndroid Build Coastguard Worker    // [Page][google.cloud.documentai.v1beta1.Document.Page] in a parent
304*d5c09012SAndroid Build Coastguard Worker    // [Document][google.cloud.documentai.v1beta1.Document]. Useful when a page
305*d5c09012SAndroid Build Coastguard Worker    // is taken out of a [Document][google.cloud.documentai.v1beta1.Document]
306*d5c09012SAndroid Build Coastguard Worker    // for individual processing.
307*d5c09012SAndroid Build Coastguard Worker    int32 page_number = 1;
308*d5c09012SAndroid Build Coastguard Worker
309*d5c09012SAndroid Build Coastguard Worker    // Physical dimension of the page.
310*d5c09012SAndroid Build Coastguard Worker    Dimension dimension = 2;
311*d5c09012SAndroid Build Coastguard Worker
312*d5c09012SAndroid Build Coastguard Worker    // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
313*d5c09012SAndroid Build Coastguard Worker    // page.
314*d5c09012SAndroid Build Coastguard Worker    Layout layout = 3;
315*d5c09012SAndroid Build Coastguard Worker
316*d5c09012SAndroid Build Coastguard Worker    // A list of languages detected for this page, each with a confidence.
317*d5c09012SAndroid Build Coastguard Worker    repeated DetectedLanguage detected_languages = 4;
318*d5c09012SAndroid Build Coastguard Worker
319*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected text blocks on the page.
320*d5c09012SAndroid Build Coastguard Worker    // A block has a set of lines (collected into paragraphs) that have a common
321*d5c09012SAndroid Build Coastguard Worker    // line-spacing and orientation.
322*d5c09012SAndroid Build Coastguard Worker    repeated Block blocks = 5;
323*d5c09012SAndroid Build Coastguard Worker
324*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected text paragraphs on the page.
325*d5c09012SAndroid Build Coastguard Worker    // A paragraph is a collection of lines that a human would perceive as one.
326*d5c09012SAndroid Build Coastguard Worker    repeated Paragraph paragraphs = 6;
327*d5c09012SAndroid Build Coastguard Worker
328*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected text lines on the page.
329*d5c09012SAndroid Build Coastguard Worker    // A line is a collection of tokens that a human would perceive as one.
330*d5c09012SAndroid Build Coastguard Worker    repeated Line lines = 7;
331*d5c09012SAndroid Build Coastguard Worker
332*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected tokens on the page.
333*d5c09012SAndroid Build Coastguard Worker    repeated Token tokens = 8;
334*d5c09012SAndroid Build Coastguard Worker
335*d5c09012SAndroid Build Coastguard Worker    // A list of detected non-text visual elements (e.g. checkbox,
336*d5c09012SAndroid Build Coastguard Worker    // signature) on the page.
337*d5c09012SAndroid Build Coastguard Worker    repeated VisualElement visual_elements = 9;
338*d5c09012SAndroid Build Coastguard Worker
339*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected tables on the page.
340*d5c09012SAndroid Build Coastguard Worker    repeated Table tables = 10;
341*d5c09012SAndroid Build Coastguard Worker
342*d5c09012SAndroid Build Coastguard Worker    // A list of visually detected form fields on the page.
343*d5c09012SAndroid Build Coastguard Worker    repeated FormField form_fields = 11;
344*d5c09012SAndroid Build Coastguard Worker  }
345*d5c09012SAndroid Build Coastguard Worker
346*d5c09012SAndroid Build Coastguard Worker  // A phrase in the text that is a known entity type, such as a person, an
347*d5c09012SAndroid Build Coastguard Worker  // organization, or a location.
348*d5c09012SAndroid Build Coastguard Worker  message Entity {
349*d5c09012SAndroid Build Coastguard Worker    // Provenance of the entity.
350*d5c09012SAndroid Build Coastguard Worker    // Text anchor indexing into the
351*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
352*d5c09012SAndroid Build Coastguard Worker    TextAnchor text_anchor = 1;
353*d5c09012SAndroid Build Coastguard Worker
354*d5c09012SAndroid Build Coastguard Worker    // Required. Entity type from a schema, e.g. `Address`.
355*d5c09012SAndroid Build Coastguard Worker    string type = 2 [(google.api.field_behavior) = REQUIRED];
356*d5c09012SAndroid Build Coastguard Worker
357*d5c09012SAndroid Build Coastguard Worker    // Text value in the document, e.g. `1600 Amphitheatre Pkwy`.
358*d5c09012SAndroid Build Coastguard Worker    string mention_text = 3;
359*d5c09012SAndroid Build Coastguard Worker
360*d5c09012SAndroid Build Coastguard Worker    // Canonical mention identifier. This will be a unique value in the entity
361*d5c09012SAndroid Build Coastguard Worker    // list for this document.
362*d5c09012SAndroid Build Coastguard Worker    string mention_id = 4;
363*d5c09012SAndroid Build Coastguard Worker  }
364*d5c09012SAndroid Build Coastguard Worker
365*d5c09012SAndroid Build Coastguard Worker  // Relationship between
366*d5c09012SAndroid Build Coastguard Worker  // [Entities][google.cloud.documentai.v1beta1.Document.Entity].
367*d5c09012SAndroid Build Coastguard Worker  message EntityRelation {
368*d5c09012SAndroid Build Coastguard Worker    // `mention_id` of the subject entity.
369*d5c09012SAndroid Build Coastguard Worker    string subject_id = 1;
370*d5c09012SAndroid Build Coastguard Worker
371*d5c09012SAndroid Build Coastguard Worker    // `mention_id` of the object entity.
372*d5c09012SAndroid Build Coastguard Worker    string object_id = 2;
373*d5c09012SAndroid Build Coastguard Worker
374*d5c09012SAndroid Build Coastguard Worker    // Description of the relationship between the subject and the object.
375*d5c09012SAndroid Build Coastguard Worker    string relation = 3;
376*d5c09012SAndroid Build Coastguard Worker  }
377*d5c09012SAndroid Build Coastguard Worker
378*d5c09012SAndroid Build Coastguard Worker  // Text reference indexing into the
379*d5c09012SAndroid Build Coastguard Worker  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
380*d5c09012SAndroid Build Coastguard Worker  message TextAnchor {
381*d5c09012SAndroid Build Coastguard Worker    // A text segment in the
382*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta1.Document.text]. The
383*d5c09012SAndroid Build Coastguard Worker    // indices may be out of bounds, which indicates that the text extends into
384*d5c09012SAndroid Build Coastguard Worker    // another document shard for large sharded documents. See
385*d5c09012SAndroid Build Coastguard Worker    // [ShardInfo.text_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset].
386*d5c09012SAndroid Build Coastguard Worker    message TextSegment {
387*d5c09012SAndroid Build Coastguard Worker      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
388*d5c09012SAndroid Build Coastguard Worker      // start UTF-8 char index in the
389*d5c09012SAndroid Build Coastguard Worker      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
390*d5c09012SAndroid Build Coastguard Worker      int64 start_index = 1;
391*d5c09012SAndroid Build Coastguard Worker
392*d5c09012SAndroid Build Coastguard Worker      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
393*d5c09012SAndroid Build Coastguard Worker      // half-open end UTF-8 char index in the
394*d5c09012SAndroid Build Coastguard Worker      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
395*d5c09012SAndroid Build Coastguard Worker      int64 end_index = 2;
396*d5c09012SAndroid Build Coastguard Worker    }
397*d5c09012SAndroid Build Coastguard Worker
398*d5c09012SAndroid Build Coastguard Worker    // The text segments from the
399*d5c09012SAndroid Build Coastguard Worker    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
400*d5c09012SAndroid Build Coastguard Worker    repeated TextSegment text_segments = 1;
401*d5c09012SAndroid Build Coastguard Worker  }
402*d5c09012SAndroid Build Coastguard Worker
403*d5c09012SAndroid Build Coastguard Worker  // Original source document from the user.
404*d5c09012SAndroid Build Coastguard Worker  oneof source {
405*d5c09012SAndroid Build Coastguard Worker    // URI of the source document. Currently supports Google Cloud Storage
406*d5c09012SAndroid Build Coastguard Worker    // URIs of the form `gs://bucket_name/object_name`. Object versioning is
407*d5c09012SAndroid Build Coastguard Worker    // not supported. See [Google Cloud Storage Request
408*d5c09012SAndroid Build Coastguard Worker    // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
409*d5c09012SAndroid Build Coastguard Worker    // info.
410*d5c09012SAndroid Build Coastguard Worker    string uri = 1;
411*d5c09012SAndroid Build Coastguard Worker
412*d5c09012SAndroid Build Coastguard Worker    // Inline document content, represented as a stream of bytes.
413*d5c09012SAndroid Build Coastguard Worker    // Note: As with all `bytes` fields, protocol buffers use a pure binary
414*d5c09012SAndroid Build Coastguard Worker    // representation, whereas JSON representations use base64.
415*d5c09012SAndroid Build Coastguard Worker    bytes content = 2;
416*d5c09012SAndroid Build Coastguard Worker  }
417*d5c09012SAndroid Build Coastguard Worker
418*d5c09012SAndroid Build Coastguard Worker  // An IANA published MIME type (also referred to as media type). For more
419*d5c09012SAndroid Build Coastguard Worker  // information, see
420*d5c09012SAndroid Build Coastguard Worker  // https://www.iana.org/assignments/media-types/media-types.xhtml.
421*d5c09012SAndroid Build Coastguard Worker  string mime_type = 3;
422*d5c09012SAndroid Build Coastguard Worker
423*d5c09012SAndroid Build Coastguard Worker  // UTF-8 encoded text in reading order from the document.
424*d5c09012SAndroid Build Coastguard Worker  string text = 4;
425*d5c09012SAndroid Build Coastguard Worker
426*d5c09012SAndroid Build Coastguard Worker  // Styles for the
427*d5c09012SAndroid Build Coastguard Worker  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
428*d5c09012SAndroid Build Coastguard Worker  repeated Style text_styles = 5;
429*d5c09012SAndroid Build Coastguard Worker
430*d5c09012SAndroid Build Coastguard Worker  // Visual page layout for the
431*d5c09012SAndroid Build Coastguard Worker  // [Document][google.cloud.documentai.v1beta1.Document].
432*d5c09012SAndroid Build Coastguard Worker  repeated Page pages = 6;
433*d5c09012SAndroid Build Coastguard Worker
434*d5c09012SAndroid Build Coastguard Worker  // A list of entities detected on
435*d5c09012SAndroid Build Coastguard Worker  // [Document.text][google.cloud.documentai.v1beta1.Document.text]. For
436*d5c09012SAndroid Build Coastguard Worker  // document shards, entities in this list may cross shard boundaries.
437*d5c09012SAndroid Build Coastguard Worker  repeated Entity entities = 7;
438*d5c09012SAndroid Build Coastguard Worker
439*d5c09012SAndroid Build Coastguard Worker  // Relationship among
440*d5c09012SAndroid Build Coastguard Worker  // [Document.entities][google.cloud.documentai.v1beta1.Document.entities].
441*d5c09012SAndroid Build Coastguard Worker  repeated EntityRelation entity_relations = 8;
442*d5c09012SAndroid Build Coastguard Worker
443*d5c09012SAndroid Build Coastguard Worker  // Sharding information if this document is a sharded part of a larger
444*d5c09012SAndroid Build Coastguard Worker  // document. If the document is not sharded, this message is not specified.
445*d5c09012SAndroid Build Coastguard Worker  ShardInfo shard_info = 9;
446*d5c09012SAndroid Build Coastguard Worker
447*d5c09012SAndroid Build Coastguard Worker  // Any error that occurred while processing this document.
448*d5c09012SAndroid Build Coastguard Worker  google.rpc.Status error = 10;
449*d5c09012SAndroid Build Coastguard Worker}
450