// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.documentai.v1beta1;

import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta1/geometry.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta1";
option go_package = "cloud.google.com/go/documentai/apiv1beta1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.documentai.v1beta1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta1";
option ruby_package = "Google::Cloud::DocumentAI::V1beta1";

// Document represents the canonical document resource in Document Understanding
// AI.
// It is an interchange format that provides insights into documents and allows
// for collaboration between users and Document Understanding AI to iterate and
// optimize for quality.
message Document {
  // For a large document, sharding may be performed to produce several
  // document shards. Each document shard contains this field to detail which
  // shard it is.
  message ShardInfo {
    // The 0-based index of this shard.
    int64 shard_index = 1;

    // Total number of shards.
    int64 shard_count = 2;

    // The index of the first character in
    // [Document.text][google.cloud.documentai.v1beta1.Document.text] in the
    // overall document global text.
    int64 text_offset = 3;
  }

  // Annotation for common text style attributes. This adheres to CSS
  // conventions as much as possible.
  message Style {
    // Font size with unit.
    message FontSize {
      // Font size for the text.
      float size = 1;

      // Unit for the font size. Follows CSS naming (in, px, pt, etc.).
      string unit = 2;
    }

    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
    TextAnchor text_anchor = 1;

    // Text color.
    google.type.Color color = 2;

    // Text background color.
    google.type.Color background_color = 3;

    // Font weight. Possible values are normal, bold, bolder, and lighter.
    // https://www.w3schools.com/cssref/pr_font_weight.asp
    string font_weight = 4;

    // Text style. Possible values are normal, italic, and oblique.
    // https://www.w3schools.com/cssref/pr_font_font-style.asp
    string text_style = 5;

    // Text decoration. Follows CSS standard.
    // <text-decoration-line> <text-decoration-color> <text-decoration-style>
    // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
    string text_decoration = 6;

    // Font size.
    FontSize font_size = 7;
  }

  // A page in a [Document][google.cloud.documentai.v1beta1.Document].
  message Page {
    // Dimension for the page.
    message Dimension {
      // Page width.
      float width = 1;

      // Page height.
      float height = 2;

      // Dimension unit.
      string unit = 3;
    }

    // Visual element describing a layout unit on a page.
    message Layout {
      // Detected human reading orientation.
      enum Orientation {
        // Unspecified orientation.
        ORIENTATION_UNSPECIFIED = 0;

        // Orientation is aligned with page up.
        PAGE_UP = 1;

        // Orientation is aligned with page right.
        // Turn the head 90 degrees clockwise from upright to read.
        PAGE_RIGHT = 2;

        // Orientation is aligned with page down.
        // Turn the head 180 degrees from upright to read.
        PAGE_DOWN = 3;

        // Orientation is aligned with page left.
        // Turn the head 90 degrees counterclockwise from upright to read.
        PAGE_LEFT = 4;
      }

      // Text anchor indexing into the
      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
      TextAnchor text_anchor = 1;

      // Confidence of the current
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] within
      // context of the object this layout is for. e.g. confidence can be for a
      // single token, a table, a visual element, etc. depending on context.
      // Range [0, 1].
      float confidence = 2;

      // The bounding polygon for the
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
      BoundingPoly bounding_poly = 3;

      // Detected orientation for the
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
      Orientation orientation = 4;
    }

    // A block has a set of lines (collected into paragraphs) that have a
    // common line-spacing and orientation.
    message Block {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Block][google.cloud.documentai.v1beta1.Document.Page.Block].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of lines that a human would perceive as a paragraph.
    message Paragraph {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Paragraph][google.cloud.documentai.v1beta1.Document.Page.Paragraph].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of tokens that a human would perceive as a line.
    // Does not cross column boundaries, can be horizontal, vertical, etc.
    message Line {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Line][google.cloud.documentai.v1beta1.Document.Page.Line].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A detected token.
    message Token {
      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
      message DetectedBreak {
        // Enum to denote the type of break found.
        enum Type {
          // Unspecified break type.
          TYPE_UNSPECIFIED = 0;

          // A single whitespace.
          SPACE = 1;

          // A wider whitespace.
          WIDE_SPACE = 2;

          // A hyphen that indicates that a token has been split across lines.
          HYPHEN = 3;
        }

        // Detected break type.
        Type type = 1;
      }

      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
      Layout layout = 1;

      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
      DetectedBreak detected_break = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // Detected non-text visual elements e.g. checkbox, signature etc. on the
    // page.
    message VisualElement {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
      Layout layout = 1;

      // Type of the
      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
      string type = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // A table representation similar to HTML table structure.
    message Table {
      // A row of table cells.
      message TableRow {
        // Cells that make up this row.
        repeated TableCell cells = 1;
      }

      // A cell representation inside the table.
      message TableCell {
        // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
        // [TableCell][google.cloud.documentai.v1beta1.Document.Page.Table.TableCell].
        Layout layout = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;

        // A list of detected languages together with confidence.
        repeated DetectedLanguage detected_languages = 4;
      }

      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Table][google.cloud.documentai.v1beta1.Document.Page.Table].
      Layout layout = 1;

      // Header rows of the table.
      repeated TableRow header_rows = 2;

      // Body rows of the table.
      repeated TableRow body_rows = 3;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 4;
    }

    // A form field detected on the page.
    message FormField {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
      // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
      Layout field_name = 1;

      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
      // value.
      Layout field_value = 2;

      // A list of detected languages for name together with confidence.
      repeated DetectedLanguage name_detected_languages = 3;

      // A list of detected languages for value together with confidence.
      repeated DetectedLanguage value_detected_languages = 4;
    }

    // Detected language for a structural component.
    message DetectedLanguage {
      // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
      // information, see
      // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
      string language_code = 1;

      // Confidence of detected language. Range [0, 1].
      float confidence = 2;
    }

    // 1-based index for current
    // [Page][google.cloud.documentai.v1beta1.Document.Page] in a parent
    // [Document][google.cloud.documentai.v1beta1.Document]. Useful when a page
    // is taken out of a [Document][google.cloud.documentai.v1beta1.Document]
    // for individual processing.
    int32 page_number = 1;

    // Physical dimension of the page.
    Dimension dimension = 2;

    // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
    // page.
    Layout layout = 3;

    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 4;

    // A list of visually detected text blocks on the page.
    // A block has a set of lines (collected into paragraphs) that have a common
    // line-spacing and orientation.
    repeated Block blocks = 5;

    // A list of visually detected text paragraphs on the page.
    // A collection of lines that a human would perceive as a paragraph.
    repeated Paragraph paragraphs = 6;

    // A list of visually detected text lines on the page.
    // A collection of tokens that a human would perceive as a line.
    repeated Line lines = 7;

    // A list of visually detected tokens on the page.
    repeated Token tokens = 8;

    // A list of detected non-text visual elements e.g. checkbox,
    // signature etc. on the page.
    repeated VisualElement visual_elements = 9;

    // A list of visually detected tables on the page.
    repeated Table tables = 10;

    // A list of visually detected form fields on the page.
    repeated FormField form_fields = 11;
  }

  // A phrase in the text that is a known entity type, such as a person, an
  // organization, or location.
  message Entity {
    // Provenance of the entity.
    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
    TextAnchor text_anchor = 1;

    // Required. Entity type from a schema e.g. `Address`.
    string type = 2 [(google.api.field_behavior) = REQUIRED];

    // Text value in the document e.g. `1600 Amphitheatre Pkwy`.
    string mention_text = 3;

    // Canonical mention name. This will be a unique value in the entity list
    // for this document.
    string mention_id = 4;
  }

  // Relationship between
  // [Entities][google.cloud.documentai.v1beta1.Document.Entity].
  message EntityRelation {
    // Subject entity mention_id.
    string subject_id = 1;

    // Object entity mention_id.
    string object_id = 2;

    // Relationship description.
    string relation = 3;
  }

  // Text reference indexing into the
  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  message TextAnchor {
    // A text segment in the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text]. The
    // indices may be out of bounds which indicate that the text extends into
    // another document shard for large sharded documents. See
    // [ShardInfo.text_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset].
    message TextSegment {
      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
      // start UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
      int64 start_index = 1;

      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
      // half open end UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
      int64 end_index = 2;
    }

    // The text segments from the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
    repeated TextSegment text_segments = 1;
  }

  // Original source document from the user.
  oneof source {
    // Currently supports Google Cloud Storage URI of the form
    // `gs://bucket_name/object_name`. Object versioning is not supported.
    // See [Google Cloud Storage Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
    // info.
    string uri = 1;

    // Inline document content, represented as a stream of bytes.
    // Note: As with all `bytes` fields, protobuffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes content = 2;
  }

  // An IANA published MIME type (also referred to as media type). For more
  // information, see
  // https://www.iana.org/assignments/media-types/media-types.xhtml.
  string mime_type = 3;

  // UTF-8 encoded text in reading order from the document.
  string text = 4;

  // Styles for the
  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  repeated Style text_styles = 5;

  // Visual page layout for the
  // [Document][google.cloud.documentai.v1beta1.Document].
  repeated Page pages = 6;

  // A list of entities detected on
  // [Document.text][google.cloud.documentai.v1beta1.Document.text]. For
  // document shards, entities in this list may cross shard boundaries.
  repeated Entity entities = 7;

  // Relationship among
  // [Document.entities][google.cloud.documentai.v1beta1.Document.entities].
  repeated EntityRelation entity_relations = 8;

  // Information about the sharding if this document is a sharded part of a
  // larger document. If the document is not sharded, this message is not
  // specified.
  ShardInfo shard_info = 9;

  // Any error that occurred while processing this document.
  google.rpc.Status error = 10;
}