// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.documentai.v1beta3;

import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta3/barcode.proto";
import "google/cloud/documentai/v1beta3/geometry.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/date.proto";
import "google/type/datetime.proto";
import "google/type/money.proto";
import "google/type/postal_address.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// Document represents the canonical document resource in Document AI. It is an
// interchange format that provides insights into documents and allows for
// collaboration between users and Document AI to iterate and optimize for
// quality.
message Document {
  // For a large document, sharding may be performed to produce several
  // document shards. Each document shard contains this field to detail which
  // shard it is.
  message ShardInfo {
    // The 0-based index of this shard.
    int64 shard_index = 1;

    // Total number of shards.
    int64 shard_count = 2;

    // The index of the first character in
    // [Document.text][google.cloud.documentai.v1beta3.Document.text] in the
    // overall document global text.
    int64 text_offset = 3;
  }

  // Annotation for common text style attributes. This adheres to CSS
  // conventions as much as possible.
  message Style {
    // Font size with unit.
    message FontSize {
      // Font size for the text.
      float size = 1;

      // Unit for the font size. Follows CSS naming (such as `in`, `px`, and
      // `pt`).
      string unit = 2;
    }

    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
    TextAnchor text_anchor = 1;

    // Text color.
    google.type.Color color = 2;

    // Text background color.
    google.type.Color background_color = 3;

    // [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
    // Possible values are `normal`, `bold`, `bolder`, and `lighter`.
    string font_weight = 4;

    // [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
    // Possible values are `normal`, `italic`, and `oblique`.
    string text_style = 5;

    // [Text
    // decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
    // Follows CSS standard. <text-decoration-line> <text-decoration-color>
    // <text-decoration-style>
    string text_decoration = 6;

    // Font size.
    FontSize font_size = 7;

    // Font family such as `Arial`, `Times New Roman`.
    // https://www.w3schools.com/cssref/pr_font_font-family.asp
    string font_family = 8;
  }

  // A page in a [Document][google.cloud.documentai.v1beta3.Document].
  message Page {
    // Dimension for the page.
    message Dimension {
      // Page width.
      float width = 1;

      // Page height.
      float height = 2;

      // Dimension unit.
      string unit = 3;
    }

    // Rendered image contents for this page.
    message Image {
      // Raw byte content of the image.
      bytes content = 1;

      // Encoding [media type (MIME
      // type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
      // for the image.
      string mime_type = 2;

      // Width of the image in pixels.
      int32 width = 3;

      // Height of the image in pixels.
      int32 height = 4;
    }

    // Representation for transformation matrix, intended to be compatible and
    // used with OpenCV format for image manipulation.
    message Matrix {
      // Number of rows in the matrix.
      int32 rows = 1;

      // Number of columns in the matrix.
      int32 cols = 2;

      // This encodes information about what data type the matrix uses.
      // For example, 0 (CV_8U) is an unsigned 8-bit image. For the full list
      // of OpenCV primitive data types, please refer to
      // https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html
      int32 type = 3;

      // The matrix data.
      bytes data = 4;
    }

    // Visual element describing a layout unit on a page.
    message Layout {
      // Detected human reading orientation.
      enum Orientation {
        // Unspecified orientation.
        ORIENTATION_UNSPECIFIED = 0;

        // Orientation is aligned with page up.
        PAGE_UP = 1;

        // Orientation is aligned with page right.
        // Turn the head 90 degrees clockwise from upright to read.
        PAGE_RIGHT = 2;

        // Orientation is aligned with page down.
        // Turn the head 180 degrees from upright to read.
        PAGE_DOWN = 3;

        // Orientation is aligned with page left.
        // Turn the head 90 degrees counterclockwise from upright to read.
        PAGE_LEFT = 4;
      }

      // Text anchor indexing into the
      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
      TextAnchor text_anchor = 1;

      // Confidence of the current
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] within
      // context of the object this layout is for. e.g. confidence can be for a
      // single token, a table, a visual element, etc. depending on context.
      // Range `[0, 1]`.
      float confidence = 2;

      // The bounding polygon for the
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
      BoundingPoly bounding_poly = 3;

      // Detected orientation for the
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
      Orientation orientation = 4;
    }

    // A block has a set of lines (collected into paragraphs) that have a
    // common line-spacing and orientation.
    message Block {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Block][google.cloud.documentai.v1beta3.Document.Page.Block].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;

      // The history of this annotation.
      Provenance provenance = 3 [deprecated = true];
    }

    // A collection of lines that a human would perceive as a paragraph.
    message Paragraph {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Paragraph][google.cloud.documentai.v1beta3.Document.Page.Paragraph].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;

      // The history of this annotation.
      Provenance provenance = 3 [deprecated = true];
    }

    // A collection of tokens that a human would perceive as a line.
    // Does not cross column boundaries, can be horizontal, vertical, etc.
    message Line {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Line][google.cloud.documentai.v1beta3.Document.Page.Line].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;

      // The history of this annotation.
      Provenance provenance = 3 [deprecated = true];
    }

    // A detected token.
    message Token {
      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
      message DetectedBreak {
        // Enum to denote the type of break found.
        enum Type {
          // Unspecified break type.
          TYPE_UNSPECIFIED = 0;

          // A single whitespace.
          SPACE = 1;

          // A wider whitespace.
          WIDE_SPACE = 2;

          // A hyphen that indicates that a token has been split across lines.
          HYPHEN = 3;
        }

        // Detected break type.
        Type type = 1;
      }

      // Font and other text style attributes.
      message StyleInfo {
        // Font size in points (`1` point is `¹⁄₇₂` inches).
        int32 font_size = 1;

        // Font size in pixels, equal to _unrounded
        // [font_size][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_size]_
        // * _resolution_ ÷ `72.0`.
        double pixel_font_size = 2;

        // Letter spacing in points.
        double letter_spacing = 3;

        // Name or style of the font.
        string font_type = 4;

        // Whether the text is bold (equivalent to
        // [font_weight][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_weight]
        // is at least `700`).
        bool bold = 5;

        // Whether the text is italic.
        bool italic = 6;

        // Whether the text is underlined.
        bool underlined = 7;

        // Whether the text is strikethrough. This feature is not supported yet.
        bool strikeout = 8;

        // Whether the text is a subscript. This feature is not supported yet.
        bool subscript = 9;

        // Whether the text is a superscript. This feature is not supported yet.
        bool superscript = 10;

        // Whether the text is in small caps. This feature is not supported yet.
        bool smallcaps = 11;

        // TrueType weight on a scale `100` (thin) to `1000` (ultra-heavy).
        // Normal is `400`, bold is `700`.
        int32 font_weight = 12;

        // Whether the text is handwritten.
        bool handwritten = 13;

        // Color of the text.
        google.type.Color text_color = 14;

        // Color of the background.
        google.type.Color background_color = 15;
      }

      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
      Layout layout = 1;

      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
      DetectedBreak detected_break = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;

      // The history of this annotation.
      Provenance provenance = 4 [deprecated = true];

      // Text style attributes.
      StyleInfo style_info = 5;
    }

    // A detected symbol.
    message Symbol {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // Detected non-text visual elements e.g. checkbox, signature etc. on the
    // page.
    message VisualElement {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
      Layout layout = 1;

      // Type of the
      // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
      string type = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // A table representation similar to HTML table structure.
    message Table {
      // A row of table cells.
      message TableRow {
        // Cells that make up this row.
        repeated TableCell cells = 1;
      }

      // A cell representation inside the table.
      message TableCell {
        // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
        // [TableCell][google.cloud.documentai.v1beta3.Document.Page.Table.TableCell].
        Layout layout = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;

        // A list of detected languages together with confidence.
        repeated DetectedLanguage detected_languages = 4;
      }

      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Table][google.cloud.documentai.v1beta3.Document.Page.Table].
      Layout layout = 1;

      // Header rows of the table.
      repeated TableRow header_rows = 2;

      // Body rows of the table.
      repeated TableRow body_rows = 3;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 4;

      // The history of this table.
      Provenance provenance = 5 [deprecated = true];
    }

    // A form field detected on the page.
    message FormField {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
      // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
      Layout field_name = 1;

      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
      // value.
      Layout field_value = 2;

      // A list of detected languages for name together with confidence.
      repeated DetectedLanguage name_detected_languages = 3;

      // A list of detected languages for value together with confidence.
      repeated DetectedLanguage value_detected_languages = 4;

      // If the value is non-textual, this field represents the type. Current
      // valid values are:
      //
      // - blank (this indicates the `field_value` is normal text)
      // - `unfilled_checkbox`
      // - `filled_checkbox`
      string value_type = 5;

      // Created for Labeling UI to export key text.
      // If corrections were made to the text identified by the
      // `field_name.text_anchor`, this field will contain the correction.
      string corrected_key_text = 6;

      // Created for Labeling UI to export value text.
      // If corrections were made to the text identified by the
      // `field_value.text_anchor`, this field will contain the correction.
      string corrected_value_text = 7;

      // The history of this annotation.
      Provenance provenance = 8;
    }

    // A detected barcode.
    message DetectedBarcode {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
      Layout layout = 1;

      // Detailed barcode information of the
      // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
      Barcode barcode = 2;
    }

    // Detected language for a structural component.
    message DetectedLanguage {
      // The [BCP-47 language
      // code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
      // such as `en-US` or `sr-Latn`.
      string language_code = 1;

      // Confidence of detected language. Range `[0, 1]`.
      float confidence = 2;
    }

    // Image quality scores for the page image.
    message ImageQualityScores {
      // Image Quality Defects
      message DetectedDefect {
        // Name of the defect type. Supported values are:
        //
        // - `quality/defect_blurry`
        // - `quality/defect_noisy`
        // - `quality/defect_dark`
        // - `quality/defect_faint`
        // - `quality/defect_text_too_small`
        // - `quality/defect_document_cutoff`
        // - `quality/defect_text_cutoff`
        // - `quality/defect_glare`
        string type = 1;

        // Confidence of detected defect. Range `[0, 1]` where `1` indicates
        // strong confidence that the defect exists.
        float confidence = 2;
      }

      // The overall quality score. Range `[0, 1]` where `1` is perfect quality.
      float quality_score = 1;

      // A list of detected defects.
      repeated DetectedDefect detected_defects = 2;
    }

    // 1-based index for current
    // [Page][google.cloud.documentai.v1beta3.Document.Page] in a parent
    // [Document][google.cloud.documentai.v1beta3.Document].
Useful when a page 492*d5c09012SAndroid Build Coastguard Worker // is taken out of a [Document][google.cloud.documentai.v1beta3.Document] 493*d5c09012SAndroid Build Coastguard Worker // for individual processing. 494*d5c09012SAndroid Build Coastguard Worker int32 page_number = 1; 495*d5c09012SAndroid Build Coastguard Worker 496*d5c09012SAndroid Build Coastguard Worker // Rendered image for this page. This image is preprocessed to remove any 497*d5c09012SAndroid Build Coastguard Worker // skew, rotation, and distortions such that the annotation bounding boxes 498*d5c09012SAndroid Build Coastguard Worker // can be upright and axis-aligned. 499*d5c09012SAndroid Build Coastguard Worker Image image = 13; 500*d5c09012SAndroid Build Coastguard Worker 501*d5c09012SAndroid Build Coastguard Worker // Transformation matrices that were applied to the original document image 502*d5c09012SAndroid Build Coastguard Worker // to produce 503*d5c09012SAndroid Build Coastguard Worker // [Page.image][google.cloud.documentai.v1beta3.Document.Page.image]. 504*d5c09012SAndroid Build Coastguard Worker repeated Matrix transforms = 14; 505*d5c09012SAndroid Build Coastguard Worker 506*d5c09012SAndroid Build Coastguard Worker // Physical dimension of the page. 507*d5c09012SAndroid Build Coastguard Worker Dimension dimension = 2; 508*d5c09012SAndroid Build Coastguard Worker 509*d5c09012SAndroid Build Coastguard Worker // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the 510*d5c09012SAndroid Build Coastguard Worker // page. 511*d5c09012SAndroid Build Coastguard Worker Layout layout = 3; 512*d5c09012SAndroid Build Coastguard Worker 513*d5c09012SAndroid Build Coastguard Worker // A list of detected languages together with confidence. 514*d5c09012SAndroid Build Coastguard Worker repeated DetectedLanguage detected_languages = 4; 515*d5c09012SAndroid Build Coastguard Worker 516*d5c09012SAndroid Build Coastguard Worker // A list of visually detected text blocks on the page. 
517*d5c09012SAndroid Build Coastguard Worker // A block has a set of lines (collected into paragraphs) that have a common 518*d5c09012SAndroid Build Coastguard Worker // line-spacing and orientation. 519*d5c09012SAndroid Build Coastguard Worker repeated Block blocks = 5; 520*d5c09012SAndroid Build Coastguard Worker 521*d5c09012SAndroid Build Coastguard Worker // A list of visually detected text paragraphs on the page. 522*d5c09012SAndroid Build Coastguard Worker // A collection of lines that a human would perceive as a paragraph. 523*d5c09012SAndroid Build Coastguard Worker repeated Paragraph paragraphs = 6; 524*d5c09012SAndroid Build Coastguard Worker 525*d5c09012SAndroid Build Coastguard Worker // A list of visually detected text lines on the page. 526*d5c09012SAndroid Build Coastguard Worker // A collection of tokens that a human would perceive as a line. 527*d5c09012SAndroid Build Coastguard Worker repeated Line lines = 7; 528*d5c09012SAndroid Build Coastguard Worker 529*d5c09012SAndroid Build Coastguard Worker // A list of visually detected tokens on the page. 530*d5c09012SAndroid Build Coastguard Worker repeated Token tokens = 8; 531*d5c09012SAndroid Build Coastguard Worker 532*d5c09012SAndroid Build Coastguard Worker // A list of detected non-text visual elements e.g. checkbox, 533*d5c09012SAndroid Build Coastguard Worker // signature etc. on the page. 534*d5c09012SAndroid Build Coastguard Worker repeated VisualElement visual_elements = 9; 535*d5c09012SAndroid Build Coastguard Worker 536*d5c09012SAndroid Build Coastguard Worker // A list of visually detected tables on the page. 537*d5c09012SAndroid Build Coastguard Worker repeated Table tables = 10; 538*d5c09012SAndroid Build Coastguard Worker 539*d5c09012SAndroid Build Coastguard Worker // A list of visually detected form fields on the page. 
540*d5c09012SAndroid Build Coastguard Worker repeated FormField form_fields = 11; 541*d5c09012SAndroid Build Coastguard Worker 542*d5c09012SAndroid Build Coastguard Worker // A list of visually detected symbols on the page. 543*d5c09012SAndroid Build Coastguard Worker repeated Symbol symbols = 12; 544*d5c09012SAndroid Build Coastguard Worker 545*d5c09012SAndroid Build Coastguard Worker // A list of detected barcodes. 546*d5c09012SAndroid Build Coastguard Worker repeated DetectedBarcode detected_barcodes = 15; 547*d5c09012SAndroid Build Coastguard Worker 548*d5c09012SAndroid Build Coastguard Worker // Image quality scores. 549*d5c09012SAndroid Build Coastguard Worker ImageQualityScores image_quality_scores = 17; 550*d5c09012SAndroid Build Coastguard Worker 551*d5c09012SAndroid Build Coastguard Worker // The history of this page. 552*d5c09012SAndroid Build Coastguard Worker Provenance provenance = 16 [deprecated = true]; 553*d5c09012SAndroid Build Coastguard Worker } 554*d5c09012SAndroid Build Coastguard Worker 555*d5c09012SAndroid Build Coastguard Worker // An entity that could be a phrase in the text or a property that belongs to 556*d5c09012SAndroid Build Coastguard Worker // the document. It is a known entity type, such as a person, an organization, 557*d5c09012SAndroid Build Coastguard Worker // or location. 558*d5c09012SAndroid Build Coastguard Worker message Entity { 559*d5c09012SAndroid Build Coastguard Worker // Parsed and normalized entity value. 560*d5c09012SAndroid Build Coastguard Worker message NormalizedValue { 561*d5c09012SAndroid Build Coastguard Worker // An optional structured entity value. 562*d5c09012SAndroid Build Coastguard Worker // Must match entity type defined in schema if 563*d5c09012SAndroid Build Coastguard Worker // known. If this field is present, the `text` field could also be 564*d5c09012SAndroid Build Coastguard Worker // populated. 
565*d5c09012SAndroid Build Coastguard Worker oneof structured_value { 566*d5c09012SAndroid Build Coastguard Worker // Money value. See also: 567*d5c09012SAndroid Build Coastguard Worker // https://github.com/googleapis/googleapis/blob/master/google/type/money.proto 568*d5c09012SAndroid Build Coastguard Worker google.type.Money money_value = 2; 569*d5c09012SAndroid Build Coastguard Worker 570*d5c09012SAndroid Build Coastguard Worker // Date value. Includes year, month, day. See also: 571*d5c09012SAndroid Build Coastguard Worker // https://github.com/googleapis/googleapis/blob/master/google/type/date.proto 572*d5c09012SAndroid Build Coastguard Worker google.type.Date date_value = 3; 573*d5c09012SAndroid Build Coastguard Worker 574*d5c09012SAndroid Build Coastguard Worker // DateTime value. Includes date, time, and timezone. See also: 575*d5c09012SAndroid Build Coastguard Worker // https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto 576*d5c09012SAndroid Build Coastguard Worker google.type.DateTime datetime_value = 4; 577*d5c09012SAndroid Build Coastguard Worker 578*d5c09012SAndroid Build Coastguard Worker // Postal address. See also: 579*d5c09012SAndroid Build Coastguard Worker // https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto 580*d5c09012SAndroid Build Coastguard Worker google.type.PostalAddress address_value = 5; 581*d5c09012SAndroid Build Coastguard Worker 582*d5c09012SAndroid Build Coastguard Worker // Boolean value. Can be used for entities with binary values, or for 583*d5c09012SAndroid Build Coastguard Worker // checkboxes. 584*d5c09012SAndroid Build Coastguard Worker bool boolean_value = 6; 585*d5c09012SAndroid Build Coastguard Worker 586*d5c09012SAndroid Build Coastguard Worker // Integer value. 587*d5c09012SAndroid Build Coastguard Worker int32 integer_value = 7; 588*d5c09012SAndroid Build Coastguard Worker 589*d5c09012SAndroid Build Coastguard Worker // Float value. 
590*d5c09012SAndroid Build Coastguard Worker float float_value = 8; 591*d5c09012SAndroid Build Coastguard Worker } 592*d5c09012SAndroid Build Coastguard Worker 593*d5c09012SAndroid Build Coastguard Worker // Optional. An optional field to store a normalized string. 594*d5c09012SAndroid Build Coastguard Worker // For some entity types, one of respective `structured_value` fields may 595*d5c09012SAndroid Build Coastguard Worker // also be populated. Also not all the types of `structured_value` will be 596*d5c09012SAndroid Build Coastguard Worker // normalized. For example, some processors may not generate `float` 597*d5c09012SAndroid Build Coastguard Worker // or `integer` normalized text by default. 598*d5c09012SAndroid Build Coastguard Worker // 599*d5c09012SAndroid Build Coastguard Worker // Below are sample formats mapped to structured values. 600*d5c09012SAndroid Build Coastguard Worker // 601*d5c09012SAndroid Build Coastguard Worker // - Money/Currency type (`money_value`) is in the ISO 4217 text format. 602*d5c09012SAndroid Build Coastguard Worker // - Date type (`date_value`) is in the ISO 8601 text format. 603*d5c09012SAndroid Build Coastguard Worker // - Datetime type (`datetime_value`) is in the ISO 8601 text format. 604*d5c09012SAndroid Build Coastguard Worker string text = 1 [(google.api.field_behavior) = OPTIONAL]; 605*d5c09012SAndroid Build Coastguard Worker } 606*d5c09012SAndroid Build Coastguard Worker 607*d5c09012SAndroid Build Coastguard Worker // Optional. Provenance of the entity. 608*d5c09012SAndroid Build Coastguard Worker // Text anchor indexing into the 609*d5c09012SAndroid Build Coastguard Worker // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 610*d5c09012SAndroid Build Coastguard Worker TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL]; 611*d5c09012SAndroid Build Coastguard Worker 612*d5c09012SAndroid Build Coastguard Worker // Required. Entity type from a schema e.g. `Address`. 
613*d5c09012SAndroid Build Coastguard Worker string type = 2 [(google.api.field_behavior) = REQUIRED]; 614*d5c09012SAndroid Build Coastguard Worker 615*d5c09012SAndroid Build Coastguard Worker // Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`. 616*d5c09012SAndroid Build Coastguard Worker string mention_text = 3 [(google.api.field_behavior) = OPTIONAL]; 617*d5c09012SAndroid Build Coastguard Worker 618*d5c09012SAndroid Build Coastguard Worker // Optional. Deprecated. Use `id` field instead. 619*d5c09012SAndroid Build Coastguard Worker string mention_id = 4 [(google.api.field_behavior) = OPTIONAL]; 620*d5c09012SAndroid Build Coastguard Worker 621*d5c09012SAndroid Build Coastguard Worker // Optional. Confidence of detected Schema entity. Range `[0, 1]`. 622*d5c09012SAndroid Build Coastguard Worker float confidence = 5 [(google.api.field_behavior) = OPTIONAL]; 623*d5c09012SAndroid Build Coastguard Worker 624*d5c09012SAndroid Build Coastguard Worker // Optional. Represents the provenance of this entity wrt. the location on 625*d5c09012SAndroid Build Coastguard Worker // the page where it was found. 626*d5c09012SAndroid Build Coastguard Worker PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL]; 627*d5c09012SAndroid Build Coastguard Worker 628*d5c09012SAndroid Build Coastguard Worker // Optional. Canonical id. This will be a unique value in the entity list 629*d5c09012SAndroid Build Coastguard Worker // for this document. 630*d5c09012SAndroid Build Coastguard Worker string id = 7 [(google.api.field_behavior) = OPTIONAL]; 631*d5c09012SAndroid Build Coastguard Worker 632*d5c09012SAndroid Build Coastguard Worker // Optional. Normalized entity value. Absent if the extracted value could 633*d5c09012SAndroid Build Coastguard Worker // not be converted or the type (e.g. address) is not supported for certain 634*d5c09012SAndroid Build Coastguard Worker // parsers. 
This field is also only populated for certain supported document 635*d5c09012SAndroid Build Coastguard Worker // types. 636*d5c09012SAndroid Build Coastguard Worker NormalizedValue normalized_value = 9 637*d5c09012SAndroid Build Coastguard Worker [(google.api.field_behavior) = OPTIONAL]; 638*d5c09012SAndroid Build Coastguard Worker 639*d5c09012SAndroid Build Coastguard Worker // Optional. Entities can be nested to form a hierarchical data structure 640*d5c09012SAndroid Build Coastguard Worker // representing the content in the document. 641*d5c09012SAndroid Build Coastguard Worker repeated Entity properties = 10 [(google.api.field_behavior) = OPTIONAL]; 642*d5c09012SAndroid Build Coastguard Worker 643*d5c09012SAndroid Build Coastguard Worker // Optional. The history of this annotation. 644*d5c09012SAndroid Build Coastguard Worker Provenance provenance = 11 [(google.api.field_behavior) = OPTIONAL]; 645*d5c09012SAndroid Build Coastguard Worker 646*d5c09012SAndroid Build Coastguard Worker // Optional. Whether the entity will be redacted for de-identification 647*d5c09012SAndroid Build Coastguard Worker // purposes. 648*d5c09012SAndroid Build Coastguard Worker bool redacted = 12 [(google.api.field_behavior) = OPTIONAL]; 649*d5c09012SAndroid Build Coastguard Worker } 650*d5c09012SAndroid Build Coastguard Worker 651*d5c09012SAndroid Build Coastguard Worker // Relationship between 652*d5c09012SAndroid Build Coastguard Worker // [Entities][google.cloud.documentai.v1beta3.Document.Entity]. 653*d5c09012SAndroid Build Coastguard Worker message EntityRelation { 654*d5c09012SAndroid Build Coastguard Worker // Subject entity id. 655*d5c09012SAndroid Build Coastguard Worker string subject_id = 1; 656*d5c09012SAndroid Build Coastguard Worker 657*d5c09012SAndroid Build Coastguard Worker // Object entity id. 
658*d5c09012SAndroid Build Coastguard Worker string object_id = 2; 659*d5c09012SAndroid Build Coastguard Worker 660*d5c09012SAndroid Build Coastguard Worker // Relationship description. 661*d5c09012SAndroid Build Coastguard Worker string relation = 3; 662*d5c09012SAndroid Build Coastguard Worker } 663*d5c09012SAndroid Build Coastguard Worker 664*d5c09012SAndroid Build Coastguard Worker // Text reference indexing into the 665*d5c09012SAndroid Build Coastguard Worker // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 666*d5c09012SAndroid Build Coastguard Worker message TextAnchor { 667*d5c09012SAndroid Build Coastguard Worker // A text segment in the 668*d5c09012SAndroid Build Coastguard Worker // [Document.text][google.cloud.documentai.v1beta3.Document.text]. The 669*d5c09012SAndroid Build Coastguard Worker // indices may be out of bounds which indicate that the text extends into 670*d5c09012SAndroid Build Coastguard Worker // another document shard for large sharded documents. See 671*d5c09012SAndroid Build Coastguard Worker // [ShardInfo.text_offset][google.cloud.documentai.v1beta3.Document.ShardInfo.text_offset] 672*d5c09012SAndroid Build Coastguard Worker message TextSegment { 673*d5c09012SAndroid Build Coastguard Worker // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment] 674*d5c09012SAndroid Build Coastguard Worker // start UTF-8 char index in the 675*d5c09012SAndroid Build Coastguard Worker // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 676*d5c09012SAndroid Build Coastguard Worker int64 start_index = 1; 677*d5c09012SAndroid Build Coastguard Worker 678*d5c09012SAndroid Build Coastguard Worker // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment] 679*d5c09012SAndroid Build Coastguard Worker // half open end UTF-8 char index in the 680*d5c09012SAndroid Build Coastguard Worker // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 
681*d5c09012SAndroid Build Coastguard Worker int64 end_index = 2; 682*d5c09012SAndroid Build Coastguard Worker } 683*d5c09012SAndroid Build Coastguard Worker 684*d5c09012SAndroid Build Coastguard Worker // The text segments from the 685*d5c09012SAndroid Build Coastguard Worker // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 686*d5c09012SAndroid Build Coastguard Worker repeated TextSegment text_segments = 1; 687*d5c09012SAndroid Build Coastguard Worker 688*d5c09012SAndroid Build Coastguard Worker // Contains the content of the text span so that users do 689*d5c09012SAndroid Build Coastguard Worker // not have to look it up in the text_segments. It is always 690*d5c09012SAndroid Build Coastguard Worker // populated for formFields. 691*d5c09012SAndroid Build Coastguard Worker string content = 2; 692*d5c09012SAndroid Build Coastguard Worker } 693*d5c09012SAndroid Build Coastguard Worker 694*d5c09012SAndroid Build Coastguard Worker // Referencing the visual context of the entity in the 695*d5c09012SAndroid Build Coastguard Worker // [Document.pages][google.cloud.documentai.v1beta3.Document.pages]. Page 696*d5c09012SAndroid Build Coastguard Worker // anchors can be cross-page, consist of multiple bounding polygons and 697*d5c09012SAndroid Build Coastguard Worker // optionally reference specific layout element types. 698*d5c09012SAndroid Build Coastguard Worker message PageAnchor { 699*d5c09012SAndroid Build Coastguard Worker // Represents a weak reference to a page element within a document. 700*d5c09012SAndroid Build Coastguard Worker message PageRef { 701*d5c09012SAndroid Build Coastguard Worker // The type of layout that is being referenced. 702*d5c09012SAndroid Build Coastguard Worker enum LayoutType { 703*d5c09012SAndroid Build Coastguard Worker // Layout Unspecified. 
704*d5c09012SAndroid Build Coastguard Worker LAYOUT_TYPE_UNSPECIFIED = 0; 705*d5c09012SAndroid Build Coastguard Worker 706*d5c09012SAndroid Build Coastguard Worker // References a 707*d5c09012SAndroid Build Coastguard Worker // [Page.blocks][google.cloud.documentai.v1beta3.Document.Page.blocks] 708*d5c09012SAndroid Build Coastguard Worker // element. 709*d5c09012SAndroid Build Coastguard Worker BLOCK = 1; 710*d5c09012SAndroid Build Coastguard Worker 711*d5c09012SAndroid Build Coastguard Worker // References a 712*d5c09012SAndroid Build Coastguard Worker // [Page.paragraphs][google.cloud.documentai.v1beta3.Document.Page.paragraphs] 713*d5c09012SAndroid Build Coastguard Worker // element. 714*d5c09012SAndroid Build Coastguard Worker PARAGRAPH = 2; 715*d5c09012SAndroid Build Coastguard Worker 716*d5c09012SAndroid Build Coastguard Worker // References a 717*d5c09012SAndroid Build Coastguard Worker // [Page.lines][google.cloud.documentai.v1beta3.Document.Page.lines] 718*d5c09012SAndroid Build Coastguard Worker // element. 719*d5c09012SAndroid Build Coastguard Worker LINE = 3; 720*d5c09012SAndroid Build Coastguard Worker 721*d5c09012SAndroid Build Coastguard Worker // References a 722*d5c09012SAndroid Build Coastguard Worker // [Page.tokens][google.cloud.documentai.v1beta3.Document.Page.tokens] 723*d5c09012SAndroid Build Coastguard Worker // element. 724*d5c09012SAndroid Build Coastguard Worker TOKEN = 4; 725*d5c09012SAndroid Build Coastguard Worker 726*d5c09012SAndroid Build Coastguard Worker // References a 727*d5c09012SAndroid Build Coastguard Worker // [Page.visual_elements][google.cloud.documentai.v1beta3.Document.Page.visual_elements] 728*d5c09012SAndroid Build Coastguard Worker // element. 
729*d5c09012SAndroid Build Coastguard Worker VISUAL_ELEMENT = 5; 730*d5c09012SAndroid Build Coastguard Worker 731*d5c09012SAndroid Build Coastguard Worker // References a 732*d5c09012SAndroid Build Coastguard Worker // [Page.tables][google.cloud.documentai.v1beta3.Document.Page.tables] 733*d5c09012SAndroid Build Coastguard Worker // element. 734*d5c09012SAndroid Build Coastguard Worker TABLE = 6; 735*d5c09012SAndroid Build Coastguard Worker 736*d5c09012SAndroid Build Coastguard Worker // References a 737*d5c09012SAndroid Build Coastguard Worker // [Page.form_fields][google.cloud.documentai.v1beta3.Document.Page.form_fields] 738*d5c09012SAndroid Build Coastguard Worker // element. 739*d5c09012SAndroid Build Coastguard Worker FORM_FIELD = 7; 740*d5c09012SAndroid Build Coastguard Worker } 741*d5c09012SAndroid Build Coastguard Worker 742*d5c09012SAndroid Build Coastguard Worker // Required. Index into the 743*d5c09012SAndroid Build Coastguard Worker // [Document.pages][google.cloud.documentai.v1beta3.Document.pages] 744*d5c09012SAndroid Build Coastguard Worker // element, for example using 745*d5c09012SAndroid Build Coastguard Worker // `[Document.pages][page_refs.page]` to locate the related page element. 746*d5c09012SAndroid Build Coastguard Worker // This field is skipped when its value is the default `0`. See 747*d5c09012SAndroid Build Coastguard Worker // https://developers.google.com/protocol-buffers/docs/proto3#json. 748*d5c09012SAndroid Build Coastguard Worker int64 page = 1 [(google.api.field_behavior) = REQUIRED]; 749*d5c09012SAndroid Build Coastguard Worker 750*d5c09012SAndroid Build Coastguard Worker // Optional. The type of the layout element that is being referenced if 751*d5c09012SAndroid Build Coastguard Worker // any. 752*d5c09012SAndroid Build Coastguard Worker LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL]; 753*d5c09012SAndroid Build Coastguard Worker 754*d5c09012SAndroid Build Coastguard Worker // Optional. Deprecated. 
Use 755*d5c09012SAndroid Build Coastguard Worker // [PageRef.bounding_poly][google.cloud.documentai.v1beta3.Document.PageAnchor.PageRef.bounding_poly] 756*d5c09012SAndroid Build Coastguard Worker // instead. 757*d5c09012SAndroid Build Coastguard Worker string layout_id = 3 758*d5c09012SAndroid Build Coastguard Worker [deprecated = true, (google.api.field_behavior) = OPTIONAL]; 759*d5c09012SAndroid Build Coastguard Worker 760*d5c09012SAndroid Build Coastguard Worker // Optional. Identifies the bounding polygon of a layout element on the 761*d5c09012SAndroid Build Coastguard Worker // page. If `layout_type` is set, the bounding polygon must be exactly the 762*d5c09012SAndroid Build Coastguard Worker // same as the layout element it's referring to. 763*d5c09012SAndroid Build Coastguard Worker BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL]; 764*d5c09012SAndroid Build Coastguard Worker 765*d5c09012SAndroid Build Coastguard Worker // Optional. Confidence of detected page element, if applicable. Range 766*d5c09012SAndroid Build Coastguard Worker // `[0, 1]`. 767*d5c09012SAndroid Build Coastguard Worker float confidence = 5 [(google.api.field_behavior) = OPTIONAL]; 768*d5c09012SAndroid Build Coastguard Worker } 769*d5c09012SAndroid Build Coastguard Worker 770*d5c09012SAndroid Build Coastguard Worker // One or more references to visual page elements. 771*d5c09012SAndroid Build Coastguard Worker repeated PageRef page_refs = 1; 772*d5c09012SAndroid Build Coastguard Worker } 773*d5c09012SAndroid Build Coastguard Worker 774*d5c09012SAndroid Build Coastguard Worker // Structure to identify provenance relationships between annotations in 775*d5c09012SAndroid Build Coastguard Worker // different revisions. 776*d5c09012SAndroid Build Coastguard Worker message Provenance { 777*d5c09012SAndroid Build Coastguard Worker // The parent element the current element is based on. 
Used for 778*d5c09012SAndroid Build Coastguard Worker // referencing/aligning, removal and replacement operations. 779*d5c09012SAndroid Build Coastguard Worker message Parent { 780*d5c09012SAndroid Build Coastguard Worker // The index into the current revision's `parent_ids` list. 781*d5c09012SAndroid Build Coastguard Worker int32 revision = 1; 782*d5c09012SAndroid Build Coastguard Worker 783*d5c09012SAndroid Build Coastguard Worker // The index of the parent item in the corresponding item list (e.g. list 784*d5c09012SAndroid Build Coastguard Worker // of entities, properties within entities, etc.) in the parent revision. 785*d5c09012SAndroid Build Coastguard Worker int32 index = 3; 786*d5c09012SAndroid Build Coastguard Worker 787*d5c09012SAndroid Build Coastguard Worker // The id of the parent provenance. 788*d5c09012SAndroid Build Coastguard Worker int32 id = 2 [deprecated = true]; 789*d5c09012SAndroid Build Coastguard Worker } 790*d5c09012SAndroid Build Coastguard Worker 791*d5c09012SAndroid Build Coastguard Worker // If a processor or agent does an explicit operation on existing elements. 792*d5c09012SAndroid Build Coastguard Worker enum OperationType { 793*d5c09012SAndroid Build Coastguard Worker // Operation type unspecified. If no operation is specified a provenance 794*d5c09012SAndroid Build Coastguard Worker // entry is simply used to match against a `parent`. 795*d5c09012SAndroid Build Coastguard Worker OPERATION_TYPE_UNSPECIFIED = 0; 796*d5c09012SAndroid Build Coastguard Worker 797*d5c09012SAndroid Build Coastguard Worker // Add an element. 798*d5c09012SAndroid Build Coastguard Worker ADD = 1; 799*d5c09012SAndroid Build Coastguard Worker 800*d5c09012SAndroid Build Coastguard Worker // Remove an element identified by `parent`. 801*d5c09012SAndroid Build Coastguard Worker REMOVE = 2; 802*d5c09012SAndroid Build Coastguard Worker 803*d5c09012SAndroid Build Coastguard Worker // Updates any fields within the given provenance scope of the message. 
// It overwrites the fields rather than replacing them. Use this when you
      // want to update a field value of an entity without also updating all the
      // child properties.
      UPDATE = 7;

      // Currently unused. Replace an element identified by `parent`.
      REPLACE = 3;

      // Deprecated. Request human review for the element identified by
      // `parent`.
      EVAL_REQUESTED = 4 [deprecated = true];

      // Deprecated. Element is reviewed and approved at human review,
      // confidence will be set to 1.0.
      EVAL_APPROVED = 5 [deprecated = true];

      // Deprecated. Element is skipped in the validation process.
      EVAL_SKIPPED = 6 [deprecated = true];
    }

    // The index of the revision that produced this element.
    int32 revision = 1 [deprecated = true];

    // The Id of this operation. Needs to be unique within the scope of the
    // revision.
    int32 id = 2 [deprecated = true];

    // References to the original elements that are replaced.
    repeated Parent parents = 3;

    // The type of provenance operation.
    OperationType type = 4;
  }

  // Contains past or forward revisions of this document.
  message Revision {
    // Human Review information of the document.
    message HumanReview {
      // Human review state, e.g. `requested`, `succeeded`, `rejected`.
      string state = 1;

      // A message providing more details about the current state of processing.
      // For example, the rejection reason when the state is `rejected`.
      string state_message = 2;
    }

    // Who/what made the change.
    oneof source {
      // If the change was made by a person, specify the name or id of that
      // person.
      string agent = 4;

      // If the annotation was made by a processor, identify the processor by
      // its resource name.
      string processor = 5;
    }

    // Id of the revision, internally generated by doc proto storage.
    // Unique within the context of the document.
    string id = 1;

    // The revisions that this revision is based on. This can include one or
    // more parents (when documents are merged). This field represents the
    // index into the `revisions` field.
    repeated int32 parent = 2 [deprecated = true];

    // The revisions that this revision is based on. Must include all the ids
    // that have anything to do with this revision - e.g. there are
    // `provenance.parent.revision` fields that index into this field.
    repeated string parent_ids = 7;

    // The time that the revision was created, internally generated by
    // doc proto storage at the time of create.
    google.protobuf.Timestamp create_time = 3;

    // Human Review information of this revision.
    HumanReview human_review = 6;
  }

  // This message is used for text changes, a.k.a. OCR corrections.
  message TextChange {
    // Location of the correction: a text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta3.Document.text]. There
    // can only be a single `TextAnchor.text_segments` element. If the start
    // and end index of the text segment are the same, the text change is
    // inserted before that index.
    TextAnchor text_anchor = 1;

    // The text that replaces the text identified in the `text_anchor`.
    string changed_text = 2;

    // The history of this annotation, i.e. the provenance of the correction.
    repeated Provenance provenance = 3 [deprecated = true];
  }

  // Represents the parsed layout of a document as a collection of blocks that
  // the document is divided into.
message DocumentLayout {
    // Represents a block. A block could be one of the various types (text,
    // table, list) supported.
    message DocumentLayoutBlock {
      // Represents where the block starts and ends in the document.
      message LayoutPageSpan {
        // Page where block starts in the document.
        int32 page_start = 1;

        // Page where block ends in the document.
        int32 page_end = 2;
      }

      // Represents a text type block.
      message LayoutTextBlock {
        // Text content stored in the block.
        string text = 1;

        // Type of the text in the block. Available options are: `paragraph`,
        // `subtitle`, `heading-1`, `heading-2`, `heading-3`, `heading-4`,
        // `heading-5`, `header`, `footer`.
        string type = 2;

        // A text block could further have child blocks.
        // Repeated blocks support further hierarchies and nested blocks.
        repeated DocumentLayoutBlock blocks = 3;
      }

      // Represents a table type block.
      message LayoutTableBlock {
        // Header rows at the top of the table.
        repeated LayoutTableRow header_rows = 1;

        // Body rows containing main table content.
        repeated LayoutTableRow body_rows = 2;

        // Table caption/title.
        string caption = 3;
      }

      // Represents a row in a table.
      message LayoutTableRow {
        // A table row is a list of table cells.
        repeated LayoutTableCell cells = 1;
      }

      // Represents a cell in a table row.
      message LayoutTableCell {
        // A table cell is a list of blocks.
        // Repeated blocks support further hierarchies and nested blocks.
        repeated DocumentLayoutBlock blocks = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;
      }

      // Represents a list type block.
      message LayoutListBlock {
        // List entries that constitute a list block.
        repeated LayoutListEntry list_entries = 1;

        // Type of the list_entries (if any exist). Available options are
        // `ordered` and `unordered`.
        string type = 2;
      }

      // Represents an entry in the list.
      message LayoutListEntry {
        // A list entry is a list of blocks.
        // Repeated blocks support further hierarchies and nested blocks.
        repeated DocumentLayoutBlock blocks = 1;
      }

      // The content of the block; exactly one of the variants below is set.
      oneof block {
        // Block consisting of text content.
        LayoutTextBlock text_block = 2;

        // Block consisting of table content/structure.
        LayoutTableBlock table_block = 3;

        // Block consisting of list content/structure.
        LayoutListBlock list_block = 4;
      }

      // ID of the block.
      string block_id = 1;

      // Page span of the block.
      LayoutPageSpan page_span = 5;
    }

    // List of blocks in the document.
    repeated DocumentLayoutBlock blocks = 1;
  }

  // Represents the chunks that the document is divided into.
  message ChunkedDocument {
    // Represents a chunk.
    message Chunk {
      // Represents where the chunk starts and ends in the document.
      message ChunkPageSpan {
        // Page where chunk starts in the document.
        int32 page_start = 1;

        // Page where chunk ends in the document.
        int32 page_end = 2;
      }

      // Represents the page header associated with the chunk.
      message ChunkPageHeader {
        // Header in text format.
        string text = 1;

        // Page span of the header.
        ChunkPageSpan page_span = 2;
      }

      // Represents the page footer associated with the chunk.
      message ChunkPageFooter {
        // Footer in text format.
        string text = 1;

        // Page span of the footer.
        ChunkPageSpan page_span = 2;
      }

      // ID of the chunk.
      string chunk_id = 1;

      // DO NOT USE.
      // List of all parsed document layout source blocks used to generate the
      // chunk.
      repeated string source_block_ids = 2;

      // Text content of the chunk.
      string content = 3;

      // Page span of the chunk.
      ChunkPageSpan page_span = 4;

      // Page headers associated with the chunk.
      repeated ChunkPageHeader page_headers = 5;

      // Page footers associated with the chunk.
      repeated ChunkPageFooter page_footers = 6;
    }

    // List of chunks.
    repeated Chunk chunks = 1;
  }

  // Original source document from the user.
  oneof source {
    // Optional. Currently supports Google Cloud Storage URI of the form
    // `gs://bucket_name/object_name`. Object versioning is not supported.
    // For more information, refer to [Google Cloud Storage Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris).
    string uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Inline document content, represented as a stream of bytes.
// Note: As with all `bytes` fields, protocol buffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // An IANA published [media type (MIME
  // type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
  string mime_type = 3;

  // Optional. UTF-8 encoded text in reading order from the document.
  string text = 4 [(google.api.field_behavior) = OPTIONAL];

  // Styles for the
  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
  repeated Style text_styles = 5 [deprecated = true];

  // Visual page layout for the
  // [Document][google.cloud.documentai.v1beta3.Document].
  repeated Page pages = 6;

  // A list of entities detected on
  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. For
  // document shards, entities in this list may cross shard boundaries.
  repeated Entity entities = 7;

  // Placeholder. Relationship among
  // [Document.entities][google.cloud.documentai.v1beta3.Document.entities].
  repeated EntityRelation entity_relations = 8;

  // Placeholder. A list of text corrections made to
  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. This is
  // usually used for annotating corrections to OCR mistakes. Text changes for
  // a given revision may not overlap with each other.
  repeated TextChange text_changes = 14;

  // Information about the sharding if this document is a sharded part of a
  // larger document. If the document is not sharded, this message is not
  // specified.
  ShardInfo shard_info = 9;

  // Any error that occurred while processing this document.
  google.rpc.Status error = 10;

  // Placeholder. Revision history of this document.
  repeated Revision revisions = 13;

  // Parsed layout of the document.
  DocumentLayout document_layout = 17;

  // Document chunked based on chunking config.
  ChunkedDocument chunked_document = 18;
}

// The revision reference specifies which revision on the document to read.
message RevisionRef {
  // Some predefined revision cases.
  enum RevisionCase {
    // Unspecified case, fall back to read the `LATEST_HUMAN_REVIEW`.
    REVISION_CASE_UNSPECIFIED = 0;

    // The latest revision made by a human.
    LATEST_HUMAN_REVIEW = 1;

    // The latest revision based on timestamp.
    LATEST_TIMESTAMP = 2;

    // The first (OCR) revision.
    BASE_OCR_REVISION = 3;
  }

  // Specifies which revision to read.
  oneof source {
    // Reads the revision by the predefined case.
    RevisionCase revision_case = 1;

    // Reads the revision given by the id.
    string revision_id = 2;

    // Reads the revision generated by the processor version.
    // The format takes the full resource name of processor version.
    // `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
    string latest_processor_version = 3;
  }
}