// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.documentai.v1beta1;

import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta1/geometry.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta1";
option go_package = "cloud.google.com/go/documentai/apiv1beta1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.documentai.v1beta1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta1";
option ruby_package = "Google::Cloud::DocumentAI::V1beta1";

// Document represents the canonical document resource in Document Understanding
// AI.
// It is an interchange format that provides insights into documents and allows
// for collaboration between users and Document Understanding AI to iterate and
// optimize for quality.
message Document {
  // For a large document, sharding may be performed to produce several
  // document shards. Each document shard contains this field to detail which
  // shard it is.
  message ShardInfo {
    // The 0-based index of this shard.
    int64 shard_index = 1;

    // Total number of shards.
    int64 shard_count = 2;

    // The index of the first character in
    // [Document.text][google.cloud.documentai.v1beta1.Document.text] in the
    // overall document global text.
    int64 text_offset = 3;
  }

  // Annotation for common text style attributes. This adheres to CSS
  // conventions as much as possible.
  message Style {
    // Font size with unit.
    message FontSize {
      // Font size for the text.
      float size = 1;

      // Unit for the font size. Follows CSS naming (in, px, pt, etc.).
      string unit = 2;
    }

    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
    TextAnchor text_anchor = 1;

    // Text color.
    google.type.Color color = 2;

    // Text background color.
    google.type.Color background_color = 3;

    // Font weight. Possible values are normal, bold, bolder, and lighter.
    // https://www.w3schools.com/cssref/pr_font_weight.asp
    string font_weight = 4;

    // Text style. Possible values are normal, italic, and oblique.
    // https://www.w3schools.com/cssref/pr_font_font-style.asp
    string text_style = 5;

    // Text decoration. Follows CSS standard.
    // <text-decoration-line> <text-decoration-color> <text-decoration-style>
    // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
    string text_decoration = 6;

    // Font size.
    FontSize font_size = 7;
  }

  // A page in a [Document][google.cloud.documentai.v1beta1.Document].
  message Page {
    // Dimension for the page.
    message Dimension {
      // Page width.
      float width = 1;

      // Page height.
      float height = 2;

      // Dimension unit.
      string unit = 3;
    }

    // Visual element describing a layout unit on a page.
    message Layout {
      // Detected human reading orientation.
      enum Orientation {
        // Unspecified orientation.
        ORIENTATION_UNSPECIFIED = 0;

        // Orientation is aligned with page up.
        PAGE_UP = 1;

        // Orientation is aligned with page right.
        // Turn the head 90 degrees clockwise from upright to read.
        PAGE_RIGHT = 2;

        // Orientation is aligned with page down.
        // Turn the head 180 degrees from upright to read.
        PAGE_DOWN = 3;

        // Orientation is aligned with page left.
        // Turn the head 90 degrees counterclockwise from upright to read.
        PAGE_LEFT = 4;
      }

      // Text anchor indexing into the
      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
      TextAnchor text_anchor = 1;

      // Confidence of the current
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] within
      // context of the object this layout is for. e.g. confidence can be for a
      // single token, a table, a visual element, etc. depending on context.
      // Range [0, 1].
      float confidence = 2;

      // The bounding polygon for the
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
      BoundingPoly bounding_poly = 3;

      // Detected orientation for the
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
      Orientation orientation = 4;
    }

    // A block has a set of lines (collected into paragraphs) that have a
    // common line-spacing and orientation.
    message Block {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Block][google.cloud.documentai.v1beta1.Document.Page.Block].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of lines that a human would perceive as a paragraph.
    message Paragraph {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Paragraph][google.cloud.documentai.v1beta1.Document.Page.Paragraph].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of tokens that a human would perceive as a line.
    // Does not cross column boundaries, can be horizontal, vertical, etc.
    message Line {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Line][google.cloud.documentai.v1beta1.Document.Page.Line].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A detected token.
    message Token {
      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
      message DetectedBreak {
        // Enum to denote the type of break found.
        enum Type {
          // Unspecified break type.
          TYPE_UNSPECIFIED = 0;

          // A single whitespace.
          SPACE = 1;

          // A wider whitespace.
          WIDE_SPACE = 2;

          // A hyphen that indicates that a token has been split across lines.
          HYPHEN = 3;
        }

        // Detected break type.
        Type type = 1;
      }

      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
      Layout layout = 1;

      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
      DetectedBreak detected_break = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // Detected non-text visual elements e.g. checkbox, signature etc. on the
    // page.
    message VisualElement {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
      Layout layout = 1;

      // Type of the
      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
      string type = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // A table representation similar to HTML table structure.
    message Table {
      // A row of table cells.
      message TableRow {
        // Cells that make up this row.
        repeated TableCell cells = 1;
      }

      // A cell representation inside the table.
      message TableCell {
        // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
        // [TableCell][google.cloud.documentai.v1beta1.Document.Page.Table.TableCell].
        Layout layout = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;

        // A list of detected languages together with confidence.
        repeated DetectedLanguage detected_languages = 4;
      }

      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
      // [Table][google.cloud.documentai.v1beta1.Document.Page.Table].
      Layout layout = 1;

      // Header rows of the table.
      repeated TableRow header_rows = 2;

      // Body rows of the table.
      repeated TableRow body_rows = 3;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 4;
    }

    // A form field detected on the page.
    message FormField {
      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
      // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
      Layout field_name = 1;

      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
      // value.
      Layout field_value = 2;

      // A list of detected languages for name together with confidence.
      repeated DetectedLanguage name_detected_languages = 3;

      // A list of detected languages for value together with confidence.
      repeated DetectedLanguage value_detected_languages = 4;
    }

    // Detected language for a structural component.
    message DetectedLanguage {
      // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
      // information, see
      // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
      string language_code = 1;

      // Confidence of detected language. Range [0, 1].
      float confidence = 2;
    }

    // 1-based index for current
    // [Page][google.cloud.documentai.v1beta1.Document.Page] in a parent
    // [Document][google.cloud.documentai.v1beta1.Document]. Useful when a page
    // is taken out of a [Document][google.cloud.documentai.v1beta1.Document]
    // for individual processing.
    int32 page_number = 1;

    // Physical dimension of the page.
    Dimension dimension = 2;

    // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
    // page.
    Layout layout = 3;

    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 4;

    // A list of visually detected text blocks on the page.
    // A block has a set of lines (collected into paragraphs) that have a common
    // line-spacing and orientation.
    repeated Block blocks = 5;

    // A list of visually detected text paragraphs on the page.
    // A collection of lines that a human would perceive as a paragraph.
    repeated Paragraph paragraphs = 6;

    // A list of visually detected text lines on the page.
    // A collection of tokens that a human would perceive as a line.
    repeated Line lines = 7;

    // A list of visually detected tokens on the page.
    repeated Token tokens = 8;

    // A list of detected non-text visual elements e.g. checkbox,
    // signature etc. on the page.
    repeated VisualElement visual_elements = 9;

    // A list of visually detected tables on the page.
    repeated Table tables = 10;

    // A list of visually detected form fields on the page.
    repeated FormField form_fields = 11;
  }

  // A phrase in the text that is a known entity type, such as a person, an
  // organization, or location.
  message Entity {
    // Provenance of the entity.
    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
    TextAnchor text_anchor = 1;

    // Required. Entity type from a schema e.g. `Address`.
    string type = 2 [(google.api.field_behavior) = REQUIRED];

    // Text value in the document e.g. `1600 Amphitheatre Pkwy`.
    string mention_text = 3;

    // Canonical mention name. This will be a unique value in the entity list
    // for this document.
    string mention_id = 4;
  }

  // Relationship between
  // [Entities][google.cloud.documentai.v1beta1.Document.Entity].
  message EntityRelation {
    // Subject entity mention_id.
    string subject_id = 1;

    // Object entity mention_id.
    string object_id = 2;

    // Relationship description.
    string relation = 3;
  }

  // Text reference indexing into the
  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  message TextAnchor {
    // A text segment in the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text]. The
    // indices may be out of bounds, which indicates that the text extends into
    // another document shard for large sharded documents. See
    // [ShardInfo.text_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset].
    message TextSegment {
      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
      // start UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
      int64 start_index = 1;

      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
      // half open end UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
      int64 end_index = 2;
    }

    // The text segments from the
    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
    repeated TextSegment text_segments = 1;
  }

  // Original source document from the user.
  oneof source {
    // Currently supports Google Cloud Storage URI of the form
    // `gs://bucket_name/object_name`. Object versioning is not supported.
    // See [Google Cloud Storage Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
    // info.
    string uri = 1;

    // Inline document content, represented as a stream of bytes.
    // Note: As with all `bytes` fields, protobuffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes content = 2;
  }

  // An IANA published MIME type (also referred to as media type). For more
  // information, see
  // https://www.iana.org/assignments/media-types/media-types.xhtml.
  string mime_type = 3;

  // UTF-8 encoded text in reading order from the document.
  string text = 4;

  // Styles for the
  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  repeated Style text_styles = 5;

  // Visual page layout for the
  // [Document][google.cloud.documentai.v1beta1.Document].
  repeated Page pages = 6;

  // A list of entities detected on
  // [Document.text][google.cloud.documentai.v1beta1.Document.text]. For
  // document shards, entities in this list may cross shard boundaries.
  repeated Entity entities = 7;

  // Relationship among
  // [Document.entities][google.cloud.documentai.v1beta1.Document.entities].
  repeated EntityRelation entity_relations = 8;

  // Information about the sharding if this document is a sharded part of a
  // larger document. If the document is not sharded, this message is not
  // specified.
  ShardInfo shard_info = 9;

  // Any error that occurred while processing this document.
  google.rpc.Status error = 10;
}