1// Copyright 2023 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15syntax = "proto3"; 16 17package google.cloud.documentai.v1beta3; 18 19import "google/api/field_behavior.proto"; 20import "google/cloud/documentai/v1beta3/barcode.proto"; 21import "google/cloud/documentai/v1beta3/geometry.proto"; 22import "google/protobuf/timestamp.proto"; 23import "google/rpc/status.proto"; 24import "google/type/color.proto"; 25import "google/type/date.proto"; 26import "google/type/datetime.proto"; 27import "google/type/money.proto"; 28import "google/type/postal_address.proto"; 29 30option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3"; 31option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb"; 32option java_multiple_files = true; 33option java_outer_classname = "DocumentProto"; 34option java_package = "com.google.cloud.documentai.v1beta3"; 35option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3"; 36option ruby_package = "Google::Cloud::DocumentAI::V1beta3"; 37 38// Document represents the canonical document resource in Document AI. It is an 39// interchange format that provides insights into documents and allows for 40// collaboration between users and Document AI to iterate and optimize for 41// quality. 42message Document { 43 // For a large document, sharding may be performed to produce several 44 // document shards. Each document shard contains this field to detail which 45 // shard it is. 
46 message ShardInfo { 47 // The 0-based index of this shard. 48 int64 shard_index = 1; 49 50 // Total number of shards. 51 int64 shard_count = 2; 52 53 // The index of the first character in 54 // [Document.text][google.cloud.documentai.v1beta3.Document.text] in the 55 // overall document global text. 56 int64 text_offset = 3; 57 } 58 59 // Annotation for common text style attributes. This adheres to CSS 60 // conventions as much as possible. 61 message Style { 62 // Font size with unit. 63 message FontSize { 64 // Font size for the text. 65 float size = 1; 66 67 // Unit for the font size. Follows CSS naming (such as `in`, `px`, and 68 // `pt`). 69 string unit = 2; 70 } 71 72 // Text anchor indexing into the 73 // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 74 TextAnchor text_anchor = 1; 75 76 // Text color. 77 google.type.Color color = 2; 78 79 // Text background color. 80 google.type.Color background_color = 3; 81 82 // [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp). 83 // Possible values are `normal`, `bold`, `bolder`, and `lighter`. 84 string font_weight = 4; 85 86 // [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp). 87 // Possible values are `normal`, `italic`, and `oblique`. 88 string text_style = 5; 89 90 // [Text 91 // decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp). 92 // Follows CSS standard. <text-decoration-line> <text-decoration-color> 93 // <text-decoration-style> 94 string text_decoration = 6; 95 96 // Font size. 97 FontSize font_size = 7; 98 99 // Font family such as `Arial`, `Times New Roman`. 100 // https://www.w3schools.com/cssref/pr_font_font-family.asp 101 string font_family = 8; 102 } 103 104 // A page in a [Document][google.cloud.documentai.v1beta3.Document]. 105 message Page { 106 // Dimension for the page. 107 message Dimension { 108 // Page width. 109 float width = 1; 110 111 // Page height. 112 float height = 2; 113 114 // Dimension unit. 
115 string unit = 3; 116 } 117 118 // Rendered image contents for this page. 119 message Image { 120 // Raw byte content of the image. 121 bytes content = 1; 122 123 // Encoding [media type (MIME 124 // type)](https://www.iana.org/assignments/media-types/media-types.xhtml) 125 // for the image. 126 string mime_type = 2; 127 128 // Width of the image in pixels. 129 int32 width = 3; 130 131 // Height of the image in pixels. 132 int32 height = 4; 133 } 134 135 // Representation for transformation matrix, intended to be compatible and 136 // used with OpenCV format for image manipulation. 137 message Matrix { 138 // Number of rows in the matrix. 139 int32 rows = 1; 140 141 // Number of columns in the matrix. 142 int32 cols = 2; 143 144 // This encodes information about what data type the matrix uses. 145 // For example, 0 (CV_8U) is an unsigned 8-bit image. For the full list 146 // of OpenCV primitive data types, please refer to 147 // https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html 148 int32 type = 3; 149 150 // The matrix data. 151 bytes data = 4; 152 } 153 154 // Visual element describing a layout unit on a page. 155 message Layout { 156 // Detected human reading orientation. 157 enum Orientation { 158 // Unspecified orientation. 159 ORIENTATION_UNSPECIFIED = 0; 160 161 // Orientation is aligned with page up. 162 PAGE_UP = 1; 163 164 // Orientation is aligned with page right. 165 // Turn the head 90 degrees clockwise from upright to read. 166 PAGE_RIGHT = 2; 167 168 // Orientation is aligned with page down. 169 // Turn the head 180 degrees from upright to read. 170 PAGE_DOWN = 3; 171 172 // Orientation is aligned with page left. 173 // Turn the head 90 degrees counterclockwise from upright to read. 174 PAGE_LEFT = 4; 175 } 176 177 // Text anchor indexing into the 178 // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 
179 TextAnchor text_anchor = 1; 180 181 // Confidence of the current 182 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] within 183 // context of the object this layout is for. e.g. confidence can be for a 184 // single token, a table, a visual element, etc. depending on context. 185 // Range `[0, 1]`. 186 float confidence = 2; 187 188 // The bounding polygon for the 189 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout]. 190 BoundingPoly bounding_poly = 3; 191 192 // Detected orientation for the 193 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout]. 194 Orientation orientation = 4; 195 } 196 197 // A block has a set of lines (collected into paragraphs) that have a 198 // common line-spacing and orientation. 199 message Block { 200 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for 201 // [Block][google.cloud.documentai.v1beta3.Document.Page.Block]. 202 Layout layout = 1; 203 204 // A list of detected languages together with confidence. 205 repeated DetectedLanguage detected_languages = 2; 206 207 // The history of this annotation. 208 Provenance provenance = 3 [deprecated = true]; 209 } 210 211 // A collection of lines that a human would perceive as a paragraph. 212 message Paragraph { 213 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for 214 // [Paragraph][google.cloud.documentai.v1beta3.Document.Page.Paragraph]. 215 Layout layout = 1; 216 217 // A list of detected languages together with confidence. 218 repeated DetectedLanguage detected_languages = 2; 219 220 // The history of this annotation. 221 Provenance provenance = 3 [deprecated = true]; 222 } 223 224 // A collection of tokens that a human would perceive as a line. 225 // Does not cross column boundaries, can be horizontal, vertical, etc. 226 message Line { 227 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for 228 // [Line][google.cloud.documentai.v1beta3.Document.Page.Line]. 
229 Layout layout = 1; 230 231 // A list of detected languages together with confidence. 232 repeated DetectedLanguage detected_languages = 2; 233 234 // The history of this annotation. 235 Provenance provenance = 3 [deprecated = true]; 236 } 237 238 // A detected token. 239 message Token { 240 // Detected break at the end of a 241 // [Token][google.cloud.documentai.v1beta3.Document.Page.Token]. 242 message DetectedBreak { 243 // Enum to denote the type of break found. 244 enum Type { 245 // Unspecified break type. 246 TYPE_UNSPECIFIED = 0; 247 248 // A single whitespace. 249 SPACE = 1; 250 251 // A wider whitespace. 252 WIDE_SPACE = 2; 253 254 // A hyphen that indicates that a token has been split across lines. 255 HYPHEN = 3; 256 } 257 258 // Detected break type. 259 Type type = 1; 260 } 261 262 // Font and other text style attributes. 263 message StyleInfo { 264 // Font size in points (`1` point is `¹⁄₇₂` inches). 265 int32 font_size = 1; 266 267 // Font size in pixels, equal to _unrounded 268 // [font_size][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_size]_ 269 // * _resolution_ ÷ `72.0`. 270 double pixel_font_size = 2; 271 272 // Letter spacing in points. 273 double letter_spacing = 3; 274 275 // Name or style of the font. 276 string font_type = 4; 277 278 // Whether the text is bold (equivalent to 279 // [font_weight][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_weight] 280 // is at least `700`). 281 bool bold = 5; 282 283 // Whether the text is italic. 284 bool italic = 6; 285 286 // Whether the text is underlined. 287 bool underlined = 7; 288 289 // Whether the text is strikethrough. This feature is not supported yet. 290 bool strikeout = 8; 291 292 // Whether the text is a subscript. This feature is not supported yet. 293 bool subscript = 9; 294 295 // Whether the text is a superscript. This feature is not supported yet. 296 bool superscript = 10; 297 298 // Whether the text is in small caps. 
This feature is not supported yet. 299 bool smallcaps = 11; 300 301 // TrueType weight on a scale `100` (thin) to `1000` (ultra-heavy). 302 // Normal is `400`, bold is `700`. 303 int32 font_weight = 12; 304 305 // Whether the text is handwritten. 306 bool handwritten = 13; 307 308 // Color of the text. 309 google.type.Color text_color = 14; 310 311 // Color of the background. 312 google.type.Color background_color = 15; 313 } 314 315 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for 316 // [Token][google.cloud.documentai.v1beta3.Document.Page.Token]. 317 Layout layout = 1; 318 319 // Detected break at the end of a 320 // [Token][google.cloud.documentai.v1beta3.Document.Page.Token]. 321 DetectedBreak detected_break = 2; 322 323 // A list of detected languages together with confidence. 324 repeated DetectedLanguage detected_languages = 3; 325 326 // The history of this annotation. 327 Provenance provenance = 4 [deprecated = true]; 328 329 // Text style attributes. 330 StyleInfo style_info = 5; 331 } 332 333 // A detected symbol. 334 message Symbol { 335 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for 336 // [Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol]. 337 Layout layout = 1; 338 339 // A list of detected languages together with confidence. 340 repeated DetectedLanguage detected_languages = 2; 341 } 342 343 // Detected non-text visual elements e.g. checkbox, signature etc. on the 344 // page. 345 message VisualElement { 346 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for 347 // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement]. 348 Layout layout = 1; 349 350 // Type of the 351 // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement]. 352 string type = 2; 353 354 // A list of detected languages together with confidence. 
355 repeated DetectedLanguage detected_languages = 3; 356 } 357 358 // A table representation similar to HTML table structure. 359 message Table { 360 // A row of table cells. 361 message TableRow { 362 // Cells that make up this row. 363 repeated TableCell cells = 1; 364 } 365 366 // A cell representation inside the table. 367 message TableCell { 368 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for 369 // [TableCell][google.cloud.documentai.v1beta3.Document.Page.Table.TableCell]. 370 Layout layout = 1; 371 372 // How many rows this cell spans. 373 int32 row_span = 2; 374 375 // How many columns this cell spans. 376 int32 col_span = 3; 377 378 // A list of detected languages together with confidence. 379 repeated DetectedLanguage detected_languages = 4; 380 } 381 382 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for 383 // [Table][google.cloud.documentai.v1beta3.Document.Page.Table]. 384 Layout layout = 1; 385 386 // Header rows of the table. 387 repeated TableRow header_rows = 2; 388 389 // Body rows of the table. 390 repeated TableRow body_rows = 3; 391 392 // A list of detected languages together with confidence. 393 repeated DetectedLanguage detected_languages = 4; 394 395 // The history of this table. 396 Provenance provenance = 5 [deprecated = true]; 397 } 398 399 // A form field detected on the page. 400 message FormField { 401 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the 402 // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField] 403 // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc. 404 Layout field_name = 1; 405 406 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the 407 // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField] 408 // value. 409 Layout field_value = 2; 410 411 // A list of detected languages for name together with confidence. 
412 repeated DetectedLanguage name_detected_languages = 3; 413 414 // A list of detected languages for value together with confidence. 415 repeated DetectedLanguage value_detected_languages = 4; 416 417 // If the value is non-textual, this field represents the type. Current 418 // valid values are: 419 // 420 // - blank (this indicates the `field_value` is normal text) 421 // - `unfilled_checkbox` 422 // - `filled_checkbox` 423 string value_type = 5; 424 425 // Created for Labeling UI to export key text. 426 // If corrections were made to the text identified by the 427 // `field_name.text_anchor`, this field will contain the correction. 428 string corrected_key_text = 6; 429 430 // Created for Labeling UI to export value text. 431 // If corrections were made to the text identified by the 432 // `field_value.text_anchor`, this field will contain the correction. 433 string corrected_value_text = 7; 434 435 // The history of this annotation. 436 Provenance provenance = 8; 437 } 438 439 // A detected barcode. 440 message DetectedBarcode { 441 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for 442 // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode]. 443 Layout layout = 1; 444 445 // Detailed barcode information of the 446 // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode]. 447 Barcode barcode = 2; 448 } 449 450 // Detected language for a structural component. 451 message DetectedLanguage { 452 // The [BCP-47 language 453 // code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier), 454 // such as `en-US` or `sr-Latn`. 455 string language_code = 1; 456 457 // Confidence of detected language. Range `[0, 1]`. 458 float confidence = 2; 459 } 460 461 // Image quality scores for the page image. 462 message ImageQualityScores { 463 // Image Quality Defects 464 message DetectedDefect { 465 // Name of the defect type. 
Supported values are: 466 // 467 // - `quality/defect_blurry` 468 // - `quality/defect_noisy` 469 // - `quality/defect_dark` 470 // - `quality/defect_faint` 471 // - `quality/defect_text_too_small` 472 // - `quality/defect_document_cutoff` 473 // - `quality/defect_text_cutoff` 474 // - `quality/defect_glare` 475 string type = 1; 476 477 // Confidence of detected defect. Range `[0, 1]` where `1` indicates 478 // strong confidence that the defect exists. 479 float confidence = 2; 480 } 481 482 // The overall quality score. Range `[0, 1]` where `1` is perfect quality. 483 float quality_score = 1; 484 485 // A list of detected defects. 486 repeated DetectedDefect detected_defects = 2; 487 } 488 489 // 1-based index for current 490 // [Page][google.cloud.documentai.v1beta3.Document.Page] in a parent 491 // [Document][google.cloud.documentai.v1beta3.Document]. Useful when a page 492 // is taken out of a [Document][google.cloud.documentai.v1beta3.Document] 493 // for individual processing. 494 int32 page_number = 1; 495 496 // Rendered image for this page. This image is preprocessed to remove any 497 // skew, rotation, and distortions such that the annotation bounding boxes 498 // can be upright and axis-aligned. 499 Image image = 13; 500 501 // Transformation matrices that were applied to the original document image 502 // to produce 503 // [Page.image][google.cloud.documentai.v1beta3.Document.Page.image]. 504 repeated Matrix transforms = 14; 505 506 // Physical dimension of the page. 507 Dimension dimension = 2; 508 509 // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the 510 // page. 511 Layout layout = 3; 512 513 // A list of detected languages together with confidence. 514 repeated DetectedLanguage detected_languages = 4; 515 516 // A list of visually detected text blocks on the page. 517 // A block has a set of lines (collected into paragraphs) that have a common 518 // line-spacing and orientation. 
519 repeated Block blocks = 5; 520 521 // A list of visually detected text paragraphs on the page. 522 // A collection of lines that a human would perceive as a paragraph. 523 repeated Paragraph paragraphs = 6; 524 525 // A list of visually detected text lines on the page. 526 // A collection of tokens that a human would perceive as a line. 527 repeated Line lines = 7; 528 529 // A list of visually detected tokens on the page. 530 repeated Token tokens = 8; 531 532 // A list of detected non-text visual elements e.g. checkbox, 533 // signature etc. on the page. 534 repeated VisualElement visual_elements = 9; 535 536 // A list of visually detected tables on the page. 537 repeated Table tables = 10; 538 539 // A list of visually detected form fields on the page. 540 repeated FormField form_fields = 11; 541 542 // A list of visually detected symbols on the page. 543 repeated Symbol symbols = 12; 544 545 // A list of detected barcodes. 546 repeated DetectedBarcode detected_barcodes = 15; 547 548 // Image quality scores. 549 ImageQualityScores image_quality_scores = 17; 550 551 // The history of this page. 552 Provenance provenance = 16 [deprecated = true]; 553 } 554 555 // An entity that could be a phrase in the text or a property that belongs to 556 // the document. It is a known entity type, such as a person, an organization, 557 // or location. 558 message Entity { 559 // Parsed and normalized entity value. 560 message NormalizedValue { 561 // An optional structured entity value. 562 // Must match entity type defined in schema if 563 // known. If this field is present, the `text` field could also be 564 // populated. 565 oneof structured_value { 566 // Money value. See also: 567 // https://github.com/googleapis/googleapis/blob/master/google/type/money.proto 568 google.type.Money money_value = 2; 569 570 // Date value. Includes year, month, day. 
See also: 571 // https://github.com/googleapis/googleapis/blob/master/google/type/date.proto 572 google.type.Date date_value = 3; 573 574 // DateTime value. Includes date, time, and timezone. See also: 575 // https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto 576 google.type.DateTime datetime_value = 4; 577 578 // Postal address. See also: 579 // https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto 580 google.type.PostalAddress address_value = 5; 581 582 // Boolean value. Can be used for entities with binary values, or for 583 // checkboxes. 584 bool boolean_value = 6; 585 586 // Integer value. 587 int32 integer_value = 7; 588 589 // Float value. 590 float float_value = 8; 591 } 592 593 // Optional. An optional field to store a normalized string. 594 // For some entity types, one of respective `structured_value` fields may 595 // also be populated. Also not all the types of `structured_value` will be 596 // normalized. For example, some processors may not generate `float` 597 // or `integer` normalized text by default. 598 // 599 // Below are sample formats mapped to structured values. 600 // 601 // - Money/Currency type (`money_value`) is in the ISO 4217 text format. 602 // - Date type (`date_value`) is in the ISO 8601 text format. 603 // - Datetime type (`datetime_value`) is in the ISO 8601 text format. 604 string text = 1 [(google.api.field_behavior) = OPTIONAL]; 605 } 606 607 // Optional. Provenance of the entity. 608 // Text anchor indexing into the 609 // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 610 TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL]; 611 612 // Required. Entity type from a schema e.g. `Address`. 613 string type = 2 [(google.api.field_behavior) = REQUIRED]; 614 615 // Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`. 616 string mention_text = 3 [(google.api.field_behavior) = OPTIONAL]; 617 618 // Optional. Deprecated. 
Use `id` field instead. 619 string mention_id = 4 [(google.api.field_behavior) = OPTIONAL]; 620 621 // Optional. Confidence of detected Schema entity. Range `[0, 1]`. 622 float confidence = 5 [(google.api.field_behavior) = OPTIONAL]; 623 624 // Optional. Represents the provenance of this entity wrt. the location on 625 // the page where it was found. 626 PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL]; 627 628 // Optional. Canonical id. This will be a unique value in the entity list 629 // for this document. 630 string id = 7 [(google.api.field_behavior) = OPTIONAL]; 631 632 // Optional. Normalized entity value. Absent if the extracted value could 633 // not be converted or the type (e.g. address) is not supported for certain 634 // parsers. This field is also only populated for certain supported document 635 // types. 636 NormalizedValue normalized_value = 9 637 [(google.api.field_behavior) = OPTIONAL]; 638 639 // Optional. Entities can be nested to form a hierarchical data structure 640 // representing the content in the document. 641 repeated Entity properties = 10 [(google.api.field_behavior) = OPTIONAL]; 642 643 // Optional. The history of this annotation. 644 Provenance provenance = 11 [(google.api.field_behavior) = OPTIONAL]; 645 646 // Optional. Whether the entity will be redacted for de-identification 647 // purposes. 648 bool redacted = 12 [(google.api.field_behavior) = OPTIONAL]; 649 } 650 651 // Relationship between 652 // [Entities][google.cloud.documentai.v1beta3.Document.Entity]. 653 message EntityRelation { 654 // Subject entity id. 655 string subject_id = 1; 656 657 // Object entity id. 658 string object_id = 2; 659 660 // Relationship description. 661 string relation = 3; 662 } 663 664 // Text reference indexing into the 665 // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 666 message TextAnchor { 667 // A text segment in the 668 // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 
The 669 // indices may be out of bounds which indicate that the text extends into 670 // another document shard for large sharded documents. See 671 // [ShardInfo.text_offset][google.cloud.documentai.v1beta3.Document.ShardInfo.text_offset] 672 message TextSegment { 673 // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment] 674 // start UTF-8 char index in the 675 // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 676 int64 start_index = 1; 677 678 // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment] 679 // half open end UTF-8 char index in the 680 // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 681 int64 end_index = 2; 682 } 683 684 // The text segments from the 685 // [Document.text][google.cloud.documentai.v1beta3.Document.text]. 686 repeated TextSegment text_segments = 1; 687 688 // Contains the content of the text span so that users do 689 // not have to look it up in the text_segments. It is always 690 // populated for formFields. 691 string content = 2; 692 } 693 694 // Referencing the visual context of the entity in the 695 // [Document.pages][google.cloud.documentai.v1beta3.Document.pages]. Page 696 // anchors can be cross-page, consist of multiple bounding polygons and 697 // optionally reference specific layout element types. 698 message PageAnchor { 699 // Represents a weak reference to a page element within a document. 700 message PageRef { 701 // The type of layout that is being referenced. 702 enum LayoutType { 703 // Layout Unspecified. 704 LAYOUT_TYPE_UNSPECIFIED = 0; 705 706 // References a 707 // [Page.blocks][google.cloud.documentai.v1beta3.Document.Page.blocks] 708 // element. 709 BLOCK = 1; 710 711 // References a 712 // [Page.paragraphs][google.cloud.documentai.v1beta3.Document.Page.paragraphs] 713 // element. 714 PARAGRAPH = 2; 715 716 // References a 717 // [Page.lines][google.cloud.documentai.v1beta3.Document.Page.lines] 718 // element. 
719 LINE = 3; 720 721 // References a 722 // [Page.tokens][google.cloud.documentai.v1beta3.Document.Page.tokens] 723 // element. 724 TOKEN = 4; 725 726 // References a 727 // [Page.visual_elements][google.cloud.documentai.v1beta3.Document.Page.visual_elements] 728 // element. 729 VISUAL_ELEMENT = 5; 730 731 // References a 732 // [Page.tables][google.cloud.documentai.v1beta3.Document.Page.tables] 733 // element. 734 TABLE = 6; 735 736 // References a 737 // [Page.form_fields][google.cloud.documentai.v1beta3.Document.Page.form_fields] 738 // element. 739 FORM_FIELD = 7; 740 } 741 742 // Required. Index into the 743 // [Document.pages][google.cloud.documentai.v1beta3.Document.pages] 744 // element, for example using 745 // `[Document.pages][page_refs.page]` to locate the related page element. 746 // This field is skipped when its value is the default `0`. See 747 // https://developers.google.com/protocol-buffers/docs/proto3#json. 748 int64 page = 1 [(google.api.field_behavior) = REQUIRED]; 749 750 // Optional. The type of the layout element that is being referenced if 751 // any. 752 LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL]; 753 754 // Optional. Deprecated. Use 755 // [PageRef.bounding_poly][google.cloud.documentai.v1beta3.Document.PageAnchor.PageRef.bounding_poly] 756 // instead. 757 string layout_id = 3 758 [deprecated = true, (google.api.field_behavior) = OPTIONAL]; 759 760 // Optional. Identifies the bounding polygon of a layout element on the 761 // page. If `layout_type` is set, the bounding polygon must be exactly the 762 // same as the layout element it's referring to. 763 BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL]; 764 765 // Optional. Confidence of detected page element, if applicable. Range 766 // `[0, 1]`. 
767 float confidence = 5 [(google.api.field_behavior) = OPTIONAL]; 768 } 769 770 // One or more references to visual page elements. 771 repeated PageRef page_refs = 1; 772 } 773 774 // Structure to identify provenance relationships between annotations in 775 // different revisions. 776 message Provenance { 777 // The parent element the current element is based on. Used for 778 // referencing/aligning, removal and replacement operations. 779 message Parent { 780 // The index into the current revision's `parent_ids` list. 781 int32 revision = 1; 782 783 // The index of the parent item in the corresponding item list (e.g. list 784 // of entities, properties within entities, etc.) in the parent revision. 785 int32 index = 3; 786 787 // The id of the parent provenance. 788 int32 id = 2 [deprecated = true]; 789 } 790 791 // If a processor or agent does an explicit operation on existing elements. 792 enum OperationType { 793 // Operation type unspecified. If no operation is specified a provenance 794 // entry is simply used to match against a `parent`. 795 OPERATION_TYPE_UNSPECIFIED = 0; 796 797 // Add an element. 798 ADD = 1; 799 800 // Remove an element identified by `parent`. 801 REMOVE = 2; 802 803 // Updates any fields within the given provenance scope of the message. It 804 // overwrites the fields rather than replacing them. Use this when you 805 // want to update a field value of an entity without also updating all the 806 // child properties. 807 UPDATE = 7; 808 809 // Currently unused. Replace an element identified by `parent`. 810 REPLACE = 3; 811 812 // Deprecated. Request human review for the element identified by 813 // `parent`. 814 EVAL_REQUESTED = 4 [deprecated = true]; 815 816 // Deprecated. Element is reviewed and approved at human review, 817 // confidence will be set to 1.0. 818 EVAL_APPROVED = 5 [deprecated = true]; 819 820 // Deprecated. Element is skipped in the validation process. 
821 EVAL_SKIPPED = 6 [deprecated = true]; 822 } 823 824 // The index of the revision that produced this element. 825 int32 revision = 1 [deprecated = true]; 826 827 // The Id of this operation. Needs to be unique within the scope of the 828 // revision. 829 int32 id = 2 [deprecated = true]; 830 831 // References to the original elements that are replaced. 832 repeated Parent parents = 3; 833 834 // The type of provenance operation. 835 OperationType type = 4; 836 } 837 838 // Contains past or forward revisions of this document. 839 message Revision { 840 // Human Review information of the document. 841 message HumanReview { 842 // Human review state. e.g. `requested`, `succeeded`, `rejected`. 843 string state = 1; 844 845 // A message providing more details about the current state of processing. 846 // For example, the rejection reason when the state is `rejected`. 847 string state_message = 2; 848 } 849 850 // Who/what made the change. 851 oneof source { 852 // If the change was made by a person, specify the name or id of that 853 // person. 854 string agent = 4; 855 856 // If the annotation was made by a processor, identify the processor by its 857 // resource name. 858 string processor = 5; 859 } 860 861 // Id of the revision, internally generated by doc proto storage. 862 // Unique within the context of the document. 863 string id = 1; 864 865 // The revisions that this revision is based on. This can include one or 866 // more parents (when documents are merged). This field represents the 867 // index into the `revisions` field. 868 repeated int32 parent = 2 [deprecated = true]; 869 870 // The revisions that this revision is based on. Must include all the ids 871 // that have anything to do with this revision - e.g. there are 872 // `provenance.parent.revision` fields that index into this field. 873 repeated string parent_ids = 7; 874 875 // The time that the revision was created, internally generated by 876 // doc proto storage at the time of creation. 
google.protobuf.Timestamp create_time = 3;

    // Human Review information of this revision.
    HumanReview human_review = 6;
  }

  // Represents a text change, also known as an OCR correction.
  message TextChange {
    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta3.Document.text]. There
    // can only be a single `TextAnchor.text_segments` element. If the start
    // and end index of the text segment are the same, the text change is
    // inserted before that index.
    TextAnchor text_anchor = 1;

    // The text that replaces the text identified in the `text_anchor`.
    string changed_text = 2;

    // Provenance of the correction, that is, the history of this annotation.
    // Deprecated: marked `deprecated` in this schema; avoid in new code.
    repeated Provenance provenance = 3 [deprecated = true];
  }

  // Represents the parsed layout of a document as a collection of blocks that
  // the document is divided into.
  message DocumentLayout {
    // Represents a block. A block could be one of the various types (text,
    // table, list) supported. Blocks nest recursively: text blocks, table
    // cells, and list entries each hold further `DocumentLayoutBlock`s.
    message DocumentLayoutBlock {
      // Represents where the block starts and ends in the document.
      message LayoutPageSpan {
        // Page where block starts in the document.
        int32 page_start = 1;

        // Page where block ends in the document.
        int32 page_end = 2;
      }

      // Represents a text type block.
      message LayoutTextBlock {
        // Text content stored in the block.
        string text = 1;

        // Type of the text in the block. Available options are: `paragraph`,
        // `subtitle`, `heading-1`, `heading-2`, `heading-3`, `heading-4`,
        // `heading-5`, `header`, `footer`.
        string type = 2;

        // A text block could further have child blocks.
        // Repeated blocks support further hierarchies and nested blocks.
        repeated DocumentLayoutBlock blocks = 3;
      }

      // Represents a table type block.
      message LayoutTableBlock {
        // Header rows at the top of the table.
        repeated LayoutTableRow header_rows = 1;

        // Body rows containing main table content.
        repeated LayoutTableRow body_rows = 2;

        // Table caption/title.
        string caption = 3;
      }

      // Represents a row in a table.
      message LayoutTableRow {
        // A table row is a list of table cells.
        repeated LayoutTableCell cells = 1;
      }

      // Represents a cell in a table row.
      message LayoutTableCell {
        // A table cell is a list of blocks.
        // Repeated blocks support further hierarchies and nested blocks.
        repeated DocumentLayoutBlock blocks = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;
      }

      // Represents a list type block.
      message LayoutListBlock {
        // List entries that constitute a list block.
        repeated LayoutListEntry list_entries = 1;

        // Type of the `list_entries` (if any exist). Available options are
        // `ordered` and `unordered`.
        string type = 2;
      }

      // Represents an entry in the list.
      message LayoutListEntry {
        // A list entry is a list of blocks.
        // Repeated blocks support further hierarchies and nested blocks.
        repeated DocumentLayoutBlock blocks = 1;
      }

      // The content of the block. At most one of these fields is set.
      oneof block {
        // Block consisting of text content.
        LayoutTextBlock text_block = 2;

        // Block consisting of table content/structure.
        LayoutTableBlock table_block = 3;

        // Block consisting of list content/structure.
        LayoutListBlock list_block = 4;
      }

      // ID of the block.
      string block_id = 1;

      // Page span of the block.
      LayoutPageSpan page_span = 5;
    }

    // List of blocks in the document.
    repeated DocumentLayoutBlock blocks = 1;
  }

  // Represents the chunks that the document is divided into.
  message ChunkedDocument {
    // Represents a chunk.
    message Chunk {
      // Represents where the chunk starts and ends in the document.
      message ChunkPageSpan {
        // Page where chunk starts in the document.
        int32 page_start = 1;

        // Page where chunk ends in the document.
        int32 page_end = 2;
      }

      // Represents the page header associated with the chunk.
      message ChunkPageHeader {
        // Header in text format.
        string text = 1;

        // Page span of the header.
        ChunkPageSpan page_span = 2;
      }

      // Represents the page footer associated with the chunk.
      message ChunkPageFooter {
        // Footer in text format.
        string text = 1;

        // Page span of the footer.
        ChunkPageSpan page_span = 2;
      }

      // ID of the chunk.
      string chunk_id = 1;

      // DO NOT USE.
      // List of all parsed document layout source blocks used to generate
      // the chunk.
      repeated string source_block_ids = 2;

      // Text content of the chunk.
      string content = 3;

      // Page span of the chunk.
      ChunkPageSpan page_span = 4;

      // Page headers associated with the chunk.
      repeated ChunkPageHeader page_headers = 5;

      // Page footers associated with the chunk.
      repeated ChunkPageFooter page_footers = 6;
    }

    // List of chunks.
    repeated Chunk chunks = 1;
  }

  // Original source document from the user.
  oneof source {
    // Optional. Currently supports Google Cloud Storage URI of the form
    // `gs://bucket_name/object_name`. Object versioning is not supported.
    // For more information, refer to [Google Cloud Storage Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris).
    string uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Inline document content, represented as a stream of bytes.
    // Note: As with all `bytes` fields, protobuffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // An IANA published [media type (MIME
  // type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
  string mime_type = 3;

  // Optional. UTF-8 encoded text in reading order from the document.
  string text = 4 [(google.api.field_behavior) = OPTIONAL];

  // Styles for the
  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
  // Deprecated: marked `deprecated` in this schema; avoid in new code.
  repeated Style text_styles = 5 [deprecated = true];

  // Visual page layout for the
  // [Document][google.cloud.documentai.v1beta3.Document].
  repeated Page pages = 6;

  // A list of entities detected on
  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. For
  // document shards, entities in this list may cross shard boundaries.
  repeated Entity entities = 7;

  // Placeholder. Relationship among
  // [Document.entities][google.cloud.documentai.v1beta3.Document.entities].
  repeated EntityRelation entity_relations = 8;

  // Placeholder. A list of text corrections made to
  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. This is
  // usually used for annotating corrections to OCR mistakes. Text changes for
  // a given revision may not overlap with each other.
  repeated TextChange text_changes = 14;

  // Information about the sharding if this document is a shard of a larger
  // document. If the document is not sharded, this message is not specified.
  ShardInfo shard_info = 9;

  // Any error that occurred while processing this document.
  google.rpc.Status error = 10;

  // Placeholder. Revision history of this document.
  repeated Revision revisions = 13;

  // Parsed layout of the document.
  DocumentLayout document_layout = 17;

  // Document chunked based on chunking config.
ChunkedDocument chunked_document = 18;
}

// The revision reference specifies which revision on the document to read.
message RevisionRef {
  // Some predefined revision cases.
  enum RevisionCase {
    // Unspecified case, fall back to read the `LATEST_HUMAN_REVIEW`.
    REVISION_CASE_UNSPECIFIED = 0;

    // The latest revision made by a human.
    LATEST_HUMAN_REVIEW = 1;

    // The latest revision based on timestamp.
    LATEST_TIMESTAMP = 2;

    // The first (OCR) revision.
    BASE_OCR_REVISION = 3;
  }

  // Specifies which revision to read. At most one of these fields is set.
  oneof source {
    // Reads the revision by the predefined case.
    RevisionCase revision_case = 1;

    // Reads the revision given by the ID.
    string revision_id = 2;

    // Reads the revision generated by the processor version.
    // The value is the full resource name of the processor version, in the
    // format:
    // `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
    string latest_processor_version = 3;
  }
}