// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1;

import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1/barcode.proto";
import "google/cloud/documentai/v1/geometry.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/date.proto";
import "google/type/datetime.proto";
import "google/type/money.proto";
import "google/type/postal_address.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1";
option go_package = "cloud.google.com/go/documentai/apiv1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.documentai.v1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1";
option ruby_package = "Google::Cloud::DocumentAI::V1";

// Document represents the canonical document resource in Document AI. It is an
// interchange format that provides insights into documents and allows for
// collaboration between users and Document AI to iterate and optimize for
// quality.
message Document {
  // For a large document, sharding may be performed to produce several
  // document shards. Each document shard contains this field to detail which
  // shard it is.
  message ShardInfo {
    // The 0-based index of this shard.
    int64 shard_index = 1;

    // Total number of shards.
    int64 shard_count = 2;

    // The index of the first character in
    // [Document.text][google.cloud.documentai.v1.Document.text] in the overall
    // document global text.
    int64 text_offset = 3;
  }

  // Annotation for common text style attributes. This adheres to CSS
  // conventions as much as possible.
  message Style {
    // Font size with unit.
    message FontSize {
      // Font size for the text.
      float size = 1;

      // Unit for the font size. Follows CSS naming (such as `in`, `px`, and
      // `pt`).
      string unit = 2;
    }

    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1.Document.text].
    TextAnchor text_anchor = 1;

    // Text color.
    google.type.Color color = 2;

    // Text background color.
    google.type.Color background_color = 3;

    // [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
    // Possible values are `normal`, `bold`, `bolder`, and `lighter`.
    string font_weight = 4;

    // [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
    // Possible values are `normal`, `italic`, and `oblique`.
    string text_style = 5;

    // [Text
    // decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
    // Follows CSS standard. <text-decoration-line> <text-decoration-color>
    // <text-decoration-style>
    string text_decoration = 6;

    // Font size.
    FontSize font_size = 7;

    // Font family such as `Arial`, `Times New Roman`.
    // https://www.w3schools.com/cssref/pr_font_font-family.asp
    string font_family = 8;
  }

  // A page in a [Document][google.cloud.documentai.v1.Document].
  message Page {
    // Dimension for the page.
    message Dimension {
      // Page width.
      float width = 1;

      // Page height.
      float height = 2;

      // Dimension unit.
      string unit = 3;
    }

    // Rendered image contents for this page.
    message Image {
      // Raw byte content of the image.
      bytes content = 1;

      // Encoding [media type (MIME
      // type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
      // for the image.
      string mime_type = 2;

      // Width of the image in pixels.
      int32 width = 3;

      // Height of the image in pixels.
      int32 height = 4;
    }

    // Representation for transformation matrix, intended to be compatible and
    // used with OpenCV format for image manipulation.
    message Matrix {
      // Number of rows in the matrix.
      int32 rows = 1;

      // Number of columns in the matrix.
      int32 cols = 2;

      // This encodes information about what data type the matrix uses.
      // For example, 0 (CV_8U) is an unsigned 8-bit image. For the full list
      // of OpenCV primitive data types, please refer to
      // https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html
      int32 type = 3;

      // The matrix data.
      bytes data = 4;
    }

    // Visual element describing a layout unit on a page.
    message Layout {
      // Detected human reading orientation.
      enum Orientation {
        // Unspecified orientation.
        ORIENTATION_UNSPECIFIED = 0;

        // Orientation is aligned with page up.
        PAGE_UP = 1;

        // Orientation is aligned with page right.
        // Turn the head 90 degrees clockwise from upright to read.
        PAGE_RIGHT = 2;

        // Orientation is aligned with page down.
        // Turn the head 180 degrees from upright to read.
        PAGE_DOWN = 3;

        // Orientation is aligned with page left.
        // Turn the head 90 degrees counterclockwise from upright to read.
        PAGE_LEFT = 4;
      }

      // Text anchor indexing into the
      // [Document.text][google.cloud.documentai.v1.Document.text].
      TextAnchor text_anchor = 1;

      // Confidence of the current
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] within
      // context of the object this layout is for. e.g. confidence can be for a
      // single token, a table, a visual element, etc. depending on context.
      // Range `[0, 1]`.
      float confidence = 2;

      // The bounding polygon for the
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout].
      BoundingPoly bounding_poly = 3;

      // Detected orientation for the
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout].
      Orientation orientation = 4;
    }

    // A block has a set of lines (collected into paragraphs) that have a
    // common line-spacing and orientation.
    message Block {
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
      // [Block][google.cloud.documentai.v1.Document.Page.Block].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;

      // The history of this annotation.
      Provenance provenance = 3 [deprecated = true];
    }

    // A collection of lines that a human would perceive as a paragraph.
    message Paragraph {
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
      // [Paragraph][google.cloud.documentai.v1.Document.Page.Paragraph].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;

      // The history of this annotation.
      Provenance provenance = 3 [deprecated = true];
    }

    // A collection of tokens that a human would perceive as a line.
    // Does not cross column boundaries, can be horizontal, vertical, etc.
    message Line {
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
      // [Line][google.cloud.documentai.v1.Document.Page.Line].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;

      // The history of this annotation.
      Provenance provenance = 3 [deprecated = true];
    }

    // A detected token.
    message Token {
      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1.Document.Page.Token].
      message DetectedBreak {
        // Enum to denote the type of break found.
        enum Type {
          // Unspecified break type.
          TYPE_UNSPECIFIED = 0;

          // A single whitespace.
          SPACE = 1;

          // A wider whitespace.
          WIDE_SPACE = 2;

          // A hyphen that indicates that a token has been split across lines.
          HYPHEN = 3;
        }

        // Detected break type.
        Type type = 1;
      }

      // Font and other text style attributes.
      message StyleInfo {
        // Font size in points (`1` point is `¹⁄₇₂` inches).
        int32 font_size = 1;

        // Font size in pixels, equal to _unrounded
        // [font_size][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_size]_
        // * _resolution_ ÷ `72.0`.
        double pixel_font_size = 2;

        // Letter spacing in points.
        double letter_spacing = 3;

        // Name or style of the font.
        string font_type = 4;

        // Whether the text is bold (equivalent to
        // [font_weight][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_weight]
        // being at least `700`).
        bool bold = 5;

        // Whether the text is italic.
        bool italic = 6;

        // Whether the text is underlined.
        bool underlined = 7;

        // Whether the text is strikethrough.
        bool strikeout = 8;

        // Whether the text is a subscript.
        bool subscript = 9;

        // Whether the text is a superscript.
        bool superscript = 10;

        // Whether the text is in small caps.
        bool smallcaps = 11;

        // TrueType weight on a scale `100` (thin) to `1000` (ultra-heavy).
        // Normal is `400`, bold is `700`.
        int32 font_weight = 12;

        // Whether the text is handwritten.
        bool handwritten = 13;

        // Color of the text.
        google.type.Color text_color = 14;

        // Color of the background.
        google.type.Color background_color = 15;
      }

      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
      // [Token][google.cloud.documentai.v1.Document.Page.Token].
      Layout layout = 1;

      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1.Document.Page.Token].
      DetectedBreak detected_break = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;

      // The history of this annotation.
      Provenance provenance = 4 [deprecated = true];

      // Text style attributes.
      StyleInfo style_info = 5;
    }

    // A detected symbol.
    message Symbol {
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
      // [Symbol][google.cloud.documentai.v1.Document.Page.Symbol].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // Detected non-text visual elements e.g. checkbox, signature etc. on the
    // page.
    message VisualElement {
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
      // [VisualElement][google.cloud.documentai.v1.Document.Page.VisualElement].
      Layout layout = 1;

      // Type of the
      // [VisualElement][google.cloud.documentai.v1.Document.Page.VisualElement].
      string type = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // A table representation similar to HTML table structure.
    message Table {
      // A row of table cells.
      message TableRow {
        // Cells that make up this row.
        repeated TableCell cells = 1;
      }

      // A cell representation inside the table.
      message TableCell {
        // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
        // [TableCell][google.cloud.documentai.v1.Document.Page.Table.TableCell].
        Layout layout = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;

        // A list of detected languages together with confidence.
        repeated DetectedLanguage detected_languages = 4;
      }

      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
      // [Table][google.cloud.documentai.v1.Document.Page.Table].
      Layout layout = 1;

      // Header rows of the table.
      repeated TableRow header_rows = 2;

      // Body rows of the table.
      repeated TableRow body_rows = 3;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 4;

      // The history of this table.
      Provenance provenance = 5 [deprecated = true];
    }

    // A form field detected on the page.
    message FormField {
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1.Document.Page.FormField] name.
      // e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
      Layout field_name = 1;

      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1.Document.Page.FormField] value.
      Layout field_value = 2;

      // A list of detected languages for name together with confidence.
      repeated DetectedLanguage name_detected_languages = 3;

      // A list of detected languages for value together with confidence.
      repeated DetectedLanguage value_detected_languages = 4;

      // If the value is non-textual, this field represents the type. Current
      // valid values are:
      //
      // - blank (this indicates the `field_value` is normal text)
      // - `unfilled_checkbox`
      // - `filled_checkbox`
      string value_type = 5;

      // Created for Labeling UI to export key text.
      // If corrections were made to the text identified by the
      // `field_name.text_anchor`, this field will contain the correction.
      string corrected_key_text = 6;

      // Created for Labeling UI to export value text.
      // If corrections were made to the text identified by the
      // `field_value.text_anchor`, this field will contain the correction.
      string corrected_value_text = 7;

      // The history of this annotation.
      Provenance provenance = 8;
    }

    // A detected barcode.
    message DetectedBarcode {
      // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
      // [DetectedBarcode][google.cloud.documentai.v1.Document.Page.DetectedBarcode].
      Layout layout = 1;

      // Detailed barcode information of the
      // [DetectedBarcode][google.cloud.documentai.v1.Document.Page.DetectedBarcode].
      Barcode barcode = 2;
    }

    // Detected language for a structural component.
    message DetectedLanguage {
      // The [BCP-47 language
      // code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
      // such as `en-US` or `sr-Latn`.
      string language_code = 1;

      // Confidence of detected language. Range `[0, 1]`.
      float confidence = 2;
    }

    // Image quality scores for the page image.
    message ImageQualityScores {
      // Image quality defects.
      message DetectedDefect {
        // Name of the defect type. Supported values are:
        //
        // - `quality/defect_blurry`
        // - `quality/defect_noisy`
        // - `quality/defect_dark`
        // - `quality/defect_faint`
        // - `quality/defect_text_too_small`
        // - `quality/defect_document_cutoff`
        // - `quality/defect_text_cutoff`
        // - `quality/defect_glare`
        string type = 1;

        // Confidence of detected defect. Range `[0, 1]` where `1` indicates
        // strong confidence that the defect exists.
        float confidence = 2;
      }

      // The overall quality score. Range `[0, 1]` where `1` is perfect quality.
      float quality_score = 1;

      // A list of detected defects.
      repeated DetectedDefect detected_defects = 2;
    }

    // 1-based index for current
    // [Page][google.cloud.documentai.v1.Document.Page] in a parent
    // [Document][google.cloud.documentai.v1.Document]. Useful when a page is
    // taken out of a [Document][google.cloud.documentai.v1.Document] for
    // individual processing.
    int32 page_number = 1;

    // Rendered image for this page. This image is preprocessed to remove any
    // skew, rotation, and distortions such that the annotation bounding boxes
    // can be upright and axis-aligned.
    Image image = 13;

    // Transformation matrices that were applied to the original document image
    // to produce [Page.image][google.cloud.documentai.v1.Document.Page.image].
    repeated Matrix transforms = 14;

    // Physical dimension of the page.
    Dimension dimension = 2;

    // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the page.
    Layout layout = 3;

    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 4;

    // A list of visually detected text blocks on the page.
    // A block has a set of lines (collected into paragraphs) that have a common
    // line-spacing and orientation.
    repeated Block blocks = 5;

    // A list of visually detected text paragraphs on the page.
    // A collection of lines that a human would perceive as a paragraph.
    repeated Paragraph paragraphs = 6;

    // A list of visually detected text lines on the page.
    // A collection of tokens that a human would perceive as a line.
    repeated Line lines = 7;

    // A list of visually detected tokens on the page.
    repeated Token tokens = 8;

    // A list of detected non-text visual elements e.g. checkbox,
    // signature etc. on the page.
    repeated VisualElement visual_elements = 9;

    // A list of visually detected tables on the page.
    repeated Table tables = 10;

    // A list of visually detected form fields on the page.
    repeated FormField form_fields = 11;

    // A list of visually detected symbols on the page.
    repeated Symbol symbols = 12;

    // A list of detected barcodes.
    repeated DetectedBarcode detected_barcodes = 15;

    // Image quality scores.
    ImageQualityScores image_quality_scores = 17;

    // The history of this page.
    Provenance provenance = 16 [deprecated = true];
  }

  // An entity that could be a phrase in the text or a property that belongs to
  // the document. It is a known entity type, such as a person, an organization,
  // or location.
  message Entity {
    // Parsed and normalized entity value.
    message NormalizedValue {
      // An optional structured entity value.
      // Must match entity type defined in schema if
      // known. If this field is present, the `text` field could also be
      // populated.
      oneof structured_value {
        // Money value. See also:
        // https://github.com/googleapis/googleapis/blob/master/google/type/money.proto
        google.type.Money money_value = 2;

        // Date value. Includes year, month, day. See also:
        // https://github.com/googleapis/googleapis/blob/master/google/type/date.proto
        google.type.Date date_value = 3;

        // DateTime value. Includes date, time, and timezone. See also:
        // https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto
        google.type.DateTime datetime_value = 4;

        // Postal address. See also:
        // https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto
        google.type.PostalAddress address_value = 5;

        // Boolean value. Can be used for entities with binary values, or for
        // checkboxes.
        bool boolean_value = 6;

        // Integer value.
        int32 integer_value = 7;

        // Float value.
        float float_value = 8;
      }

      // Optional. An optional field to store a normalized string.
      // For some entity types, one of respective `structured_value` fields may
      // also be populated. Also not all the types of `structured_value` will be
      // normalized. For example, some processors may not generate `float`
      // or `integer` normalized text by default.
      //
      // Below are sample formats mapped to structured values.
      //
      // - Money/Currency type (`money_value`) is in the ISO 4217 text format.
      // - Date type (`date_value`) is in the ISO 8601 text format.
      // - Datetime type (`datetime_value`) is in the ISO 8601 text format.
      string text = 1 [(google.api.field_behavior) = OPTIONAL];
    }

    // Optional. Provenance of the entity.
    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1.Document.text].
    TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL];

    // Required. Entity type from a schema e.g. `Address`.
    string type = 2 [(google.api.field_behavior) = REQUIRED];

    // Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
    string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Deprecated. Use `id` field instead.
    string mention_id = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Confidence of detected Schema entity. Range `[0, 1]`.
    float confidence = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Represents the provenance of this entity wrt. the location on
    // the page where it was found.
    PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Canonical id. This will be a unique value in the entity list
    // for this document.
    string id = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Normalized entity value. Absent if the extracted value could
    // not be converted or the type (e.g. address) is not supported for certain
    // parsers. This field is also only populated for certain supported document
    // types.
    NormalizedValue normalized_value = 9
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Entities can be nested to form a hierarchical data structure
    // representing the content in the document.
    repeated Entity properties = 10 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The history of this annotation.
    Provenance provenance = 11 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Whether the entity will be redacted for de-identification
    // purposes.
    bool redacted = 12 [(google.api.field_behavior) = OPTIONAL];
  }

  // Relationship between
  // [Entities][google.cloud.documentai.v1.Document.Entity].
  message EntityRelation {
    // Subject entity id.
    string subject_id = 1;

    // Object entity id.
    string object_id = 2;

    // Relationship description.
    string relation = 3;
  }

  // Text reference indexing into the
  // [Document.text][google.cloud.documentai.v1.Document.text].
  message TextAnchor {
    // A text segment in the
    // [Document.text][google.cloud.documentai.v1.Document.text]. The indices
    // may be out of bounds which indicate that the text extends into another
    // document shard for large sharded documents. See
    // [ShardInfo.text_offset][google.cloud.documentai.v1.Document.ShardInfo.text_offset].
    message TextSegment {
      // [TextSegment][google.cloud.documentai.v1.Document.TextAnchor.TextSegment]
      // start UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1.Document.text].
      int64 start_index = 1;

      // [TextSegment][google.cloud.documentai.v1.Document.TextAnchor.TextSegment]
      // half open end UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1.Document.text].
      int64 end_index = 2;
    }

    // The text segments from the
    // [Document.text][google.cloud.documentai.v1.Document.text].
    repeated TextSegment text_segments = 1;

    // Contains the content of the text span so that users do
    // not have to look it up in the text_segments. It is always
    // populated for formFields.
    string content = 2;
  }

  // Referencing the visual context of the entity in the
  // [Document.pages][google.cloud.documentai.v1.Document.pages]. Page anchors
  // can be cross-page, consist of multiple bounding polygons and optionally
  // reference specific layout element types.
  message PageAnchor {
    // Represents a weak reference to a page element within a document.
    message PageRef {
      // The type of layout that is being referenced.
      enum LayoutType {
        // Layout Unspecified.
        LAYOUT_TYPE_UNSPECIFIED = 0;

        // References a
        // [Page.blocks][google.cloud.documentai.v1.Document.Page.blocks]
        // element.
        BLOCK = 1;

        // References a
        // [Page.paragraphs][google.cloud.documentai.v1.Document.Page.paragraphs]
        // element.
        PARAGRAPH = 2;

        // References a
        // [Page.lines][google.cloud.documentai.v1.Document.Page.lines] element.
        LINE = 3;

        // References a
        // [Page.tokens][google.cloud.documentai.v1.Document.Page.tokens]
        // element.
        TOKEN = 4;

        // References a
        // [Page.visual_elements][google.cloud.documentai.v1.Document.Page.visual_elements]
        // element.
        VISUAL_ELEMENT = 5;

        // References a
        // [Page.tables][google.cloud.documentai.v1.Document.Page.tables]
        // element.
        TABLE = 6;

        // References a
        // [Page.form_fields][google.cloud.documentai.v1.Document.Page.form_fields]
        // element.
        FORM_FIELD = 7;
      }

      // Required. Index into the
      // [Document.pages][google.cloud.documentai.v1.Document.pages] element,
      // for example using
      // `[Document.pages][page_refs.page]` to locate the related page element.
      // This field is skipped when its value is the default `0`. See
      // https://developers.google.com/protocol-buffers/docs/proto3#json.
      int64 page = 1 [(google.api.field_behavior) = REQUIRED];

      // Optional. The type of the layout element that is being referenced if
      // any.
      LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Deprecated. Use
      // [PageRef.bounding_poly][google.cloud.documentai.v1.Document.PageAnchor.PageRef.bounding_poly]
      // instead.
      string layout_id = 3 [
        deprecated = true,
        (google.api.field_behavior) = OPTIONAL
      ];

      // Optional. Identifies the bounding polygon of a layout element on the
      // page. If `layout_type` is set, the bounding polygon must be exactly the
      // same as the layout element it's referring to.
      BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Confidence of detected page element, if applicable. Range
      // `[0, 1]`.
      float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
    }

    // One or more references to visual page elements.
    repeated PageRef page_refs = 1;
  }

  // Structure to identify provenance relationships between annotations in
  // different revisions.
  message Provenance {
    // The parent element the current element is based on. Used for
    // referencing/aligning, removal and replacement operations.
    message Parent {
      // The index into the current revision's `parent_ids` list.
      int32 revision = 1;

      // The index of the parent item in the corresponding item list (eg. list
      // of entities, properties within entities, etc.) in the parent revision.
      int32 index = 3;

      // The id of the parent provenance.
      int32 id = 2 [deprecated = true];
    }

    // If a processor or agent does an explicit operation on existing elements.
    enum OperationType {
      // Operation type unspecified. If no operation is specified a provenance
      // entry is simply used to match against a `parent`.
      OPERATION_TYPE_UNSPECIFIED = 0;

      // Add an element.
      ADD = 1;

      // Remove an element identified by `parent`.
      REMOVE = 2;

      // Updates any fields within the given provenance scope of the message. It
      // overwrites the fields rather than replacing them. Use this when you
      // want to update a field value of an entity without also updating all the
      // child properties.
      UPDATE = 7;

      // Currently unused. Replace an element identified by `parent`.
      REPLACE = 3;

      // Deprecated. Request human review for the element identified by
      // `parent`.
      EVAL_REQUESTED = 4 [deprecated = true];

      // Deprecated. Element is reviewed and approved at human review,
      // confidence will be set to 1.0.
      EVAL_APPROVED = 5 [deprecated = true];

      // Deprecated. Element is skipped in the validation process.
      EVAL_SKIPPED = 6 [deprecated = true];
    }

    // The index of the revision that produced this element.
    int32 revision = 1 [deprecated = true];

    // The Id of this operation. Needs to be unique within the scope of the
    // revision.
    int32 id = 2 [deprecated = true];

    // References to the original elements that are replaced.
    repeated Parent parents = 3;

    // The type of provenance operation.
    OperationType type = 4;
  }

  // Contains past or forward revisions of this document.
  message Revision {
    // Human Review information of the document.
    message HumanReview {
      // Human review state. e.g. `requested`, `succeeded`, `rejected`.
      string state = 1;

      // A message providing more details about the current state of processing.
      // For example, the rejection reason when the state is `rejected`.
      string state_message = 2;
    }

    // Who/what made the change.
    oneof source {
      // If the change was made by a person specify the name or id of that
      // person.
      string agent = 4;

      // If the annotation was made by processor identify the processor by its
      // resource name.
      string processor = 5;
    }

    // Id of the revision, internally generated by doc proto storage.
    // Unique within the context of the document.
    string id = 1;

    // The revisions that this revision is based on. This can include one or
    // more parents (when documents are merged). This field represents the
    // index into the `revisions` field.
    repeated int32 parent = 2 [deprecated = true];

    // The revisions that this revision is based on. Must include all the ids
    // that have anything to do with this revision - eg. there are
    // `provenance.parent.revision` fields that index into this field.
    repeated string parent_ids = 7;

    // The time that the revision was created, internally generated by
    // doc proto storage at the time of create.
    google.protobuf.Timestamp create_time = 3;

    // Human Review information of this revision.
    HumanReview human_review = 6;
  }

  // This message is used for text changes aka. OCR corrections.
  message TextChange {
    // Provenance of the correction.
    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1.Document.text]. There can
    // only be a single `TextAnchor.text_segments` element. If the start and
    // end index of the text segment are the same, the text change is inserted
    // before that index.
    TextAnchor text_anchor = 1;

    // The text that replaces the text identified in the `text_anchor`.
    string changed_text = 2;

    // The history of this annotation.
    repeated Provenance provenance = 3 [deprecated = true];
  }

  // Original source document from the user.
  oneof source {
    // Optional. Currently supports Google Cloud Storage URI of the form
    // `gs://bucket_name/object_name`. Object versioning is not supported.
    // For more information, refer to [Google Cloud Storage Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris).
    string uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Inline document content, represented as a stream of bytes.
    // Note: As with all `bytes` fields, protocol buffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // An IANA published [media type (MIME
  // type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
  string mime_type = 3;

  // Optional. UTF-8 encoded text in reading order from the document.
  string text = 4 [(google.api.field_behavior) = OPTIONAL];

  // Styles for the [Document.text][google.cloud.documentai.v1.Document.text].
  repeated Style text_styles = 5 [deprecated = true];

  // Visual page layout for the
  // [Document][google.cloud.documentai.v1.Document].
  repeated Page pages = 6;

  // A list of entities detected on
  // [Document.text][google.cloud.documentai.v1.Document.text]. For document
  // shards, entities in this list may cross shard boundaries.
  repeated Entity entities = 7;

  // Placeholder. Relationship among
  // [Document.entities][google.cloud.documentai.v1.Document.entities].
  repeated EntityRelation entity_relations = 8;

  // Placeholder. A list of text corrections made to
  // [Document.text][google.cloud.documentai.v1.Document.text]. This is usually
  // used for annotating corrections to OCR mistakes. Text changes for a given
  // revision may not overlap with each other.
  repeated TextChange text_changes = 14;

  // Information about the sharding if this document is sharded part of a larger
  // document. If the document is not sharded, this message is not specified.
  ShardInfo shard_info = 9;

  // Any error that occurred while processing this document.
  google.rpc.Status error = 10;

  // Placeholder. Revision history of this document.
  repeated Revision revisions = 13;
}