// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1;

option csharp_namespace = "Google.Cloud.DocumentAI.V1";
option go_package = "cloud.google.com/go/documentai/apiv1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentSchema";
option java_package = "com.google.cloud.documentai.v1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1";
option ruby_package = "Google::Cloud::DocumentAI::V1";

// The schema defines the output of the processed document by a processor.
message DocumentSchema {
  // EntityType is the wrapper of a label of the corresponding model with
  // detailed attributes and limitations for entity-based processors. Multiple
  // types can also compose a dependency tree to represent nested types.
  message EntityType {
    // Defines a list of enum values.
    message EnumValues {
      // The individual values that this enum values type can include.
      repeated string values = 1;
    }

    // Defines properties that can be part of the entity type.
    message Property {
      // Types of occurrences of the entity type in the document. This
      // represents the number of instances, not mentions, of an entity.
      // For example, a bank statement might only have one
      // `account_number`, but this account number can be mentioned in several
      // places on the document. In this case, the `account_number` is
      // considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
      // expect a bank statement to contain the status of multiple different
      // accounts for the customers, the occurrence type is set to
      // `REQUIRED_MULTIPLE`.
      enum OccurrenceType {
        // Unspecified occurrence type.
        OCCURRENCE_TYPE_UNSPECIFIED = 0;

        // There will be zero or one instance of this entity type. The same
        // entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1;

        // The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2;

        // The entity type will only appear exactly once. The same
        // entity instance may be mentioned multiple times.
        REQUIRED_ONCE = 3;

        // The entity type will appear once or more times.
        REQUIRED_MULTIPLE = 4;
      }

      // The name of the property. Follows the same guidelines as the
      // EntityType name.
      string name = 1;

      // User defined name for the property.
      string display_name = 6;

      // A reference to the value type of the property. This type is subject
      // to the same conventions as the `EntityType.base_types` field.
      string value_type = 2;

      // Occurrence type limits the number of instances an entity type appears
      // in the document.
      OccurrenceType occurrence_type = 3;
    }

    // Where the permitted values of this entity type come from. An inline
    // enumeration is the only source declared here.
    oneof value_source {
      // If specified, lists all the possible values for this entity. This
      // should not be more than a handful of values. If the number of values
      // is >10 or could change frequently, use the `EntityType.value_ontology`
      // field and specify a list of all possible values in a value ontology
      // file.
      EnumValues enum_values = 14;
    }

    // User defined name for the type.
    string display_name = 13;

    // Name of the type. It must be unique within the schema file and
    // cannot be a "Common Type". The following naming conventions are used:
    //
    // - Use `snake_casing`.
    // - Name matching is case-sensitive.
    // - Maximum 64 characters.
    // - Must start with a letter.
    // - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
    //   compatibility internal infrastructure and tooling can handle any ASCII
    //   character.)
    // - The `/` is sometimes used to denote a property of a type. For example
    //   `line_item/amount`. This convention is deprecated, but will still be
    //   honored for backward compatibility.
    string name = 1;

    // The entity type that this type is derived from. For now, one and only
    // one should be set.
    repeated string base_types = 2;

    // Describes the nested structure, or composition of an entity.
    repeated Property properties = 6;
  }

  // Metadata for global schema behavior.
  message Metadata {
    // If true, a `document` entity type can be applied to subdocument
    // (splitting). Otherwise, it can only be applied to the entire document
    // (classification).
    bool document_splitter = 1;

    // If true, on a given page, there can be multiple `document` annotations
    // covering it.
    bool document_allow_multiple_labels = 2;

    // If set, all the nested entities must be prefixed with the parents.
    bool prefixed_naming_on_properties = 6;

    // If set, we will skip the naming format validation in the schema. So the
    // string values in `DocumentSchema.EntityType.name` and
    // `DocumentSchema.EntityType.Property.name` will not be checked.
    bool skip_naming_validation = 7;
  }

  // Display name to show to users.
  string display_name = 1;

  // Description of the schema.
  string description = 2;

  // Entity types of the schema.
  repeated EntityType entity_types = 3;

  // Metadata of the schema.
  Metadata metadata = 4;
}