// xref: /aosp_15_r20/external/googleapis/google/cloud/documentai/v1/document_schema.proto
// (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1;

// Per-language code generation settings for this file.
option csharp_namespace = "Google.Cloud.DocumentAI.V1";
option go_package = "cloud.google.com/go/documentai/apiv1/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentSchema";
option java_package = "com.google.cloud.documentai.v1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1";
option ruby_package = "Google::Cloud::DocumentAI::V1";

// The schema defines the output of the processed document by a processor.
message DocumentSchema {
  // EntityType is the wrapper of a label of the corresponding model with
  // detailed attributes and limitations for entity-based processors. Multiple
  // types can also compose a dependency tree to represent nested types.
  message EntityType {
    // Defines a list of enum values.
    message EnumValues {
      // The individual values that this enum values type can include.
      repeated string values = 1;
    }

    // Defines properties that can be part of the entity type.
    message Property {
      // Types of occurrences of the entity type in the document. This
      // represents the number of instances, not mentions, of an entity.
      // For example, a bank statement might only have one
      // `account_number`, but this account number can be mentioned in several
      // places on the document. In this case, the `account_number` is
      // considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
      // expect a bank statement to contain the status of multiple different
      // accounts for the customers, the occurrence type is set to
      // `REQUIRED_MULTIPLE`.
      enum OccurrenceType {
        // Unspecified occurrence type.
        OCCURRENCE_TYPE_UNSPECIFIED = 0;

        // There will be zero or one instance of this entity type. The same
        // entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1;

        // The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2;

        // The entity type will appear exactly once. The same entity instance
        // may be mentioned multiple times.
        REQUIRED_ONCE = 3;

        // The entity type will appear once or more times.
        REQUIRED_MULTIPLE = 4;
      }

      // The name of the property. Follows the same guidelines as the
      // `EntityType.name` field.
      string name = 1;

      // User defined name for the property.
      string display_name = 6;

      // A reference to the value type of the property. This type is subject
      // to the same conventions as the `EntityType.base_types` field.
      string value_type = 2;

      // Occurrence type limits the number of instances an entity type appears
      // in the document.
      OccurrenceType occurrence_type = 3;
    }

    // NOTE(review): the `enum_values` comment below refers to
    // `EntityType.value_ontology`, but no such field is declared in this
    // message. Confirm whether the reference is still valid for this API
    // version.

    // Where the possible values of this entity type are defined.
    oneof value_source {
      // If specified, lists all the possible values for this entity. This
      // should not be more than a handful of values. If the number of values
      // is greater than 10, or could change frequently, use the
      // `EntityType.value_ontology` field and specify a list of all possible
      // values in a value ontology file.
      EnumValues enum_values = 14;
    }

    // User defined name for the type.
    string display_name = 13;

    // Name of the type. It must be unique within the schema file and
    // cannot be a "Common Type". The following naming conventions are used:
    //
    // - Use `snake_casing`.
    // - Name matching is case-sensitive.
    // - Maximum 64 characters.
    // - Must start with a letter.
    // - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
    //   compatibility, internal infrastructure and tooling can handle any
    //   ASCII character.)
    // - The `/` is sometimes used to denote a property of a type. For example
    //   `line_item/amount`. This convention is deprecated, but will still be
    //   honored for backward compatibility.
    string name = 1;

    // The entity type that this type is derived from. For now, one and only
    // one should be set.
    repeated string base_types = 2;

    // Describes the nested structure, or composition, of an entity.
    repeated Property properties = 6;
  }

  // Metadata for global schema behavior.
  message Metadata {
    // If true, a `document` entity type can be applied to a subdocument
    // (splitting). Otherwise, it can only be applied to the entire document
    // (classification).
    bool document_splitter = 1;

    // If true, on a given page, there can be multiple `document` annotations
    // covering it.
    bool document_allow_multiple_labels = 2;

    // If set, all the nested entities must be prefixed with the parents.
    bool prefixed_naming_on_properties = 6;

    // If set, we will skip the naming format validation in the schema. So the
    // string values in `DocumentSchema.EntityType.name` and
    // `DocumentSchema.EntityType.Property.name` will not be checked.
    bool skip_naming_validation = 7;
  }

  // Display name to show to users.
  string display_name = 1;

  // Description of the schema.
  string description = 2;

  // Entity types of the schema.
  repeated EntityType entity_types = 3;

  // Metadata of the schema.
  Metadata metadata = 4;
}