// xref: /aosp_15_r20/external/googleapis/google/cloud/documentai/v1beta3/dataset.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta3;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1beta3/document.proto";
import "google/cloud/documentai/v1beta3/document_io.proto";
import "google/cloud/documentai/v1beta3/document_schema.proto";

// Per-language code generation settings for this file.
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DatasetProto";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// Declares the `contentwarehouse.googleapis.com/Schema` resource type, which
// has no message in this file, so that
// `Dataset.DocumentWarehouseConfig.schema` can name it in its
// `google.api.resource_reference`.
option (google.api.resource_definition) = {
  type: "contentwarehouse.googleapis.com/Schema"
  pattern: "projects/{project}/locations/{location}/schemas/{schema}"
};

// A singleton resource under a
// [Processor][google.cloud.documentai.v1beta3.Processor] which configures a
// collection of documents.
message Dataset {
  option (google.api.resource) = {
    type: "documentai.googleapis.com/Dataset"
    pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset"
  };

  // Configuration specific to the Cloud Storage-based implementation.
  message GCSManagedConfig {
    // Required. The Cloud Storage URI (a directory) where the documents
    // belonging to the dataset must be stored.
    GcsPrefix gcs_prefix = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Configuration specific to the Document AI Warehouse-based implementation.
  message DocumentWarehouseConfig {
    // Output only. The collection in Document AI Warehouse associated with the
    // dataset.
    string collection = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The schema in Document AI Warehouse associated with the
    // dataset.
    string schema = 2 [
      (google.api.field_behavior) = OUTPUT_ONLY,
      (google.api.resource_reference) = {
        type: "contentwarehouse.googleapis.com/Schema"
      }
    ];
  }

  // Configuration specific to an unmanaged dataset.
  // Currently declares no fields; setting it in `storage_source` is what
  // selects the unmanaged option.
  message UnmanagedDatasetConfig {}

  // Configuration specific to spanner-based indexing.
  // Currently declares no fields; setting it in `indexing_source` is what
  // selects spanner-based indexing.
  message SpannerIndexingConfig {}

  // Different states of a dataset.
  enum State {
    // Default unspecified enum, should not be used.
    STATE_UNSPECIFIED = 0;

    // Dataset has not been initialized.
    UNINITIALIZED = 1;

    // Dataset is being initialized.
    INITIALIZING = 2;

    // Dataset has been initialized.
    INITIALIZED = 3;
  }

  // Where the documents of the dataset are stored. At most one of the
  // configurations below can be set at a time.
  oneof storage_source {
    // Optional. User-managed Cloud Storage dataset configuration. Use this
    // configuration if the dataset documents are stored under a user-managed
    // Cloud Storage location.
    GCSManagedConfig gcs_managed_config = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Deprecated. Warehouse-based dataset configuration is not
    // supported.
    DocumentWarehouseConfig document_warehouse_config = 5
        [deprecated = true, (google.api.field_behavior) = OPTIONAL];

    // Optional. Unmanaged dataset configuration. Use this configuration if the
    // dataset documents are managed by the document service internally (not
    // user-managed).
    UnmanagedDatasetConfig unmanaged_dataset_config = 6
        [(google.api.field_behavior) = OPTIONAL];
  }

  // How the documents of the dataset are indexed. Only one indexing
  // configuration is declared today.
  oneof indexing_source {
    // Optional. A lightweight indexing source with low latency and high
    // reliability, but lacking advanced features like CMEK and content-based
    // search.
    SpannerIndexingConfig spanner_indexing_config = 4
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Dataset resource name.
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}/dataset`
  string name = 1;

  // Required. State of the dataset. Ignored when updating dataset.
  State state = 2 [(google.api.field_behavior) = REQUIRED];
}

// Document Identifier.
message DocumentId {
  // Identifies a document uniquely within the scope of a dataset in the
  // user-managed Cloud Storage option.
  message GCSManagedDocumentId {
    // Required. The Cloud Storage URI where the actual document is stored.
    string gcs_uri = 1 [(google.api.field_behavior) = REQUIRED];

    // Deprecated. Id of the document (indexed) managed by Content Warehouse.
    string cw_doc_id = 2 [deprecated = true];
  }

  // Identifies a document uniquely within the scope of a dataset in unmanaged
  // option.
  message UnmanagedDocumentId {
    // Required. The id of the document.
    string doc_id = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // The kind of identifier carried by this message. At most one of the
  // variants below can be set at a time.
  oneof type {
    // A document id within user-managed Cloud Storage.
    GCSManagedDocumentId gcs_managed_doc_id = 1;

    // A document id within unmanaged dataset.
    UnmanagedDocumentId unmanaged_doc_id = 4;
  }

  // Points to a specific revision of the document if set.
  RevisionRef revision_ref = 3;
}

// Dataset Schema.
// Declared as a resource with a single fixed-path pattern nested under a
// processor's dataset.
message DatasetSchema {
  option (google.api.resource) = {
    type: "documentai.googleapis.com/DatasetSchema"
    pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema"
  };

  // Dataset schema resource name.
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema`
  string name = 1;

  // Optional. Schema of the dataset.
  DocumentSchema document_schema = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Dataset documents that the batch operation will be applied to.
message BatchDatasetDocuments {
  // List of individual DocumentIds.
  message IndividualDocumentIds {
    // Required. List of Document IDs indicating where the actual documents are
    // stored.
    repeated DocumentId document_ids = 1
        [(google.api.field_behavior) = REQUIRED];
  }

  // How the target documents are selected: either an explicit list of
  // identifiers or a filter expression. At most one can be set at a time.
  oneof criteria {
    // Document identifiers.
    IndividualDocumentIds individual_document_ids = 1;

    // A filter matching the documents.
    // Follows the same format and restriction as
    // [ListDocumentsRequest.filter][google.cloud.documentai.v1beta3.ListDocumentsRequest.filter].
    string filter = 2;
  }
}