xref: /aosp_15_r20/external/googleapis/google/cloud/discoveryengine/v1alpha/document.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
syntax = "proto3";

package google.cloud.discoveryengine.v1alpha;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/discoveryengine/v1alpha/common.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

// Per-language code generation settings for this package.
option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Alpha";
option go_package = "cloud.google.com/go/discoveryengine/apiv1alpha/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.discoveryengine.v1alpha";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1alpha";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1alpha";
33
// Document captures all raw metadata information of items to be recommended or
// searched.
message Document {
  option (google.api.resource) = {
    type: "discoveryengine.googleapis.com/Document"
    pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/branches/{branch}/documents/{document}"
    pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document}"
  };

  // Unstructured data linked to this document.
  message Content {
    // Where the content comes from. At most one of the following can be set.
    oneof content {
      // The content represented as a stream of bytes. The maximum length is
      // 1,000,000 bytes (1 MB / ~0.95 MiB).
      //
      // Note: As with all `bytes` fields, this field is represented as pure
      // binary in Protocol Buffers and base64-encoded string in JSON. For
      // example, `abc123!?$*&()'-=@~` should be represented as
      // `YWJjMTIzIT8kKiYoKSctPUB+` in JSON. See
      // https://developers.google.com/protocol-buffers/docs/proto3#json.
      bytes raw_bytes = 2;

      // The URI of the content. Only Cloud Storage URIs (e.g.
      // `gs://bucket-name/path/to/file`) are supported. The maximum file size
      // is 2.5 MB for text-based formats, 100 MB for other formats.
      string uri = 3;
    }

    // The MIME type of the content. Supported types:
    //
    // * `application/pdf` (PDF, only native PDFs are supported for now)
    // * `text/html` (HTML)
    // * `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX)
    // * `application/vnd.openxmlformats-officedocument.presentationml.presentation` (PPTX)
    // * `text/plain` (TXT)
    //
    // See https://www.iana.org/assignments/media-types/media-types.xhtml.
    string mime_type = 1;
  }

  // ACL Information of the Document.
  message AclInfo {
    // AclRestriction to model complex inheritance restrictions.
    //
    // Example: Modeling a "Both Permit" inheritance, where to access a
    // child document, user needs to have access to parent document.
    //
    // Document Hierarchy - Space_S --> Page_P.
    //
    // Readers:
    //   Space_S: group_1, user_1
    //   Page_P: group_2, group_3, user_2
    //
    // Space_S ACL Restriction:
    // {
    //   "acl_info": {
    //     "readers": [
    //       {
    //         "principals": [
    //           {
    //             "group_id": "group_1"
    //           },
    //           {
    //             "user_id": "user_1"
    //           }
    //         ]
    //       }
    //     ]
    //   }
    // }
    //
    // Page_P ACL Restriction:
    // {
    //   "acl_info": {
    //     "readers": [
    //       {
    //         "principals": [
    //           {
    //             "group_id": "group_2"
    //           },
    //           {
    //             "group_id": "group_3"
    //           },
    //           {
    //             "user_id": "user_2"
    //           }
    //         ]
    //       },
    //       {
    //         "principals": [
    //           {
    //             "group_id": "group_1"
    //           },
    //           {
    //             "user_id": "user_1"
    //           }
    //         ]
    //       }
    //     ]
    //   }
    // }
    message AccessRestriction {
      // List of principals.
      repeated Principal principals = 1;
    }

    // Readers of the document.
    repeated AccessRestriction readers = 1;
  }

  // Data representation. One of
  // [struct_data][google.cloud.discoveryengine.v1alpha.Document.struct_data] or
  // [json_data][google.cloud.discoveryengine.v1alpha.Document.json_data] should
  // be provided; otherwise an `INVALID_ARGUMENT` error is thrown.
  oneof data {
    // The structured JSON data for the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1alpha.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    google.protobuf.Struct struct_data = 4;

    // The JSON string representation of the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1alpha.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    string json_data = 5;
  }

  // Immutable. The full resource name of the document.
  // Format:
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document_id}`.
  //
  // This field must be a UTF-8 encoded string with a length limit of 1024
  // characters.
  string name = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The identifier of the document.
  //
  // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string id = 2 [(google.api.field_behavior) = IMMUTABLE];

  // The identifier of the schema located in the same data store.
  string schema_id = 3;

  // The unstructured data linked to this document. Content must be set if this
  // document is under a `CONTENT_REQUIRED` data store.
  Content content = 10;

  // The identifier of the parent document. Currently supports at most two level
  // document hierarchy.
  //
  // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string parent_document_id = 7;

  // Output only. Derived data that are not in the original input document.
  google.protobuf.Struct derived_struct_data = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Access control information for the document.
  AclInfo acl_info = 11;

  // Output only. The last time the document was indexed. If this field is set,
  // the document could be returned in search results.
  //
  // If this field is not populated, it means the document has never been
  // indexed.
  google.protobuf.Timestamp index_time = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
205
// ProcessedDocument carries the processed form of a
// [Document][google.cloud.discoveryengine.v1alpha.Document], together with the
// resource name of the document it refers to.
message ProcessedDocument {
  // Output format of the processed document.
  oneof processed_data_format {
    // The JSON string representation of the processed document.
    string json_data = 2;
  }

  // Required. Full resource name of the referenced document, in the format
  // `projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*`.
  string document = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/Document"
    }
  ];
}
224