// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1alpha;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/discoveryengine/v1alpha/common.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Alpha";
option go_package = "cloud.google.com/go/discoveryengine/apiv1alpha/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.discoveryengine.v1alpha";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1alpha";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1alpha";

// Document captures all raw metadata information of items to be recommended or
// searched.
message Document {
  option (google.api.resource) = {
    type: "discoveryengine.googleapis.com/Document"
    pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/branches/{branch}/documents/{document}"
    pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document}"
  };

  // Unstructured data linked to this document.
  message Content {
    // The content payload, supplied either inline
    // ([raw_bytes][google.cloud.discoveryengine.v1alpha.Document.Content.raw_bytes])
    // or by reference
    // ([uri][google.cloud.discoveryengine.v1alpha.Document.Content.uri]).
    // At most one of these fields can be set.
    oneof content {
      // The content represented as a stream of bytes. The maximum length is
      // 1,000,000 bytes (1 MB / ~0.95 MiB).
      //
      // Note: As with all `bytes` fields, this field is represented as pure
      // binary in Protocol Buffers and base64-encoded string in JSON. For
      // example, `abc123!?$*&()'-=@~` should be represented as
      // `YWJjMTIzIT8kKiYoKSctPUB+` in JSON. See
      // https://developers.google.com/protocol-buffers/docs/proto3#json.
      bytes raw_bytes = 2;

      // The URI of the content. Only Cloud Storage URIs (e.g.
      // `gs://bucket-name/path/to/file`) are supported. The maximum file size
      // is 2.5 MB for text-based formats, 100 MB for other formats.
      string uri = 3;
    }

    // The MIME type of the content. Supported types:
    //
    // * `application/pdf` (PDF, only native PDFs are supported for now)
    // * `text/html` (HTML)
    // * `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX)
    // * `application/vnd.openxmlformats-officedocument.presentationml.presentation` (PPTX)
    // * `text/plain` (TXT)
    //
    // See https://www.iana.org/assignments/media-types/media-types.xhtml.
    string mime_type = 1;
  }

  // ACL information of the document.
  message AclInfo {
    // AccessRestriction models complex inheritance restrictions.
    //
    // Example: Modeling a "Both Permit" inheritance, where to access a
    // child document, the user needs to have access to the parent document.
    //
    // Document Hierarchy - Space_S --> Page_P.
    //
    // Readers:
    //   Space_S: group_1, user_1
    //   Page_P: group_2, group_3, user_2
    //
    // Space_S ACL Restriction -
    // {
    //   "acl_info": {
    //     "readers": [
    //       {
    //         "principals": [
    //           {
    //             "group_id": "group_1"
    //           },
    //           {
    //             "user_id": "user_1"
    //           }
    //         ]
    //       }
    //     ]
    //   }
    // }
    //
    // Page_P ACL Restriction - Page_P lists its own readers and, as a second
    // entry, the readers of its parent Space_S.
    // {
    //   "acl_info": {
    //     "readers": [
    //       {
    //         "principals": [
    //           {
    //             "group_id": "group_2"
    //           },
    //           {
    //             "group_id": "group_3"
    //           },
    //           {
    //             "user_id": "user_2"
    //           }
    //         ]
    //       },
    //       {
    //         "principals": [
    //           {
    //             "group_id": "group_1"
    //           },
    //           {
    //             "user_id": "user_1"
    //           }
    //         ]
    //       }
    //     ]
    //   }
    // }
    message AccessRestriction {
      // List of principals.
      repeated Principal principals = 1;
    }

    // Readers of the document.
    repeated AccessRestriction readers = 1;
  }

  // Data representation. One of
  // [struct_data][google.cloud.discoveryengine.v1alpha.Document.struct_data] or
  // [json_data][google.cloud.discoveryengine.v1alpha.Document.json_data] should
  // be provided; otherwise an `INVALID_ARGUMENT` error is thrown.
  oneof data {
    // The structured JSON data for the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1alpha.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    google.protobuf.Struct struct_data = 4;

    // The JSON string representation of the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1alpha.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    string json_data = 5;
  }

  // Immutable. The full resource name of the document.
  // Format:
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document_id}`.
  //
  // This field must be a UTF-8 encoded string with a length limit of 1024
  // characters.
  string name = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The identifier of the document.
  //
  // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string id = 2 [(google.api.field_behavior) = IMMUTABLE];

  // The identifier of the schema located in the same data store.
  string schema_id = 3;

  // The unstructured data linked to this document. Content must be set if this
  // document is under a `CONTENT_REQUIRED` data store.
  Content content = 10;

  // The identifier of the parent document. Currently supports at most a
  // two-level document hierarchy.
  //
  // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string parent_document_id = 7;

  // Output only. Derived data that is not in the original input document.
  google.protobuf.Struct derived_struct_data = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Access control information for the document.
  AclInfo acl_info = 11;

  // Output only. The last time the document was indexed. If this field is set,
  // the document could be returned in search results.
  //
  // If this field is not populated, it means the document has never been
  // indexed.
  google.protobuf.Timestamp index_time = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// ProcessedDocument holds the processed form of a
// [Document][google.cloud.discoveryengine.v1alpha.Document], identified by the
// resource name of the document it was derived from.
message ProcessedDocument {
  // Output format of the processed document.
  oneof processed_data_format {
    // The JSON string representation of the processed document.
    string json_data = 2;
  }

  // Required. Full resource name of the referenced document, in the format
  // `projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*`.
  string document = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/Document"
    }
  ];
}