// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1beta;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.discoveryengine.v1beta";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";

// Document captures all raw metadata information of items to be recommended or
// searched.
message Document {
  option (google.api.resource) = {
    type: "discoveryengine.googleapis.com/Document"
    pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/branches/{branch}/documents/{document}"
    pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document}"
  };

  // Unstructured data linked to this document.
  message Content {
    // The payload of the content. At most one of the following fields can be
    // set at a time.
    oneof content {
      // The content represented as a stream of bytes. The maximum length is
      // 1,000,000 bytes (1 MB / ~0.95 MiB).
      //
      // Note: As with all `bytes` fields, this field is represented as pure
      // binary in Protocol Buffers and as a base64-encoded string in JSON. For
      // example, `abc123!?$*&()'-=@~` should be represented as
      // `YWJjMTIzIT8kKiYoKSctPUB+` in JSON. See
      // https://developers.google.com/protocol-buffers/docs/proto3#json.
      bytes raw_bytes = 2;

      // The URI of the content. Only Cloud Storage URIs (e.g.
      // `gs://bucket-name/path/to/file`) are supported. The maximum file size
      // is 2.5 MB for text-based formats, 100 MB for other formats.
      string uri = 3;
    }

    // The MIME type of the content. Supported types:
    //
    // * `application/pdf` (PDF, only native PDFs are supported for now)
    // * `text/html` (HTML)
    // * `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX)
    // * `application/vnd.openxmlformats-officedocument.presentationml.presentation` (PPTX)
    // * `text/plain` (TXT)
    //
    // See https://www.iana.org/assignments/media-types/media-types.xhtml.
    string mime_type = 1;
  }

  // Data representation. One of
  // [struct_data][google.cloud.discoveryengine.v1beta.Document.struct_data] or
  // [json_data][google.cloud.discoveryengine.v1beta.Document.json_data] should
  // be provided; otherwise, an `INVALID_ARGUMENT` error is thrown.
  oneof data {
    // The structured JSON data for the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1beta.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    google.protobuf.Struct struct_data = 4;

    // The JSON string representation of the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1beta.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    string json_data = 5;
  }

  // Immutable. The full resource name of the document.
  // Format:
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document_id}`.
  //
  // This field must be a UTF-8 encoded string with a length limit of 1024
  // characters.
  string name = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The identifier of the document.
  //
  // Id should conform to the [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string id = 2 [(google.api.field_behavior) = IMMUTABLE];

  // The identifier of the schema located in the same data store.
  string schema_id = 3;

  // The unstructured data linked to this document. Content must be set if this
  // document is under a `CONTENT_REQUIRED` data store.
  Content content = 10;

  // The identifier of the parent document. Currently, a document hierarchy of
  // at most two levels is supported.
  //
  // Id should conform to the [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string parent_document_id = 7;

  // Output only. Derived data that is not in the original input document.
  google.protobuf.Struct derived_struct_data = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The last time the document was indexed. If this field is set,
  // the document could be returned in search results.
  //
  // If this field is not populated, it means the document has never been
  // indexed.
  google.protobuf.Timestamp index_time = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];
}