// xref: /aosp_15_r20/external/googleapis/google/cloud/discoveryengine/v1beta/document.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1beta;

// google/api/* supplies the field_behavior and resource annotations;
// google/protobuf/* supplies the Struct and Timestamp well-known types.
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

// Per-language code generation settings.
option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.discoveryengine.v1beta";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";

// Document captures all raw metadata information of items to be recommended or
// searched.
message Document {
  // Resource type and the name patterns under which a Document may appear:
  // directly under a data store, or under a data store inside a collection.
  option (google.api.resource) = {
    type: "discoveryengine.googleapis.com/Document"
    pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/branches/{branch}/documents/{document}"
    pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document}"
  };

  // Unstructured data linked to this document.
  message Content {
    // The content payload, supplied either inline as bytes or by reference as
    // a URI. Setting one member clears the other.
    oneof content {
      // The content represented as a stream of bytes. The maximum length is
      // 1,000,000 bytes (1 MB / ~0.95 MiB).
      //
      // Note: As with all `bytes` fields, this field is represented as pure
      // binary in Protocol Buffers and base64-encoded string in JSON. For
      // example, `abc123!?$*&()'-=@~` should be represented as
      // `YWJjMTIzIT8kKiYoKSctPUB+` in JSON. See
      // https://developers.google.com/protocol-buffers/docs/proto3#json.
      bytes raw_bytes = 2;

      // The URI of the content. Only Cloud Storage URIs (e.g.
      // `gs://bucket-name/path/to/file`) are supported. The maximum file size
      // is 2.5 MB for text-based formats, 100 MB for other formats.
      string uri = 3;
    }

    // The MIME type of the content. Supported types:
    //
    // * `application/pdf` (PDF, only native PDFs are supported for now)
    // * `text/html` (HTML)
    // * `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX)
    // * `application/vnd.openxmlformats-officedocument.presentationml.presentation` (PPTX)
    // * `text/plain` (TXT)
    //
    // See https://www.iana.org/assignments/media-types/media-types.xhtml.
    string mime_type = 1;
  }

  // Data representation. One of
  // [struct_data][google.cloud.discoveryengine.v1beta.Document.struct_data] or
  // [json_data][google.cloud.discoveryengine.v1beta.Document.json_data] should
  // be provided; otherwise an `INVALID_ARGUMENT` error is thrown.
  oneof data {
    // The structured JSON data for the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1beta.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    google.protobuf.Struct struct_data = 4;

    // The JSON string representation of the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1beta.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    string json_data = 5;
  }

  // Immutable. The full resource name of the document.
  // Format:
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document_id}`.
  //
  // This field must be a UTF-8 encoded string with a length limit of 1024
  // characters.
  string name = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The identifier of the document.
  //
  // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string id = 2 [(google.api.field_behavior) = IMMUTABLE];

  // The identifier of the schema located in the same data store.
  string schema_id = 3;

  // The unstructured data linked to this document. Content must be set if this
  // document is under a `CONTENT_REQUIRED` data store.
  Content content = 10;

  // The identifier of the parent document. Currently supports at most a
  // two-level document hierarchy.
  //
  // Id should conform to [RFC-1034](https://tools.ietf.org/html/rfc1034)
  // standard with a length limit of 63 characters.
  string parent_document_id = 7;

  // Output only. Derived data that are not in the original input document.
  google.protobuf.Struct derived_struct_data = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The last time the document was indexed. If this field is set,
  // the document could be returned in search results.
  //
  // If this field is not populated, it means the document has never been
  // indexed.
  google.protobuf.Timestamp index_time = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
