xref: /aosp_15_r20/external/googleapis/google/cloud/discoveryengine/v1beta/search_tuning_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2022 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.discoveryengine.v1beta;
18
19import "google/api/annotations.proto";
20import "google/api/client.proto";
21import "google/api/field_behavior.proto";
22import "google/api/resource.proto";
23import "google/cloud/discoveryengine/v1beta/import_config.proto";
24import "google/longrunning/operations.proto";
25import "google/protobuf/timestamp.proto";
26import "google/rpc/status.proto";
27
28option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
29option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
30option java_multiple_files = true;
31option java_outer_classname = "SearchTuningServiceProto";
32option java_package = "com.google.cloud.discoveryengine.v1beta";
33option objc_class_prefix = "DISCOVERYENGINE";
34option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
35option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";
36
37// Service for search tuning. Trains custom models for a data store.
38service SearchTuningService {
39  option (google.api.default_host) = "discoveryengine.googleapis.com";
40  option (google.api.oauth_scopes) =
41      "https://www.googleapis.com/auth/cloud-platform";
42
43  // Trains a custom model, run as a long-running operation.
44  rpc TrainCustomModel(TrainCustomModelRequest)
45      returns (google.longrunning.Operation) {
46    option (google.api.http) = {
47      post: "/v1beta/{data_store=projects/*/locations/*/collections/*/dataStores/*}:trainCustomModel"
48      body: "*"
49    };
50    option (google.longrunning.operation_info) = {
51      response_type: "google.cloud.discoveryengine.v1beta.TrainCustomModelResponse"
52      metadata_type: "google.cloud.discoveryengine.v1beta.TrainCustomModelMetadata"
53    };
54  }
55}
56
57// Request message for
58// [SearchTuningService.TrainCustomModel][google.cloud.discoveryengine.v1beta.SearchTuningService.TrainCustomModel]
59// method.
60message TrainCustomModelRequest {
61  // Cloud Storage training data input.
62  message GcsTrainingInput {
63    // The Cloud Storage corpus data that can be associated with train data.
64    // The data path format is `gs://<bucket_to_data>/<jsonl_file_name>`.
65    // A newline delimited jsonl/ndjson file.
66    //
67    // For search-tuning model, each line should have the _id, title
68    // and text. Example:
69    // `{"_id": "doc1", "title": "relevant doc", "text": "relevant text"}`
70    string corpus_data_path = 1;
71
72    // The Cloud Storage query data that can be associated with train data.
73    // The data path format is `gs://<bucket_to_data>/<jsonl_file_name>`.
74    // A newline delimited jsonl/ndjson file.
75    //
76    // For search-tuning model, each line should have the _id
77    // and text. Example: `{"_id": "query1", "text": "example query"}`
78    string query_data_path = 2;
79
80    // Cloud Storage training data path whose format should be
81    // `gs://<bucket_to_data>/<tsv_file_name>`. The file should be in tsv
82    // format. Each line should have the doc_id and query_id and score (number).
83    //
84    // For search-tuning model, it should have the query-id corpus-id
85    // score as tsv file header. The score should be a number in `[0, +inf)`.
86    // The larger the number is, the more relevant the pair is. Example:
87    //
88    // * `query-id\tcorpus-id\tscore`
89    // * `query1\tdoc1\t1`
90    string train_data_path = 3;
91
92    // Cloud Storage test data. Same format as train_data_path. If not provided,
93    // a random 80/20 train/test split will be performed on train_data_path.
94    string test_data_path = 4;
95  }
96
97  // Model training input.
98  oneof training_input {
99    // Cloud Storage training input.
100    GcsTrainingInput gcs_training_input = 2;
101  }
102
103  // Required. The resource name of the Data Store, such as
104  // `projects/*/locations/global/collections/default_collection/dataStores/default_data_store`.
105  // This field identifies the data store in which to train the models.
106  string data_store = 1 [
107    (google.api.field_behavior) = REQUIRED,
108    (google.api.resource_reference) = {
109      type: "discoveryengine.googleapis.com/DataStore"
110    }
111  ];
112
113  // Model to be trained. Supported values are:
114  //
115  //  * **search-tuning**: Fine tuning the search system based on data provided.
116  string model_type = 3;
117
118  // The desired location of errors incurred during the data ingestion and
119  // training.
120  ImportErrorConfig error_config = 4;
121}
122
123// Response for the
124// [TrainCustomModelRequest][google.cloud.discoveryengine.v1beta.TrainCustomModelRequest].
125// This message is returned in the google.longrunning.Operation.response field.
126message TrainCustomModelResponse {
127  // A sample of errors encountered while processing the data.
128  repeated google.rpc.Status error_samples = 1;
129
130  // Echoes the destination for the complete errors in the request if set.
131  ImportErrorConfig error_config = 2;
132
133  // The trained model status. Possible values are:
134  //
135  //  * **bad-data**: The training data quality is bad.
136  //  * **no-improvement**: Tuning didn't improve performance. Won't deploy.
137  //  * **in-progress**: Model training job creation is in progress.
138  //  * **training**: Model is actively training.
139  //  * **evaluating**: The model is evaluating trained metrics.
140  //  * **indexing**: The model trained metrics are indexing.
141  //  * **ready**: The model is ready for serving.
142  string model_status = 3;
143
144  // The metrics of the trained model.
145  map<string, double> metrics = 4;
146}
147
148// Metadata related to the progress of the TrainCustomModel operation. It is
149// returned in the google.longrunning.Operation.metadata field.
150message TrainCustomModelMetadata {
151  // Operation create time.
152  google.protobuf.Timestamp create_time = 1;
153
154  // Operation last update time. If the operation is done, this is also the
155  // finish time.
156  google.protobuf.Timestamp update_time = 2;
157}
158