// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1beta;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/discoveryengine/v1beta/import_config.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "SearchTuningServiceProto";
option java_package = "com.google.cloud.discoveryengine.v1beta";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";

// Service for search tuning.
service SearchTuningService {
  option (google.api.default_host) = "discoveryengine.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Trains a custom model.
  //
  // Returns a long-running operation. On completion its response is a
  // [TrainCustomModelResponse][google.cloud.discoveryengine.v1beta.TrainCustomModelResponse]
  // and its metadata is a
  // [TrainCustomModelMetadata][google.cloud.discoveryengine.v1beta.TrainCustomModelMetadata].
  rpc TrainCustomModel(TrainCustomModelRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta/{data_store=projects/*/locations/*/collections/*/dataStores/*}:trainCustomModel"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "google.cloud.discoveryengine.v1beta.TrainCustomModelResponse"
      metadata_type: "google.cloud.discoveryengine.v1beta.TrainCustomModelMetadata"
    };
  }
}

// Request message for
// [SearchTuningService.TrainCustomModel][google.cloud.discoveryengine.v1beta.SearchTuningService.TrainCustomModel]
// method.
message TrainCustomModelRequest {
  // Cloud Storage training data input.
  message GcsTrainingInput {
    // The Cloud Storage corpus data which could be associated in train data.
    // The data path format is `gs://<bucket_to_data>/<jsonl_file_name>`.
    // A newline delimited jsonl/ndjson file.
    //
    // For search-tuning model, each line should have the _id, title
    // and text. Example:
    // `{"_id": "doc1", "title": "relevant doc", "text": "relevant text"}`
    string corpus_data_path = 1;

    // The Cloud Storage query data which could be associated in train data.
    // The data path format is `gs://<bucket_to_data>/<jsonl_file_name>`.
    // A newline delimited jsonl/ndjson file.
    //
    // For search-tuning model, each line should have the _id
    // and text. Example:
    // `{"_id": "query1", "text": "example query"}`
    string query_data_path = 2;

    // Cloud Storage training data path whose format should be
    // `gs://<bucket_to_data>/<tsv_file_name>`. The file should be in tsv
    // format. Each line should have the doc_id and query_id and score (number).
    //
    // For search-tuning model, it should have the query-id corpus-id
    // score as tsv file header. The score should be a number in `[0, +inf)`.
    // The larger the number is, the more relevant the pair is. Example:
    //
    // * `query-id\tcorpus-id\tscore`
    // * `query1\tdoc1\t1`
    string train_data_path = 3;

    // Cloud Storage test data. Same format as train_data_path. If not provided,
    // a random 80/20 train/test split will be performed on train_data_path.
    string test_data_path = 4;
  }

  // Model training input.
  oneof training_input {
    // Cloud Storage training input.
    GcsTrainingInput gcs_training_input = 2;
  }

  // Required. The resource name of the Data Store, such as
  // `projects/*/locations/global/collections/default_collection/dataStores/default_data_store`.
  // This field is used to identify the data store where to train the models.
  string data_store = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/DataStore"
    }
  ];

  // Model to be trained. Supported values are:
  //
  //  * **search-tuning**: Fine tuning the search system based on data provided.
  string model_type = 3;

  // The desired location of errors incurred during the data ingestion and
  // training.
  ImportErrorConfig error_config = 4;
}

// Response to a
// [TrainCustomModelRequest][google.cloud.discoveryengine.v1beta.TrainCustomModelRequest].
// This message is returned by the google.longrunning.Operation.response field.
message TrainCustomModelResponse {
  // A sample of errors encountered while processing the data.
  repeated google.rpc.Status error_samples = 1;

  // Echoes the destination for the complete errors in the request if set.
  ImportErrorConfig error_config = 2;

  // The trained model status. Possible values are:
  //
  //  * **bad-data**: The training data quality is bad.
  //  * **no-improvement**: Tuning didn't improve performance. Won't deploy.
  //  * **in-progress**: Model training job creation is in progress.
  //  * **training**: Model is actively training.
  //  * **evaluating**: The model is evaluating trained metrics.
  //  * **indexing**: The model trained metrics are indexing.
  //  * **ready**: The model is ready for serving.
  string model_status = 3;

  // The metrics of the trained model.
  map<string, double> metrics = 4;
}

// Metadata related to the progress of the TrainCustomModel operation. This is
// returned by the google.longrunning.Operation.metadata field.
message TrainCustomModelMetadata {
  // Operation create time.
  google.protobuf.Timestamp create_time = 1;

  // Operation last update time. If the operation is done, this is also the
  // finish time.
  google.protobuf.Timestamp update_time = 2;
}