// xref: /aosp_15_r20/external/googleapis/google/cloud/aiplatform/v1/tuning_job.proto
// (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/content.proto";
import "google/cloud/aiplatform/v1/job_state.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

// Per-language code generation options. These are part of the published
// API surface; do not change their values.
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "TuningJobProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Represents a TuningJob that runs with Google owned models.
message TuningJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/TuningJob"
    pattern: "projects/{project}/locations/{location}/tuningJobs/{tuning_job}"
    plural: "tuningJobs"
    singular: "tuningJob"
  };

  // The model to tune. At most one member of this oneof can be set.
  oneof source_model {
    // Model name for tuning, e.g., "gemini-1.0-pro-002".
    string base_model = 4;
  }

  // The tuning method configuration. At most one member of this oneof can
  // be set.
  oneof tuning_spec {
    // Tuning Spec for Supervised Fine Tuning.
    SupervisedTuningSpec supervised_tuning_spec = 5;
  }

  // Output only. Identifier. Resource name of a TuningJob. Format:
  // `projects/{project}/locations/{location}/tuningJobs/{tuning_job}`
  string name = 1 [
    (google.api.field_behavior) = IDENTIFIER,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // Optional. The display name of the
  // [TunedModel][google.cloud.aiplatform.v1.Model]. The name can be up to
  // 128 characters long and can consist of any UTF-8 characters.
  string tuned_model_display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The description of the
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob].
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The detailed state of the job.
  JobState state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob] was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob] first entered the
  // `JOB_STATE_RUNNING` state.
  google.protobuf.Timestamp start_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the TuningJob entered any of the following
  // [JobStates][google.cloud.aiplatform.v1.JobState]: `JOB_STATE_SUCCEEDED`,
  // `JOB_STATE_FAILED`, `JOB_STATE_CANCELLED`, `JOB_STATE_EXPIRED`.
  google.protobuf.Timestamp end_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob] was most recently
  // updated.
  google.protobuf.Timestamp update_time = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when job's state is `JOB_STATE_FAILED` or
  // `JOB_STATE_CANCELLED`.
  google.rpc.Status error = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels with user-defined metadata to organize
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob] and generated resources
  // such as [Model][google.cloud.aiplatform.v1.Model] and
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint].
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 12 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The Experiment associated with this
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob].
  string experiment = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    // NOTE(review): the reference type is `Context`, not an Experiment
    // type -- presumably the Context resource that backs the experiment;
    // confirm before changing.
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Context"
    }
  ];

  // Output only. The tuned model resources associated with this
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob].
  TunedModel tuned_model = 14 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The tuning data statistics associated with this
  // [TuningJob][google.cloud.aiplatform.v1.TuningJob].
  TuningDataStats tuning_data_stats = 15
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The Model Registry Model and Online Prediction Endpoint associated with
// this [TuningJob][google.cloud.aiplatform.v1.TuningJob].
message TunedModel {
  // Output only. The resource name of the TunedModel. Format:
  // `projects/{project}/locations/{location}/models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. A resource name of an Endpoint. Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`.
  string endpoint = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];
}

// Dataset distribution for Supervised Tuning: summary statistics plus a
// histogram over a population of values.
message SupervisedTuningDatasetDistribution {
  // Dataset bucket used to create a histogram for the distribution given a
  // population of values.
  message DatasetBucket {
    // Output only. Number of values in the bucket.
    double count = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Left bound of the bucket.
    double left = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Right bound of the bucket.
    double right = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. Sum of a given population of values.
  int64 sum = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The minimum of the population values.
  double min = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The maximum of the population values.
  double max = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The arithmetic mean of the values in the population.
  double mean = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The median of the values in the population.
  double median = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The 5th percentile of the values in the population.
  double p5 = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The 95th percentile of the values in the population.
  double p95 = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The histogram buckets of the distribution.
  repeated DatasetBucket buckets = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Tuning data statistics for Supervised Tuning.
message SupervisedTuningDataStats {
  // Output only. Number of examples in the tuning dataset.
  int64 tuning_dataset_example_count = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Number of tuning characters in the tuning dataset.
  int64 total_tuning_character_count = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Number of billable characters in the tuning dataset.
  int64 total_billable_character_count = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Number of tuning steps for this Tuning Job.
  int64 tuning_step_count = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Dataset distributions for the user input tokens.
  SupervisedTuningDatasetDistribution user_input_token_distribution = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Dataset distributions for the user output tokens.
  SupervisedTuningDatasetDistribution user_output_token_distribution = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Dataset distributions for the messages per example.
  SupervisedTuningDatasetDistribution user_message_per_example_distribution = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Sample user messages in the training dataset uri.
  repeated Content user_dataset_examples = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The tuning data statistic values for
// [TuningJob][google.cloud.aiplatform.v1.TuningJob].
message TuningDataStats {
  // Statistics for the tuning method that was used. At most one member of
  // this oneof can be set.
  oneof tuning_data_stats {
    // The SFT Tuning data stats.
    SupervisedTuningDataStats supervised_tuning_data_stats = 1;
  }
}

// Hyperparameters for SFT.
message SupervisedHyperParameters {
  // Supported adapter sizes for tuning.
  //
  // The enum numbers (1-4) are ordinal tags on the wire; they are not the
  // adapter sizes themselves (e.g. `ADAPTER_SIZE_FOUR` is encoded as 2).
  enum AdapterSize {
    // Adapter size is unspecified.
    ADAPTER_SIZE_UNSPECIFIED = 0;

    // Adapter size 1.
    ADAPTER_SIZE_ONE = 1;

    // Adapter size 4.
    ADAPTER_SIZE_FOUR = 2;

    // Adapter size 8.
    ADAPTER_SIZE_EIGHT = 3;

    // Adapter size 16.
    ADAPTER_SIZE_SIXTEEN = 4;
  }

  // Optional. Number of training epochs for this tuning job.
  int64 epoch_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Learning rate multiplier for tuning.
  double learning_rate_multiplier = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Adapter size for tuning.
  AdapterSize adapter_size = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Tuning Spec for Supervised Tuning.
message SupervisedTuningSpec {
  // Required. Cloud Storage path to file containing training dataset for
  // tuning.
  string training_dataset_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Cloud Storage path to file containing validation dataset for
  // tuning.
  string validation_dataset_uri = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Hyperparameters for SFT.
  SupervisedHyperParameters hyper_parameters = 3
      [(google.api.field_behavior) = OPTIONAL];
}
