xref: /aosp_15_r20/external/googleapis/google/cloud/aiplatform/v1beta1/feature_view.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.aiplatform.v1beta1;
18
// field_behavior.proto and resource.proto supply the (google.api.*)
// annotations used in this file; timestamp.proto supplies
// google.protobuf.Timestamp.
19import "google/api/field_behavior.proto";
20import "google/api/resource.proto";
21import "google/protobuf/timestamp.proto";
22
// Per-language options controlling the namespace/package (and, for Java, the
// outer class name and file layout) of the code generated from this file.
23option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
24option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
25option java_multiple_files = true;
26option java_outer_classname = "FeatureViewProto";
27option java_package = "com.google.cloud.aiplatform.v1beta1";
28option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
29option ruby_package = "Google::Cloud::AIPlatform::V1beta1";
30
31// FeatureView is a representation of the values that the FeatureOnlineStore
32// will serve, based on its `sync_config`.
33message FeatureView {
  // Declares FeatureView as an API resource with the resource type and name
  // pattern given below.
34  option (google.api.resource) = {
35    type: "aiplatform.googleapis.com/FeatureView"
36    pattern: "projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}"
37  };
38
  // Describes a BigQuery source whose data is loaded onto the
  // FeatureOnlineStore. Set through `big_query_source` in the `source` oneof.
39  message BigQuerySource {
40    // Required. The BigQuery view URI that will be materialized on each sync
41    // trigger based on FeatureView.SyncConfig.
42    string uri = 1 [(google.api.field_behavior) = REQUIRED];
43
44    // Required. Columns to construct entity_id / row keys.
45    repeated string entity_id_columns = 2
46        [(google.api.field_behavior) = REQUIRED];
47  }
48
49  // Configuration for Sync. Only one option is set.
50  message SyncConfig {
51    // Cron schedule (https://en.wikipedia.org/wiki/Cron) to launch scheduled
52    // runs. To explicitly set a timezone to the cron tab, apply a prefix in
53    // the cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
54    // The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone
55    // database. For example, "CRON_TZ=America/New_York 1 * * * *", or
56    // "TZ=America/New_York 1 * * * *".
57    string cron = 1;
58  }
59
60  // Deprecated. Use
61  // [IndexConfig][google.cloud.aiplatform.v1beta1.FeatureView.IndexConfig]
62  // instead.
63  message VectorSearchConfig {
64    option deprecated = true;
65
    // Configuration options for using brute force search. Declares no fields.
66    message BruteForceConfig {}
67
    // Configuration options for the tree-AH algorithm.
68    message TreeAHConfig {
69      // Optional. Number of embeddings on each leaf node. The default value is
70      // 1000 if not set.
71      optional int64 leaf_node_embedding_count = 1
72          [(google.api.field_behavior) = OPTIONAL];
73    }
74
    // The distance measure used in nearest neighbor search.
75    enum DistanceMeasureType {
76      // Should not be set.
77      DISTANCE_MEASURE_TYPE_UNSPECIFIED = 0;
78
79      // Euclidean (L_2) Distance.
80      SQUARED_L2_DISTANCE = 1;
81
82      // Cosine Distance. Defined as 1 - cosine similarity.
83      //
84      // We strongly suggest using DOT_PRODUCT_DISTANCE + UNIT_L2_NORM instead
85      // of COSINE distance. Our algorithms have been more optimized for
86      // DOT_PRODUCT distance which, when combined with UNIT_L2_NORM, is
87      // mathematically equivalent to COSINE distance and results in the same
88      // ranking.
89      COSINE_DISTANCE = 2;
90
91      // Dot Product Distance. Defined as a negative of the dot product.
92      DOT_PRODUCT_DISTANCE = 3;
93    }
94
95    // The configuration with regard to the algorithms used for efficient
96    // search.
97    oneof algorithm_config {
98      // Optional. Configuration options for the tree-AH algorithm (Shallow tree
99      // + Asymmetric Hashing). Please refer to this paper for more details:
100      // https://arxiv.org/abs/1908.10396
101      TreeAHConfig tree_ah_config = 8 [(google.api.field_behavior) = OPTIONAL];
102
103      // Optional. Configuration options for using brute force search, which
104      // simply implements the standard linear search in the database for each
105      // query. It is primarily meant for benchmarking and to generate the
106      // ground truth for approximate search.
107      BruteForceConfig brute_force_config = 9
108          [(google.api.field_behavior) = OPTIONAL];
109    }
110
111    // Optional. Column of embedding. This column contains the source data to
112    // create index for vector search. embedding_column must be set when using
113    // vector search.
114    string embedding_column = 3 [(google.api.field_behavior) = OPTIONAL];
115
116    // Optional. Columns of features that are used to filter vector search
117    // results.
118    repeated string filter_columns = 4 [(google.api.field_behavior) = OPTIONAL];
119
120    // Optional. Column of crowding. This column contains crowding attribute
121    // which is a constraint on a neighbor list produced by
122    // [FeatureOnlineStoreService.SearchNearestEntities][google.cloud.aiplatform.v1beta1.FeatureOnlineStoreService.SearchNearestEntities]
123    // to diversify search results. If
124    // [NearestNeighborQuery.per_crowding_attribute_neighbor_count][google.cloud.aiplatform.v1beta1.NearestNeighborQuery.per_crowding_attribute_neighbor_count]
125    // is set to K in
126    // [SearchNearestEntitiesRequest][google.cloud.aiplatform.v1beta1.SearchNearestEntitiesRequest],
127    // it's guaranteed that no more than K entities of the same crowding
128    // attribute are returned in the response.
129    string crowding_column = 5 [(google.api.field_behavior) = OPTIONAL];
130
131    // Optional. The number of dimensions of the input embedding.
132    optional int32 embedding_dimension = 6
133        [(google.api.field_behavior) = OPTIONAL];
134
135    // Optional. The distance measure used in nearest neighbor search.
136    DistanceMeasureType distance_measure_type = 7
137        [(google.api.field_behavior) = OPTIONAL];
138  }
139
140  // Configuration for vector indexing.
141  message IndexConfig {
142    // Configuration options for using brute force search.
143    message BruteForceConfig {}
144
145    // Configuration options for the tree-AH algorithm.
146    message TreeAHConfig {
147      // Optional. Number of embeddings on each leaf node. The default value is
148      // 1000 if not set.
149      optional int64 leaf_node_embedding_count = 1
150          [(google.api.field_behavior) = OPTIONAL];
151    }
152
153    // The distance measure used in nearest neighbor search.
154    enum DistanceMeasureType {
155      // Should not be set.
156      DISTANCE_MEASURE_TYPE_UNSPECIFIED = 0;
157
158      // Euclidean (L_2) Distance.
159      SQUARED_L2_DISTANCE = 1;
160
161      // Cosine Distance. Defined as 1 - cosine similarity.
162      //
163      // We strongly suggest using DOT_PRODUCT_DISTANCE + UNIT_L2_NORM instead
164      // of COSINE distance. Our algorithms have been more optimized for
165      // DOT_PRODUCT distance which, when combined with UNIT_L2_NORM, is
166      // mathematically equivalent to COSINE distance and results in the same
167      // ranking.
168      COSINE_DISTANCE = 2;
169
170      // Dot Product Distance. Defined as a negative of the dot product.
171      DOT_PRODUCT_DISTANCE = 3;
172    }
173
174    // The configuration with regard to the algorithms used for efficient
175    // search.
176    oneof algorithm_config {
177      // Optional. Configuration options for the tree-AH algorithm (Shallow tree
178      // + Asymmetric Hashing). Please refer to this paper for more details:
179      // https://arxiv.org/abs/1908.10396
180      TreeAHConfig tree_ah_config = 6 [(google.api.field_behavior) = OPTIONAL];
181
182      // Optional. Configuration options for using brute force search, which
183      // simply implements the standard linear search in the database for each
184      // query. It is primarily meant for benchmarking and to generate the
185      // ground truth for approximate search.
186      BruteForceConfig brute_force_config = 7
187          [(google.api.field_behavior) = OPTIONAL];
188    }
189
190    // Optional. Column of embedding. This column contains the source data to
191    // create index for vector search. embedding_column must be set when using
192    // vector search.
193    string embedding_column = 1 [(google.api.field_behavior) = OPTIONAL];
194
195    // Optional. Columns of features that are used to filter vector search
196    // results.
197    repeated string filter_columns = 2 [(google.api.field_behavior) = OPTIONAL];
198
199    // Optional. Column of crowding. This column contains crowding attribute
200    // which is a constraint on a neighbor list produced by
201    // [FeatureOnlineStoreService.SearchNearestEntities][google.cloud.aiplatform.v1beta1.FeatureOnlineStoreService.SearchNearestEntities]
202    // to diversify search results. If
203    // [NearestNeighborQuery.per_crowding_attribute_neighbor_count][google.cloud.aiplatform.v1beta1.NearestNeighborQuery.per_crowding_attribute_neighbor_count]
204    // is set to K in
205    // [SearchNearestEntitiesRequest][google.cloud.aiplatform.v1beta1.SearchNearestEntitiesRequest],
206    // it's guaranteed that no more than K entities of the same crowding
207    // attribute are returned in the response.
208    string crowding_column = 3 [(google.api.field_behavior) = OPTIONAL];
209
210    // Optional. The number of dimensions of the input embedding.
211    optional int32 embedding_dimension = 4
212        [(google.api.field_behavior) = OPTIONAL];
213
214    // Optional. The distance measure used in nearest neighbor search.
215    DistanceMeasureType distance_measure_type = 5
216        [(google.api.field_behavior) = OPTIONAL];
217  }
218
219  // A Feature Registry source for features that need to be synced to Online
220  // Store.
221  message FeatureRegistrySource {
222    // Features belonging to a single feature group that will be
223    // synced to Online Store.
224    message FeatureGroup {
225      // Required. Identifier of the feature group.
226      string feature_group_id = 1 [(google.api.field_behavior) = REQUIRED];
227
228      // Required. Identifiers of features under the feature group.
229      repeated string feature_ids = 2 [(google.api.field_behavior) = REQUIRED];
230    }
231
232    // Required. List of features that need to be synced to Online Store.
233    repeated FeatureGroup feature_groups = 1
234        [(google.api.field_behavior) = REQUIRED];
235
236    // Optional. The project number of the parent project of the Feature Groups.
237    optional int64 project_number = 2 [(google.api.field_behavior) = OPTIONAL];
238  }
239
240  // Service agent type used during data sync.
241  enum ServiceAgentType {
242    // By default, the project-level Vertex AI Service Agent is enabled.
243    SERVICE_AGENT_TYPE_UNSPECIFIED = 0;
244
245    // Indicates the project-level Vertex AI Service Agent
246    // (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
247    // will be used during sync jobs.
248    SERVICE_AGENT_TYPE_PROJECT = 1;
249
250    // Enable a FeatureView service account to be created by Vertex AI and
251    // output in the field `service_account_email`. This service account will
252    // be used to read from the source BigQuery table during sync.
253    SERVICE_AGENT_TYPE_FEATURE_VIEW = 2;
254  }
255
  // The source of the data loaded onto the FeatureOnlineStore for this
  // FeatureView. At most one of the fields below is set.
256  oneof source {
257    // Optional. Configures how data is supposed to be extracted from a BigQuery
258    // source to be loaded onto the FeatureOnlineStore.
259    BigQuerySource big_query_source = 6
260        [(google.api.field_behavior) = OPTIONAL];
261
262    // Optional. Configures the features from a Feature Registry source that
263    // need to be loaded onto the FeatureOnlineStore.
264    FeatureRegistrySource feature_registry_source = 9
265        [(google.api.field_behavior) = OPTIONAL];
266  }
267
268  // Identifier. Name of the FeatureView. Format:
269  // `projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}`
270  string name = 1 [(google.api.field_behavior) = IDENTIFIER];
271
272  // Output only. Timestamp when this FeatureView was created.
273  google.protobuf.Timestamp create_time = 2
274      [(google.api.field_behavior) = OUTPUT_ONLY];
275
276  // Output only. Timestamp when this FeatureView was last updated.
277  google.protobuf.Timestamp update_time = 3
278      [(google.api.field_behavior) = OUTPUT_ONLY];
279
280  // Optional. Used to perform consistent read-modify-write updates. If not set,
281  // a blind "overwrite" update happens.
282  string etag = 4 [(google.api.field_behavior) = OPTIONAL];
283
284  // Optional. The labels with user-defined metadata to organize your
285  // FeatureViews.
286  //
287  // Label keys and values can be no longer than 64 characters
288  // (Unicode codepoints), can only contain lowercase letters, numeric
289  // characters, underscores and dashes. International characters are allowed.
290  //
291  // See https://goo.gl/xmQnxf for more information on and examples of labels.
292  // No more than 64 user labels can be associated with one
293  // FeatureOnlineStore (system labels are excluded). System reserved label keys
294  // are prefixed with "aiplatform.googleapis.com/" and are immutable.
295  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];
296
297  // Configures when data is to be synced/updated for this FeatureView. At the
298  // end of the sync the latest featureValues for each entityId of this
299  // FeatureView are made ready for online serving.
300  SyncConfig sync_config = 7;
301
302  // Optional. Deprecated: please use
303  // [FeatureView.index_config][google.cloud.aiplatform.v1beta1.FeatureView.index_config]
304  // instead.
305  VectorSearchConfig vector_search_config = 8
306      [deprecated = true, (google.api.field_behavior) = OPTIONAL];
307
308  // Optional. Configuration for index preparation for vector search. It
309  // contains the required configurations to create an index from source data,
310  // so that approximate nearest neighbor (a.k.a. ANN) search can be
311  // performed during online serving.
312  IndexConfig index_config = 15 [(google.api.field_behavior) = OPTIONAL];
313
314  // Optional. Service agent type used during data sync. By default, the Vertex
315  // AI Service Agent is used. When using an IAM Policy to isolate this
316  // FeatureView within a project, a separate service account should be
317  // provisioned by setting this field to `SERVICE_AGENT_TYPE_FEATURE_VIEW`.
318  // This will generate a separate service account to access the BigQuery source
319  // table.
320  ServiceAgentType service_agent_type = 14
321      [(google.api.field_behavior) = OPTIONAL];
322
323  // Output only. A Service Account unique to this FeatureView. The role
324  // bigquery.dataViewer should be granted to this service account to allow
325  // Vertex AI Feature Store to sync data to the online store.
326  string service_account_email = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
327}
328