// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "FeatureViewProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// A FeatureView is a representation of the values that the
// FeatureOnlineStore will serve based on its syncConfig.
message FeatureView {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/FeatureView"
    pattern: "projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}"
  };

  // A BigQuery source from which data is extracted to be loaded onto the
  // FeatureOnlineStore.
  message BigQuerySource {
    // Required. The BigQuery view URI that will be materialized on each sync
    // trigger based on FeatureView.SyncConfig.
    string uri = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. Columns to construct entity_id / row keys.
    repeated string entity_id_columns = 2
        [(google.api.field_behavior) = REQUIRED];
  }

  // Configuration for Sync. Only one option is set.
  message SyncConfig {
    // Cron schedule (https://en.wikipedia.org/wiki/Cron) to launch scheduled
    // runs. To explicitly set a timezone to the cron tab, apply a prefix in
    // the cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
    // The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone
    // database. For example, "CRON_TZ=America/New_York 1 * * * *", or
    // "TZ=America/New_York 1 * * * *".
    string cron = 1;
  }

  // Deprecated. Use
  // [IndexConfig][google.cloud.aiplatform.v1beta1.FeatureView.IndexConfig]
  // instead.
  message VectorSearchConfig {
    option deprecated = true;

    // Configuration options for using brute force search. Carries no
    // settings; selecting it in `algorithm_config` is the whole choice.
    message BruteForceConfig {}

    // Configuration options for the tree-AH algorithm.
    message TreeAHConfig {
      // Optional. Number of embeddings on each leaf node. The default value is
      // 1000 if not set.
      optional int64 leaf_node_embedding_count = 1
          [(google.api.field_behavior) = OPTIONAL];
    }

    // The distance measure used in nearest neighbor search.
    enum DistanceMeasureType {
      // Should not be set.
      DISTANCE_MEASURE_TYPE_UNSPECIFIED = 0;

      // Euclidean (L_2) Distance.
      SQUARED_L2_DISTANCE = 1;

      // Cosine Distance. Defined as 1 - cosine similarity.
      //
      // We strongly suggest using DOT_PRODUCT_DISTANCE + UNIT_L2_NORM instead
      // of COSINE distance. Our algorithms have been more optimized for
      // DOT_PRODUCT distance which, when combined with UNIT_L2_NORM, is
      // mathematically equivalent to COSINE distance and results in the same
      // ranking.
      COSINE_DISTANCE = 2;

      // Dot Product Distance. Defined as a negative of the dot product.
      DOT_PRODUCT_DISTANCE = 3;
    }

    // The configuration with regard to the algorithms used for efficient
    // search.
    oneof algorithm_config {
      // Optional. Configuration options for the tree-AH algorithm (Shallow tree
      // + Asymmetric Hashing). Please refer to this paper for more details:
      // https://arxiv.org/abs/1908.10396
      TreeAHConfig tree_ah_config = 8 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Configuration options for using brute force search, which
      // simply implements the standard linear search in the database for each
      // query. It is primarily meant for benchmarking and to generate the
      // ground truth for approximate search.
      BruteForceConfig brute_force_config = 9
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Optional. Column of embedding. This column contains the source data to
    // create index for vector search. embedding_column must be set when using
    // vector search.
    string embedding_column = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Columns of features that are used to filter vector search
    // results.
    repeated string filter_columns = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Column of crowding. This column contains crowding attribute
    // which is a constraint on a neighbor list produced by
    // [FeatureOnlineStoreService.SearchNearestEntities][google.cloud.aiplatform.v1beta1.FeatureOnlineStoreService.SearchNearestEntities]
    // to diversify search results. If
    // [NearestNeighborQuery.per_crowding_attribute_neighbor_count][google.cloud.aiplatform.v1beta1.NearestNeighborQuery.per_crowding_attribute_neighbor_count]
    // is set to K in
    // [SearchNearestEntitiesRequest][google.cloud.aiplatform.v1beta1.SearchNearestEntitiesRequest],
    // it's guaranteed that no more than K entities of the same crowding
    // attribute are returned in the response.
    string crowding_column = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of dimensions of the input embedding.
    optional int32 embedding_dimension = 6
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The distance measure used in nearest neighbor search.
    DistanceMeasureType distance_measure_type = 7
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Configuration for vector indexing.
  message IndexConfig {
    // Configuration options for using brute force search.
    message BruteForceConfig {}

    // Configuration options for the tree-AH algorithm.
    message TreeAHConfig {
      // Optional. Number of embeddings on each leaf node. The default value is
      // 1000 if not set.
      optional int64 leaf_node_embedding_count = 1
          [(google.api.field_behavior) = OPTIONAL];
    }

    // The distance measure used in nearest neighbor search.
    enum DistanceMeasureType {
      // Should not be set.
      DISTANCE_MEASURE_TYPE_UNSPECIFIED = 0;

      // Euclidean (L_2) Distance.
      SQUARED_L2_DISTANCE = 1;

      // Cosine Distance. Defined as 1 - cosine similarity.
      //
      // We strongly suggest using DOT_PRODUCT_DISTANCE + UNIT_L2_NORM instead
      // of COSINE distance. Our algorithms have been more optimized for
      // DOT_PRODUCT distance which, when combined with UNIT_L2_NORM, is
      // mathematically equivalent to COSINE distance and results in the same
      // ranking.
      COSINE_DISTANCE = 2;

      // Dot Product Distance. Defined as a negative of the dot product.
      DOT_PRODUCT_DISTANCE = 3;
    }

    // The configuration with regard to the algorithms used for efficient
    // search.
    oneof algorithm_config {
      // Optional. Configuration options for the tree-AH algorithm (Shallow tree
      // + Asymmetric Hashing). Please refer to this paper for more details:
      // https://arxiv.org/abs/1908.10396
      TreeAHConfig tree_ah_config = 6 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Configuration options for using brute force search, which
      // simply implements the standard linear search in the database for each
      // query. It is primarily meant for benchmarking and to generate the
      // ground truth for approximate search.
      BruteForceConfig brute_force_config = 7
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Optional. Column of embedding. This column contains the source data to
    // create index for vector search. embedding_column must be set when using
    // vector search.
    string embedding_column = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Columns of features that are used to filter vector search
    // results.
    repeated string filter_columns = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Column of crowding. This column contains crowding attribute
    // which is a constraint on a neighbor list produced by
    // [FeatureOnlineStoreService.SearchNearestEntities][google.cloud.aiplatform.v1beta1.FeatureOnlineStoreService.SearchNearestEntities]
    // to diversify search results. If
    // [NearestNeighborQuery.per_crowding_attribute_neighbor_count][google.cloud.aiplatform.v1beta1.NearestNeighborQuery.per_crowding_attribute_neighbor_count]
    // is set to K in
    // [SearchNearestEntitiesRequest][google.cloud.aiplatform.v1beta1.SearchNearestEntitiesRequest],
    // it's guaranteed that no more than K entities of the same crowding
    // attribute are returned in the response.
    string crowding_column = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of dimensions of the input embedding.
    optional int32 embedding_dimension = 4
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The distance measure used in nearest neighbor search.
    DistanceMeasureType distance_measure_type = 5
        [(google.api.field_behavior) = OPTIONAL];
  }

  // A Feature Registry source for features that need to be synced to Online
  // Store.
  message FeatureRegistrySource {
    // Features belonging to a single feature group that will be
    // synced to Online Store.
    message FeatureGroup {
      // Required. Identifier of the feature group.
      string feature_group_id = 1 [(google.api.field_behavior) = REQUIRED];

      // Required. Identifiers of features under the feature group.
      repeated string feature_ids = 2 [(google.api.field_behavior) = REQUIRED];
    }

    // Required. List of features that need to be synced to Online Store.
    repeated FeatureGroup feature_groups = 1
        [(google.api.field_behavior) = REQUIRED];

    // Optional. The project number of the parent project of the Feature Groups.
    optional int64 project_number = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Service agent type used during data sync.
  enum ServiceAgentType {
    // By default, the project-level Vertex AI Service Agent is enabled.
    SERVICE_AGENT_TYPE_UNSPECIFIED = 0;

    // Indicates the project-level Vertex AI Service Agent
    // (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
    // will be used during sync jobs.
    SERVICE_AGENT_TYPE_PROJECT = 1;

    // Enable a FeatureView service account to be created by Vertex AI and
    // output in the field `service_account_email`. This service account will
    // be used to read from the source BigQuery table during sync.
    SERVICE_AGENT_TYPE_FEATURE_VIEW = 2;
  }

  // The source of the data loaded onto the FeatureOnlineStore for this
  // FeatureView. As a oneof, at most one of the members can be set.
  oneof source {
    // Optional. Configures how data is supposed to be extracted from a BigQuery
    // source to be loaded onto the FeatureOnlineStore.
    BigQuerySource big_query_source = 6
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Configures the features from a Feature Registry source that
    // need to be loaded onto the FeatureOnlineStore.
    FeatureRegistrySource feature_registry_source = 9
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Identifier. Name of the FeatureView. Format:
  // `projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}`
  string name = 1 [(google.api.field_behavior) = IDENTIFIER];

  // Output only. Timestamp when this FeatureView was created.
  google.protobuf.Timestamp create_time = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this FeatureView was last updated.
  google.protobuf.Timestamp update_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Used to perform consistent read-modify-write updates. If not set,
  // a blind "overwrite" update happens.
  string etag = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata to organize your
  // FeatureViews.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information on and examples of labels.
  // No more than 64 user labels can be associated with one
  // FeatureOnlineStore (System labels are excluded). System reserved label
  // keys are prefixed with "aiplatform.googleapis.com/" and are immutable.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];

  // Configures when data is to be synced/updated for this FeatureView. At the
  // end of the sync the latest featureValues for each entityId of this
  // FeatureView are made ready for online serving.
  SyncConfig sync_config = 7;

  // Optional. Deprecated: please use
  // [FeatureView.index_config][google.cloud.aiplatform.v1beta1.FeatureView.index_config]
  // instead.
  VectorSearchConfig vector_search_config = 8
      [deprecated = true, (google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration for index preparation for vector search. It
  // contains the required configurations to create an index from source data,
  // so that approximate nearest neighbor (a.k.a ANN) algorithms search can be
  // performed during online serving.
  IndexConfig index_config = 15 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Service agent type used during data sync. By default, the Vertex
  // AI Service Agent is used. When using an IAM Policy to isolate this
  // FeatureView within a project, a separate service account should be
  // provisioned by setting this field to `SERVICE_AGENT_TYPE_FEATURE_VIEW`.
  // This will generate a separate service account to access the BigQuery source
  // table.
  ServiceAgentType service_agent_type = 14
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. A Service Account unique to this FeatureView. The role
  // bigquery.dataViewer should be granted to this service account to allow
  // Vertex AI Feature Store to sync data to the online store.
  string service_account_email = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
}