xref: /aosp_15_r20/external/googleapis/google/cloud/webrisk/v1/webrisk.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2022 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.webrisk.v1;
18
19import "google/api/annotations.proto";
20import "google/api/client.proto";
21import "google/api/field_behavior.proto";
22import "google/api/resource.proto";
23import "google/longrunning/operations.proto";
24import "google/protobuf/timestamp.proto";
25
26option csharp_namespace = "Google.Cloud.WebRisk.V1";
27option go_package = "cloud.google.com/go/webrisk/apiv1/webriskpb;webriskpb";
28option java_multiple_files = true;
29option java_outer_classname = "WebRiskProto";
30option java_package = "com.google.webrisk.v1";
31option objc_class_prefix = "GCWR";
32option php_namespace = "Google\\Cloud\\WebRisk\\V1";
33option ruby_package = "Google::Cloud::WebRisk::V1";
34
// Web Risk API defines an interface to detect malicious URLs on your
// website and in client applications.
service WebRiskService {
  option (google.api.default_host) = "webrisk.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Gets the most recent threat list diffs. These diffs should be applied to
  // a local database of hashes to keep it up-to-date. If the local database is
  // empty or excessively out-of-date, a complete snapshot of the database will
  // be returned. This method only updates a single ThreatList at a time. To
  // update multiple ThreatList databases, this method needs to be called once
  // for each list.
  rpc ComputeThreatListDiff(ComputeThreatListDiffRequest)
      returns (ComputeThreatListDiffResponse) {
    option (google.api.http) = {
      get: "/v1/threatLists:computeDiff"
    };
    option (google.api.method_signature) =
        "threat_type,version_token,constraints";
  }

  // This method is used to check whether a URI is on a given threatList.
  // Multiple threatLists may be searched in a single query.
  // The response will list all requested threatLists the URI was found to
  // match. If the URI is not found on any of the requested threatLists, an
  // empty response will be returned.
  rpc SearchUris(SearchUrisRequest) returns (SearchUrisResponse) {
    option (google.api.http) = {
      get: "/v1/uris:search"
    };
    option (google.api.method_signature) = "uri,threat_types";
  }

  // Gets the full hashes that match the requested hash prefix.
  // This is used after a hash prefix is looked up in a threatList
  // and there is a match. The client side threatList only holds partial hashes
  // so the client must query this method to determine if there is a full
  // hash match of a threat.
  rpc SearchHashes(SearchHashesRequest) returns (SearchHashesResponse) {
    option (google.api.http) = {
      get: "/v1/hashes:search"
    };
    option (google.api.method_signature) = "hash_prefix,threat_types";
  }

  // Creates a Submission of a URI suspected of containing phishing content to
  // be reviewed. If the result verifies the existence of malicious phishing
  // content, the site will be added to [Google's Social Engineering
  // lists](https://support.google.com/webmasters/answer/6350487/) in order to
  // protect users that could get exposed to this threat in the future. Only
  // allowlisted projects can use this method during Early Access. Please reach
  // out to Sales or your customer engineer to obtain access.
  rpc CreateSubmission(CreateSubmissionRequest) returns (Submission) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/submissions"
      body: "submission"
    };
    option (google.api.method_signature) = "parent,submission";
  }

  // Submits a URI suspected of containing malicious content to be reviewed.
  // Returns a google.longrunning.Operation which, once the review is complete,
  // is updated with its result. You can use the
  // [Pub/Sub API](https://cloud.google.com/pubsub) to receive notifications
  // for the returned Operation. If the result verifies the existence of
  // malicious content, the site will be added to [Google's Social Engineering
  // lists](https://support.google.com/webmasters/answer/6350487/) in order to
  // protect users that could get exposed to this threat in the future. Only
  // allowlisted projects can use this method during Early Access. Please reach
  // out to Sales or your customer engineer to obtain access.
  rpc SubmitUri(SubmitUriRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/uris:submit"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Submission"
      metadata_type: "SubmitUriMetadata"
    };
  }
}
117
// Describes a request for an API diff.
message ComputeThreatListDiffRequest {
  // Limits that apply to the requested diff.
  message Constraints {
    // Upper bound, in number of entries, on the size of the diff; the diff
    // never holds more entries than this. The value should be a power of 2
    // between 2**10 and 2**20. Zero means no diff size limit is set.
    int32 max_diff_entries = 1;

    // Upper bound on the number of entries the client is willing to keep in
    // its local database. The value should be a power of 2 between 2**10 and
    // 2**20. Zero means no database size limit is set.
    int32 max_database_entries = 2;

    // Compression types that the client supports.
    repeated CompressionType supported_compressions = 3;
  }

  // Required. The threat list to update. Specify only a single ThreatType per
  // request; handling multiple ThreatTypes requires one request per
  // ThreatType.
  ThreatType threat_type = 1 [
    (google.api.field_behavior) = REQUIRED
  ];

  // The client's current version token for the requested list, that is, the
  // client version received from the last successful diff. When the client has
  // no version token yet (the first call to ComputeThreatListDiff), this may
  // be left empty and a full database snapshot will be returned.
  bytes version_token = 2;

  // Required. The constraints that apply to this request.
  Constraints constraints = 3 [
    (google.api.field_behavior) = REQUIRED
  ];
}
151
// Response returned by the ComputeThreatListDiff method.
message ComputeThreatListDiffResponse {
  // The type of response sent to the client.
  enum ResponseType {
    // Unknown.
    RESPONSE_TYPE_UNSPECIFIED = 0;

    // Partial updates are applied to the client's existing local database.
    DIFF = 1;

    // Full updates reset the client's entire local database. This means
    // that either the client had no state, was seriously out-of-date,
    // or the client is believed to be corrupt.
    RESET = 2;
  }

  // The expected state of a client's local database.
  message Checksum {
    // The SHA256 hash of the client state; that is, of the sorted list of all
    // hashes present in the database.
    bytes sha256 = 1;
  }

  // NOTE(review): field numbers 1 and 3 are not used by this message. If they
  // belonged to removed fields they should be declared `reserved` - confirm.

  // The type of response. This may indicate that an action must be taken by the
  // client when the response is received.
  ResponseType response_type = 4;

  // A set of entries to add to a local threat type's list.
  ThreatEntryAdditions additions = 5;

  // A set of entries to remove from a local threat type's list.
  // This field may be empty.
  ThreatEntryRemovals removals = 6;

  // The new opaque client version token. This should be retained by the client
  // and passed into the next call of ComputeThreatListDiff as 'version_token'.
  // A separate version token should be stored and used for each threatList.
  bytes new_version_token = 7;

  // The expected SHA256 hash of the client state; that is, of the sorted list
  // of all hashes present in the database after applying the provided diff.
  // If the client state doesn't match the expected state, the client must
  // discard this diff and retry later.
  Checksum checksum = 8;

  // The soonest the client should wait before issuing any diff
  // request. Querying sooner is unlikely to produce a meaningful diff.
  // Waiting longer is acceptable considering the use case.
  // If this field is not set clients may update as soon as they want.
  google.protobuf.Timestamp recommended_next_diff = 2;
}
202
// Request for checking a URI against threatLists.
message SearchUrisRequest {
  // Required. The URI to check for matches.
  string uri = 1 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The ThreatLists to search in. More than one ThreatList may be
  // given.
  repeated ThreatType threat_types = 2 [
    (google.api.field_behavior) = REQUIRED
  ];
}
212
// Response returned by the SearchUris method.
message SearchUrisResponse {
  // Contains threat information on a matching uri.
  message ThreatUri {
    // The ThreatLists this threat belongs to.
    repeated ThreatType threat_types = 1;

    // The cache lifetime for the returned match. Clients must not cache this
    // response past this timestamp to avoid false positives.
    google.protobuf.Timestamp expire_time = 2;
  }

  // The threat list matches. This might be empty if the URI is on no list.
  ThreatUri threat = 1;
}
227
// Request for the full hashes that match the provided hash prefixes.
message SearchHashesRequest {
  // A hash prefix made up of the most significant 4-32 bytes of a SHA256
  // hash. In JSON requests this field is base64-encoded. When the parameter
  // is supplied through a URI, it must use the web safe base64 variant
  // (RFC 4648).
  bytes hash_prefix = 1;

  // Required. The ThreatLists to search in. More than one ThreatList may be
  // given.
  repeated ThreatType threat_types = 2 [
    (google.api.field_behavior) = REQUIRED
  ];
}
240
// Response returned by the SearchHashes method.
message SearchHashesResponse {
  // Contains threat information on a matching hash.
  message ThreatHash {
    // The ThreatLists this threat belongs to.
    // This must contain at least one entry.
    repeated ThreatType threat_types = 1;

    // A 32 byte SHA256 hash. This field is in binary format. For JSON
    // requests, hashes are base64-encoded.
    bytes hash = 2;

    // The cache lifetime for the returned match. Clients must not cache this
    // response past this timestamp to avoid false positives.
    google.protobuf.Timestamp expire_time = 3;
  }

  // The full hashes that matched the requested prefixes.
  // The hash will be populated in the key.
  // NOTE(review): no field here is named "key"; this presumably refers to
  // `ThreatHash.hash` - confirm.
  repeated ThreatHash threats = 1;

  // For requested entities that did not match the threat list, how long to
  // cache the response until.
  google.protobuf.Timestamp negative_expire_time = 2;
}
265
// The type of threat. Each value maps directly to the threat list that a
// threat may belong to.
enum ThreatType {
  // Unused threat type; no entries should match it.
  THREAT_TYPE_UNSPECIFIED = 0;

  // Malware that targets any platform.
  MALWARE = 1;

  // Social engineering that targets any platform.
  SOCIAL_ENGINEERING = 2;

  // Unwanted software that targets any platform.
  UNWANTED_SOFTWARE = 3;

  // A list of extended coverage social engineering URIs that target any
  // platform.
  SOCIAL_ENGINEERING_EXTENDED_COVERAGE = 4;
}
285
// The compression schemes that can be applied to threat entry sets.
enum CompressionType {
  // Unknown.
  COMPRESSION_TYPE_UNSPECIFIED = 0;

  // Data that is raw, i.e. not compressed.
  RAW = 1;

  // Data encoded with Rice-Golomb coding.
  RICE = 2;
}
297
// Holds the entries to add to a local database. A single response may mix
// compressed and raw data.
message ThreatEntryAdditions {
  // The raw SHA256-formatted entries. The field is repeated so that sets of
  // hashes with different prefix sizes can be returned.
  repeated RawHashes raw_hashes = 1;

  // The 4-byte prefixes of SHA256-formatted entries, encoded with a
  // Golomb-Rice encoding. The hashes are converted to uint32, sorted in
  // ascending order, then delta encoded and stored as encoded_data.
  RiceDeltaEncoding rice_hashes = 2;
}
310
// Holds the entries to remove from a local database.
message ThreatEntryRemovals {
  // The raw indices to remove from a local list.
  RawIndices raw_indices = 1;

  // The indices into the local, lexicographically-sorted list, encoded with a
  // Golomb-Rice encoding. This carries removal indices in compressed form. The
  // removal indices (uint32) are sorted in ascending order, then delta encoded
  // and stored as encoded_data.
  RiceDeltaEncoding rice_indices = 2;
}
322
// Raw indices identifying the entries to remove from a local list.
message RawIndices {
  // Indices into a lexicographically-sorted local list; the entries at these
  // positions are to be removed.
  repeated int32 indices = 1;
}
328
// The uncompressed threat entries in hash format.
// Hashes can be anywhere from 4 to 32 bytes in size. A large majority are 4
// bytes, but some hashes are lengthened if they collide with the hash of a
// popular URI.
//
// Used for sending ThreatEntryAdditions to clients that do not support
// compression, or when sending non-4-byte hashes to clients that do support
// compression.
message RawHashes {
  // The number of bytes for each prefix encoded below.  This field can be
  // anywhere from 4 (shortest prefix) to 32 (full SHA256 hash).
  // In practice this is almost always 4, except in exceptional circumstances.
  int32 prefix_size = 1;

  // The hashes, in binary format, concatenated into one long string. Hashes are
  // sorted in lexicographic order. For JSON API users, hashes are
  // base64-encoded.
  bytes raw_hashes = 2;
}
348
// The Rice-Golomb encoded data. Used for sending compressed 4-byte hashes or
// compressed removal indices.
message RiceDeltaEncoding {
  // The offset of the first entry in the encoded data, or, if only a single
  // integer was encoded, that single integer's value. If the field is empty or
  // missing, assume zero.
  int64 first_value = 1;

  // The Golomb-Rice parameter, which is a number between 2 and 28. This field
  // is missing (that is, zero) if `entry_count` is zero.
  int32 rice_parameter = 2;

  // The number of entries that are delta encoded in the encoded data. If only a
  // single integer was encoded, this will be zero and the single value will be
  // stored in `first_value`.
  int32 entry_count = 3;

  // The encoded deltas that are encoded using the Golomb-Rice coder.
  bytes encoded_data = 4;
}
369
// Wrapper around a URI that might be displaying malicious content.
message Submission {
  // Required. The URI being reported so that it can be analyzed for malicious
  // content.
  string uri = 1 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Output only. The ThreatTypes found to be associated with the submitted
  // URI after it was reviewed. This might be empty if the URI was not added
  // to any list.
  repeated ThreatType threat_types = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY
  ];
}
382
// option (google.api.message_visibility).restriction = "TRUSTED_TESTER";

// Context about the submission including the type of abuse found on the URI and
// supporting details.
message ThreatInfo {
  // The abuse type found on the URI.
  enum AbuseType {
    // Default.
    ABUSE_TYPE_UNSPECIFIED = 0;

    // The URI contains malware.
    MALWARE = 1;

    // The URI contains social engineering.
    SOCIAL_ENGINEERING = 2;

    // The URI contains unwanted software.
    UNWANTED_SOFTWARE = 3;
  }

  // Confidence that a URI is unsafe.
  message Confidence {
    // Enum representation of confidence.
    enum ConfidenceLevel {
      // Default.
      CONFIDENCE_LEVEL_UNSPECIFIED = 0;

      // Less than 60% confidence that the URI is unsafe.
      LOW = 1;

      // Between 60% and 80% confidence that the URI is unsafe.
      MEDIUM = 2;

      // Greater than 80% confidence that the URI is unsafe.
      HIGH = 3;
    }

    // The confidence, expressed either as a numeric score or as a level;
    // only one of the two can be set.
    oneof value {
      // A decimal representation of confidence in the range of 0
      // to 1 where 0 indicates no confidence and 1 indicates
      // complete confidence.
      float score = 1;

      // Enum representation of confidence.
      ConfidenceLevel level = 2;
    }
  }

  // Context about why the URI is unsafe.
  message ThreatJustification {
    // Labels that explain how the URI was classified.
    enum JustificationLabel {
      // Default.
      JUSTIFICATION_LABEL_UNSPECIFIED = 0;

      // The submitter manually verified that the submission is unsafe.
      MANUAL_VERIFICATION = 1;

      // The submitter received the submission from an end user.
      USER_REPORT = 2;

      // The submitter received the submission from an automated system.
      AUTOMATED_REPORT = 3;
    }

    // Labels associated with this URI that explain how it was classified.
    repeated JustificationLabel labels = 1;

    // Free-form context on why this URI is unsafe.
    repeated string comments = 2;
  }

  // The type of abuse.
  AbuseType abuse_type = 1;

  // Confidence that the URI is unsafe.
  Confidence threat_confidence = 2;

  // Context about why the URI is unsafe.
  ThreatJustification threat_justification = 3;
}
463
// Describes how the threat was discovered.
message ThreatDiscovery {
  // The kinds of platform.
  enum Platform {
    // Default.
    PLATFORM_UNSPECIFIED = 0;

    // The general Android platform.
    ANDROID = 1;

    // The general iOS platform.
    IOS = 2;

    // The general macOS platform.
    MACOS = 3;

    // The general Windows platform.
    WINDOWS = 4;
  }

  // The platform on which the threat was discovered.
  Platform platform = 1;

  // CLDR region codes of the countries/regions where the URI poses a threat,
  // ordered from most impact to least impact. Example: "US" for United States.
  repeated string region_codes = 2;
}
491
// Request for sending a potentially phishy URI to WebRisk.
message CreateSubmissionRequest {
  // Required. Name of the project making the submission, given as a string
  // in the format "projects/{project_number}".
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. The submission holding the content of the phishing report.
  Submission submission = 2 [
    (google.api.field_behavior) = REQUIRED
  ];
}
506
// Request for sending a potentially malicious URI to WebRisk.
message SubmitUriRequest {
  // Required. Name of the project making the submission, given as a string
  // in the format "projects/{project_number}".
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. The submission holding the URI to be scanned.
  Submission submission = 2 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Additional information about the submission.
  ThreatInfo threat_info = 3;

  // Additional information about how the submission was discovered.
  ThreatDiscovery threat_discovery = 4;
}
527
// option (google.api.message_visibility).restriction = "TRUSTED_TESTER";

// Metadata for the Submit URI long-running operation.
message SubmitUriMetadata {
  // Enum that represents the state of the long-running operation.
  enum State {
    // Default unspecified state.
    STATE_UNSPECIFIED = 0;

    // The operation is currently running.
    RUNNING = 1;

    // The operation finished with a success status.
    SUCCEEDED = 2;

    // The operation was cancelled.
    CANCELLED = 3;

    // The operation finished with a failure status.
    FAILED = 4;

    // The operation was closed with no action taken.
    CLOSED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Creation time of the operation.
  google.protobuf.Timestamp create_time = 2;

  // Latest update time of the operation.
  google.protobuf.Timestamp update_time = 3;
}
561