1// Copyright 2022 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
syntax = "proto3";

package google.cloud.webrisk.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

// Per-language code generation options for this package.
option csharp_namespace = "Google.Cloud.WebRisk.V1";
option go_package = "cloud.google.com/go/webrisk/apiv1/webriskpb;webriskpb";
option java_multiple_files = true;
option java_outer_classname = "WebRiskProto";
option java_package = "com.google.webrisk.v1";
option objc_class_prefix = "GCWR";
option php_namespace = "Google\\Cloud\\WebRisk\\V1";
option ruby_package = "Google::Cloud::WebRisk::V1";
33
// Web Risk API defines an interface to detect malicious URLs on your
// website and in client applications.
service WebRiskService {
  option (google.api.default_host) = "webrisk.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Gets the most recent threat list diffs. These diffs should be applied to
  // a local database of hashes to keep it up-to-date. If the local database is
  // empty or excessively out-of-date, a complete snapshot of the database will
  // be returned. This method only updates a single ThreatList at a time. To
  // update multiple ThreatList databases, this method needs to be called once
  // for each list.
  rpc ComputeThreatListDiff(ComputeThreatListDiffRequest)
      returns (ComputeThreatListDiffResponse) {
    option (google.api.http) = {
      get: "/v1/threatLists:computeDiff"
    };
    option (google.api.method_signature) =
        "threat_type,version_token,constraints";
  }

  // Checks whether a URI is on a given threatList.
  // Multiple threatLists may be searched in a single query.
  // The response will list all requested threatLists the URI was found to
  // match. If the URI is not found on any of the requested ThreatLists, an
  // empty response will be returned.
  rpc SearchUris(SearchUrisRequest) returns (SearchUrisResponse) {
    option (google.api.http) = {
      get: "/v1/uris:search"
    };
    option (google.api.method_signature) = "uri,threat_types";
  }

  // Gets the full hashes that match the requested hash prefix.
  // This is used after a hash prefix is looked up in a threatList
  // and there is a match. The client side threatList only holds partial hashes
  // so the client must query this method to determine if there is a full
  // hash match of a threat.
  rpc SearchHashes(SearchHashesRequest) returns (SearchHashesResponse) {
    option (google.api.http) = {
      get: "/v1/hashes:search"
    };
    option (google.api.method_signature) = "hash_prefix,threat_types";
  }

  // Creates a Submission of a URI suspected of containing phishing content to
  // be reviewed. If the result verifies the existence of malicious phishing
  // content, the site will be added to [Google's Social Engineering
  // lists](https://support.google.com/webmasters/answer/6350487/) in order to
  // protect users that could get exposed to this threat in the future. Only
  // allowlisted projects can use this method during Early Access. Please reach
  // out to Sales or your customer engineer to obtain access.
  rpc CreateSubmission(CreateSubmissionRequest) returns (Submission) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/submissions"
      body: "submission"
    };
    option (google.api.method_signature) = "parent,submission";
  }
}
92
// Describes an API diff request.
message ComputeThreatListDiffRequest {
  // The constraints for this diff.
  message Constraints {
    // The maximum size in number of entries. The diff will not contain more
    // entries than this value. This should be a power of 2 between 2**10 and
    // 2**20. If zero, no diff size limit is set.
    int32 max_diff_entries = 1;

    // Sets the maximum number of entries that the client is willing to have
    // in the local database. This should be a power of 2 between 2**10 and
    // 2**20. If zero, no database size limit is set.
    int32 max_database_entries = 2;

    // The compression types supported by the client.
    repeated CompressionType supported_compressions = 3;
  }

  // Required. The threat list to update. Only a single ThreatType should be
  // specified per request. If you want to handle multiple ThreatTypes, you
  // must make one request per ThreatType.
  ThreatType threat_type = 1 [(google.api.field_behavior) = REQUIRED];

  // The current version token of the client for the requested list (the
  // client version that was received from the last successful diff).
  // If the client does not have a version token (this is the first time calling
  // ComputeThreatListDiff), this may be left empty and a full database
  // snapshot will be returned.
  bytes version_token = 2;

  // Required. The constraints associated with this request.
  Constraints constraints = 3 [(google.api.field_behavior) = REQUIRED];
}
126
// Response to a ComputeThreatListDiff call. Carries the additions and
// removals to apply to the client's local database, together with the new
// version token and the checksum the database is expected to have afterwards.
message ComputeThreatListDiffResponse {
  // The type of response sent to the client.
  enum ResponseType {
    // Unknown.
    RESPONSE_TYPE_UNSPECIFIED = 0;

    // Partial updates are applied to the client's existing local database.
    DIFF = 1;

    // Full updates reset the client's entire local database. This means
    // that either the client had no state, was seriously out-of-date,
    // or the client is believed to be corrupt.
    RESET = 2;
  }

  // The expected state of a client's local database.
  message Checksum {
    // The SHA256 hash of the client state; that is, of the sorted list of all
    // hashes present in the database.
    bytes sha256 = 1;
  }

  // The type of response. This may indicate that an action must be taken by the
  // client when the response is received.
  ResponseType response_type = 4;

  // A set of entries to add to a local threat type's list.
  ThreatEntryAdditions additions = 5;

  // A set of entries to remove from a local threat type's list.
  // This field may be empty.
  ThreatEntryRemovals removals = 6;

  // The new opaque client version token. This should be retained by the client
  // and passed into the next call of ComputeThreatListDiff as 'version_token'.
  // A separate version token should be stored and used for each threatList.
  bytes new_version_token = 7;

  // The expected SHA256 hash of the client state; that is, of the sorted list
  // of all hashes present in the database after applying the provided diff.
  // If the client state doesn't match the expected state, the client must
  // discard this diff and retry later.
  Checksum checksum = 8;

  // The earliest time at which the client should issue another diff
  // request. Querying sooner is unlikely to produce a meaningful diff.
  // Waiting longer is acceptable considering the use case.
  // If this field is not set clients may update as soon as they want.
  google.protobuf.Timestamp recommended_next_diff = 2;
}
177
// Request to check URI entries against threatLists.
message SearchUrisRequest {
  // Required. The URI to be checked for matches.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The ThreatLists to search in. Multiple ThreatLists may be
  // specified.
  repeated ThreatType threat_types = 2 [(google.api.field_behavior) = REQUIRED];
}
186
// Response to a SearchUris call, holding the threat match (if any) for the
// requested URI.
message SearchUrisResponse {
  // Contains threat information on a matching URI.
  message ThreatUri {
    // The ThreatLists this threat belongs to.
    repeated ThreatType threat_types = 1;

    // The cache lifetime for the returned match. Clients must not cache this
    // response past this timestamp to avoid false positives.
    google.protobuf.Timestamp expire_time = 2;
  }

  // The threat list matches. This might be empty if the URI is on no list.
  ThreatUri threat = 1;
}
201
// Request to return full hashes matched by the provided hash prefixes.
message SearchHashesRequest {
  // A hash prefix, consisting of the most significant 4-32 bytes of a SHA256
  // hash. For JSON requests, this field is base64-encoded.
  // Note that if this parameter is provided by a URI, it must be encoded using
  // the web safe base64 variant (RFC 4648).
  bytes hash_prefix = 1;

  // Required. The ThreatLists to search in. Multiple ThreatLists may be
  // specified.
  repeated ThreatType threat_types = 2 [(google.api.field_behavior) = REQUIRED];
}
213
// Response to a SearchHashes call, holding the full hashes that matched the
// requested prefix and the cache lifetime for non-matching entities.
message SearchHashesResponse {
  // Contains threat information on a matching hash.
  message ThreatHash {
    // The ThreatLists this threat belongs to.
    // This must contain at least one entry.
    repeated ThreatType threat_types = 1;

    // A 32 byte SHA256 hash. This field is in binary format. For JSON
    // requests, hashes are base64-encoded.
    bytes hash = 2;

    // The cache lifetime for the returned match. Clients must not cache this
    // response past this timestamp to avoid false positives.
    google.protobuf.Timestamp expire_time = 3;
  }

  // The full hashes that matched the requested prefixes.
  // The hash will be populated in the key.
  // NOTE(review): this is a repeated field, not a map, so "the key"
  // presumably refers to `ThreatHash.hash` - confirm.
  repeated ThreatHash threats = 1;

  // For requested entities that did not match the threat list, how long to
  // cache the response until.
  google.protobuf.Timestamp negative_expire_time = 2;
}
238
// The type of threat. This maps directly to the threat list a threat may
// belong to.
enum ThreatType {
  // No entries should match this threat type. This threat type is unused.
  THREAT_TYPE_UNSPECIFIED = 0;

  // Malware targeting any platform.
  MALWARE = 1;

  // Social engineering targeting any platform.
  SOCIAL_ENGINEERING = 2;

  // Unwanted software targeting any platform.
  UNWANTED_SOFTWARE = 3;

  // A list of extended coverage social engineering URIs targeting any
  // platform.
  SOCIAL_ENGINEERING_EXTENDED_COVERAGE = 4;
}
258
// The ways in which threat entry sets can be compressed.
enum CompressionType {
  // Unknown.
  COMPRESSION_TYPE_UNSPECIFIED = 0;

  // Raw, uncompressed data.
  RAW = 1;

  // Rice-Golomb encoded data.
  RICE = 2;
}
270
// Contains the set of entries to add to a local database.
// May contain a combination of compressed and raw data in a single response.
message ThreatEntryAdditions {
  // The raw SHA256-formatted entries.
  // Repeated to allow returning sets of hashes with different prefix sizes.
  repeated RawHashes raw_hashes = 1;

  // The encoded 4-byte prefixes of SHA256-formatted entries, using a
  // Golomb-Rice encoding. The hashes are converted to uint32, sorted in
  // ascending order, then delta encoded and stored as encoded_data.
  RiceDeltaEncoding rice_hashes = 2;
}
283
// Contains the set of entries to remove from a local database.
message ThreatEntryRemovals {
  // The raw removal indices for a local list.
  RawIndices raw_indices = 1;

  // The encoded local, lexicographically-sorted list indices, using a
  // Golomb-Rice encoding. Used for sending compressed removal indices. The
  // removal indices (uint32) are sorted in ascending order, then delta encoded
  // and stored as encoded_data.
  RiceDeltaEncoding rice_indices = 2;
}
295
// A set of raw indices to remove from a local list.
message RawIndices {
  // The indices to remove from a lexicographically-sorted local list.
  repeated int32 indices = 1;
}
301
// The uncompressed threat entries in hash format.
// Hashes can be anywhere from 4 to 32 bytes in size. A large majority are 4
// bytes, but some hashes are lengthened if they collide with the hash of a
// popular URI.
//
// Used for sending ThreatEntryAdditions to clients that do not support
// compression, or when sending non-4-byte hashes to clients that do support
// compression.
message RawHashes {
  // The number of bytes for each prefix encoded below. This field can be
  // anywhere from 4 (shortest prefix) to 32 (full SHA256 hash).
  // In practice this is almost always 4, except in exceptional circumstances.
  int32 prefix_size = 1;

  // The hashes, in binary format, concatenated into one long string. Hashes are
  // sorted in lexicographic order. For JSON API users, hashes are
  // base64-encoded.
  bytes raw_hashes = 2;
}
321
// The Rice-Golomb encoded data. Used for sending compressed 4-byte hashes or
// compressed removal indices.
message RiceDeltaEncoding {
  // The offset of the first entry in the encoded data, or, if only a single
  // integer was encoded, that single integer's value. If the field is empty or
  // missing, assume zero.
  int64 first_value = 1;

  // The Golomb-Rice parameter, which is a number between 2 and 28. This field
  // is missing (that is, zero) if `entry_count` is zero.
  int32 rice_parameter = 2;

  // The number of entries that are delta encoded in the encoded data. If only a
  // single integer was encoded, this will be zero and the single value will be
  // stored in `first_value`.
  int32 entry_count = 3;

  // The encoded deltas that are encoded using the Golomb-Rice coder.
  bytes encoded_data = 4;
}
342
// Wraps a URI that might be displaying malicious content.
message Submission {
  // Required. The URI that is being reported for malicious content to be
  // analyzed.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}
348
// Request to send a potentially phishy URI to WebRisk.
message CreateSubmissionRequest {
  // Required. The name of the project that is making the submission. This
  // string is in the format "projects/{project_number}".
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. The submission that contains the content of the phishing report.
  Submission submission = 2 [(google.api.field_behavior) = REQUIRED];
}
363