// xref: /aosp_15_r20/external/federated-compute/fcp/client/diag_codes.proto (revision 14675a029014e728ec732f129a32e299b2da0601)
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This schema uses proto3 syntax.
syntax = "proto3";

// All definitions in this file live in the fcp.client proto package.
package fcp.client;

// Java code generation: emit classes under this Java package, with each
// top-level definition in its own .java file.
option java_package = "com.google.intelligence.fcp.client";
option java_multiple_files = true;

// Enumerations of diagnostic codes for debugging, testing, and logging.
//
// Diag codes serve two purposes:
//  - Testing and development. The ability to log, observe and assert on
//    diag code traces allows for integration testing of code that runs
//    asynchronously in different processes or apps. Both DebugDiagCodes and
//    ProdDiagCodes are used to this end.
//  - Monitoring of a deployment. Sequences of diag codes
//    * are easy to analyze,
//    * have limited expressive power by design (mere integers) to discourage
//      logging sensitive information, and
//    * are easier to support across platforms as compared to unstructured
//      logs, for both policy and technical reasons.
//
// Note that only the ordinal of the diag code enum will be logged to clearcut.
// As a result, the diag codes for debug and production should be mutually
// exclusive: a numeric value used by DebugDiagCode must not also be used by
// ProdDiagCode (the zero "undefined" default of each enum aside).
enum DebugDiagCode {
  // Default value; proto3 requires the first enum value to be zero.
  DEBUG_DIAG_CODE_UNDEFINED = 0;

  // Codes reserved for test-only training diag codes.
  // =================================================

  // Logged right before ClientExecution.getLoopOp() is executed.
  TRAINING_BEFORE_LOOP_OP = 1000;

  // Logged right after ClientExecution.getLoopOp() is executed.
  TRAINING_AFTER_LOOP_OP = 1001;

  // Logged if opstats is enabled.
  TRAINING_OPSTATS_ENABLED = 1002;

  // Codes reserved for test-only resource cache diag codes.
  // =======================================================

  // Logged when a resource is requested that is in the cache.
  RESOURCE_CACHE_HIT = 1200;

  // Logged when a resource is requested that isn't in the cache.
  RESOURCE_CACHE_MISS = 1201;
}

// Diagnostic codes that are meant to be logged in production. These are
// usually pretty severe errors, public API being called, or infrequent jobs
// (like training or old example removal) being run.
//
// The logging of ProdDiagCode is controlled by a runtime dynamic flag. Logging
// can be skipped in accordance with the flag.
//
// Values are intentionally kept in their original declaration order; do not
// renumber or re-sort them.
enum ProdDiagCode {
  // Default value; proto3 requires the first enum value to be zero.
  PROD_DIAG_CODE_UNDEFINED = 0;

  // Codes reserved for background training
  // ======================================

  // Successfully interrupted TensorFlow execution happening on a separate
  // thread.
  BACKGROUND_TRAINING_INTERRUPT_TF_EXECUTION = 51;

  // TensorFlow session was interrupted but timed out waiting for execution to
  // complete.
  BACKGROUND_TRAINING_INTERRUPT_TF_EXECUTION_TIMED_OUT = 50;

  // TensorFlow session was interrupted and finished execution after the grace
  // period.
  BACKGROUND_TRAINING_INTERRUPT_TF_EXTENDED_EXECUTION_COMPLETED = 49;

  // TensorFlow session was interrupted but timed out waiting for execution to
  // complete in the extended period.
  BACKGROUND_TRAINING_INTERRUPT_TF_EXTENDED_EXECUTION_TIMED_OUT = 48;

  // Sent when the provided ClientOnlyPlan cannot be parsed.
  BACKGROUND_TRAINING_FAILED_CANNOT_PARSE_PLAN = 40;

  // Sent when the provided ClientOnlyPlan does not pass a sanity check.
  BACKGROUND_TRAINING_FAILED_PLAN_FAILS_SANITY_CHECK = 39;

  // Successfully interrupted GRPC on a separate thread.
  BACKGROUND_TRAINING_INTERRUPT_GRPC = 34;

  // GRPC was interrupted but timed out waiting for execution to complete.
  BACKGROUND_TRAINING_INTERRUPT_GRPC_TIMED_OUT = 33;

  // GRPC was interrupted and finished after the grace period.
  BACKGROUND_TRAINING_INTERRUPT_GRPC_EXTENDED_COMPLETED = 32;

  // GRPC was interrupted but timed out waiting for execution to complete in
  // the extended period.
  BACKGROUND_TRAINING_INTERRUPT_GRPC_EXTENDED_TIMED_OUT = 31;

  // Successfully interrupted HTTP on a separate thread.
  BACKGROUND_TRAINING_INTERRUPT_HTTP = 24;

  // HTTP was interrupted but timed out waiting for execution to complete.
  BACKGROUND_TRAINING_INTERRUPT_HTTP_TIMED_OUT = 23;

  // HTTP was interrupted and finished after the grace period.
  BACKGROUND_TRAINING_INTERRUPT_HTTP_EXTENDED_COMPLETED = 22;

  // HTTP was interrupted but timed out waiting for execution to complete in
  // the extended period.
  BACKGROUND_TRAINING_INTERRUPT_HTTP_EXTENDED_TIMED_OUT = 21;

  // Sent when TFLite was used.
  BACKGROUND_TRAINING_TFLITE_ENGINE_USED = 20;

  // Sent when TFLite model flatbuffer is not empty.
  BACKGROUND_TRAINING_TFLITE_MODEL_INCLUDED = 18;

  // A generic SecAgg client error.
  SECAGG_CLIENT_NATIVE_ERROR_GENERIC = 1369;

  // The server requested an unsupported version.
  SECAGG_CLIENT_ERROR_UNSUPPORTED_VERSION = 1368;

  // Sent when a plan that ingests data via Dataset is attempted to be run in
  // an environment where Dataset support is not available.
  DATASET_NOT_SUPPORTED = 1493;

  // Logged when a CheckinRequestAck message was expected, but not received.
  BACKGROUND_TRAINING_CHECKIN_REQUEST_ACK_EXPECTED_BUT_NOT_RECVD = 29;

  // Logged when a CheckinRequestAck message is received.
  BACKGROUND_TRAINING_CHECKIN_REQUEST_ACK_RECEIVED = 28;

  // Sent when the provided eligibility eval ClientOnlyPlan cannot be parsed.
  BACKGROUND_TRAINING_ELIGIBILITY_EVAL_FAILED_CANNOT_PARSE_PLAN = 26;

  // Codes reserved for logs related to opstats
  // ==========================================

  // Logged when the provided path for creating database is invalid.
  OPSTATS_INVALID_FILE_PATH = 1699;

  // Logged when failed to create parent directories for the database file.
  OPSTATS_PARENT_DIR_CREATION_FAILED = 1698;

  // Logged when failed to read from OpStats DB.
  OPSTATS_READ_FAILED = 1697;

  // Logged when failed to reset OpStats DB.
  OPSTATS_RESET_FAILED = 1696;

  // Logged when failed to write to OpStats DB.
  OPSTATS_WRITE_FAILED = 1695;

  // Logged when the OpStats example store is requested, but the collection
  // uri is wrong.
  OPSTATS_INCORRECT_COLLECTION_URI = 1694;

  // Logged when the provided selection criteria for the OpStats example store
  // is invalid.
  OPSTATS_INVALID_SELECTION_CRITERIA = 1693;

  // Logged when the OpStats example store is requested, but not enabled.
  OPSTATS_EXAMPLE_STORE_REQUESTED_NOT_ENABLED = 1692;

  // Logged when extracting the task name from the checkin response fails.
  OPSTATS_TASK_NAME_EXTRACTION_FAILED = 1691;

  // Logged when we start to construct an opstats message for a run after
  // having successfully created an underlying db.
  OPSTATS_DB_COMMIT_EXPECTED = 1690;

  // Logged when we try to commit an opstats message to the db.
  OPSTATS_DB_COMMIT_ATTEMPTED = 1689;

  // Logged when there's already another instance of OpStatsDb which uses the
  // same underlying file.
  OPSTATS_MULTIPLE_DB_INSTANCE_DETECTED = 1688;

  // Logged when failed to open a file descriptor for the underlying database
  // file.
  OPSTATS_FAILED_TO_OPEN_FILE = 1687;

  // Codes reserved for logs related to HTTP
  // =======================================

  // Logged when a client using the GRPC protocol downloads a regular
  // (non-eligibility eval) task's resource (plan or initial checkpoint) using
  // HTTP.
  HTTP_GRPC_PROTOCOL_REGULAR_TASK_RESOURCE_USES_HTTP = 1799;

  // Logged when the attempt to fetch HTTP resources (as per
  // `HTTP_GRPC_PROTOCOL_REGULAR_TASK_RESOURCE_USES_HTTP`) failed.
  HTTP_GRPC_PROTOCOL_REGULAR_TASK_RESOURCE_HTTP_FETCH_FAILED = 1798;

  // Logged when the attempt to fetch HTTP resources (as per
  // `HTTP_GRPC_PROTOCOL_REGULAR_TASK_RESOURCE_USES_HTTP`) succeeded.
  HTTP_GRPC_PROTOCOL_REGULAR_TASK_RESOURCE_HTTP_FETCH_SUCCEEDED = 1797;

  // Logged when a cancellation request or an abort request failed.
  HTTP_CANCELLATION_OR_ABORT_REQUEST_FAILED = 1790;

  // Logged when a ReportEligibilityEvalTaskResult request failed.
  HTTP_REPORT_ELIGIBILITY_EVAL_RESULT_REQUEST_FAILED = 1789;

  // Logged when a ReportTaskResult request failed.
  HTTP_REPORT_TASK_RESULT_REQUEST_FAILED = 1788;

  // Logged when HTTP federated protocol is used.
  HTTP_FEDERATED_PROTOCOL_USED = 1787;

  // Codes reserved for logs related to temp files
  // =============================================

  // NOTE(review): this value had no description. Judging by the name alone it
  // is presumably logged when native code fails to delete temp files; confirm
  // against the call site.
  TEMP_FILES_NATIVE_FAILED_TO_DELETE = 1090;

  // Codes reserved for logs related to Federated Select
  // ===================================================

  // Logged when a task tries to use Federated Select to fetch one or more
  // slices but the feature is disabled.
  FEDSELECT_SLICE_HTTP_FETCH_REQUESTED_BUT_DISABLED = 1899;

  // Logged when a regular (non-eligibility eval) task uses Federated Select to
  // fetch one or more slices via HTTP.
  FEDSELECT_SLICE_HTTP_FETCH_REQUESTED = 1898;

  // Logged when the attempt to fetch one or more slices via HTTP (as per
  // `FEDSELECT_SLICE_HTTP_FETCH_REQUESTED`) failed.
  FEDSELECT_SLICE_HTTP_FETCH_FAILED = 1897;

  // Logged when the attempt to fetch one or more slices via HTTP (as per
  // `FEDSELECT_SLICE_HTTP_FETCH_REQUESTED`) succeeded.
  FEDSELECT_SLICE_HTTP_FETCH_SUCCEEDED = 1896;

  // Codes for logs related to the resource cache
  // ============================================

  // Logged when a FileBackedResourceCache fails to read the CacheManifest
  // proto db.
  RESOURCE_CACHE_MANIFEST_READ_FAILED = 1999;

  // Logged when a FileBackedResourceCache fails to write the CacheManifest to
  // storage.
  RESOURCE_CACHE_MANIFEST_WRITE_FAILED = 1998;

  // Logged when a FileBackedResourceCache fails to read the cached resource
  // from storage.
  RESOURCE_CACHE_RESOURCE_READ_FAILED = 1997;

  // Logged when a FileBackedResourceCache fails to write the cached resource
  // to storage.
  RESOURCE_CACHE_RESOURCE_WRITE_FAILED = 1996;

  // Logged when a FileBackedResourceCache is initialized without an absolute
  // root path.
  RESOURCE_CACHE_CACHE_ROOT_PATH_NOT_ABSOLUTE = 1995;

  // Logged when a FileBackedResourceCache fails to create the cache dir on
  // initialization.
  RESOURCE_CACHE_FAILED_TO_CREATE_CACHE_DIR = 1994;

  // Logged when a FileBackedResourceCache is initialized with an invalid cache
  // manifest path.
  RESOURCE_CACHE_INVALID_MANIFEST_PATH = 1993;

  // Logged when a FileBackedResourceCache fails to create the parent directory
  // of the cache manifest.
  RESOURCE_CACHE_FAILED_TO_CREATE_MANIFEST_DIR = 1992;

  // Logged when a FileBackedResourceCache fails to reset the cache manifest.
  RESOURCE_CACHE_FAILED_TO_RESET_MANIFEST = 1991;

  // Logged when a FileBackedResourceCache fails to get the size of the cache
  // manifest.
  RESOURCE_CACHE_INIT_FAILED_TO_GET_MANIFEST_SIZE = 1990;

  // Logged when a FileBackedResourceCache fails to iterate over the cache
  // directory during cleanup.
  RESOURCE_CACHE_CLEANUP_FAILED_TO_ITERATE_OVER_CACHE_DIR = 1989;

  // Logged when a FileBackedResourceCache fails to delete a cached file during
  // cleanup.
  RESOURCE_CACHE_CLEANUP_FAILED_TO_DELETE_CACHED_FILE = 1988;

  // Logged when a FileBackedResourceCache fails to get the file size of a
  // cached file.
  RESOURCE_CACHE_CLEANUP_FAILED_TO_GET_FILE_SIZE = 1987;

  // Logged when a FileBackedResourceCache fails to initialize the cache
  // manifest when it doesn't already exist.
  RESOURCE_CACHE_INIT_FAILED_TO_INITIALIZE_MANIFEST = 1986;

  // Logged when a FileBackedResourceCache fails to delete an existing cache
  // manifest due to an error.
  RESOURCE_CACHE_FAILED_TO_DELETE_MANIFEST = 1985;

  // Logged when a FileBackedResourceCache fails in some way during cleanup in
  // initialization.
  RESOURCE_CACHE_INIT_FAILED_CLEANUP = 1984;

  // Logged when a FileBackedResourceCache fails to check if a cached file
  // exists during cleanup.
  RESOURCE_CACHE_CLEANUP_FAILED_TO_CHECK_IF_FILE_EXISTS = 1983;

  // Logged when a FileBackedResourceCache fails to check if a cached file
  // exists during Put().
  RESOURCE_CACHE_PUT_FAILED_TO_CHECK_IF_FILE_EXISTS = 1982;

  // Value 25 is reserved, so the compiler rejects any attempt to assign it to
  // a new code. NOTE(review): presumably a code that was removed; confirm
  // from the file's history.
  reserved 25;
}
