xref: /aosp_15_r20/external/federated-compute/fcp/client/flags.h (revision 14675a029014e728ec732f129a32e299b2da0601)
/*
 * Copyright 2020 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16*14675a02SAndroid Build Coastguard Worker #ifndef FCP_CLIENT_FLAGS_H_
17*14675a02SAndroid Build Coastguard Worker #define FCP_CLIENT_FLAGS_H_
18*14675a02SAndroid Build Coastguard Worker 
#include <cstdint>
#include <string>
#include <vector>

#include "absl/status/status.h"
23*14675a02SAndroid Build Coastguard Worker 
24*14675a02SAndroid Build Coastguard Worker namespace fcp {
25*14675a02SAndroid Build Coastguard Worker namespace client {
26*14675a02SAndroid Build Coastguard Worker 
27*14675a02SAndroid Build Coastguard Worker // A class for changing runtime behavior with "flags" - typically, server
28*14675a02SAndroid Build Coastguard Worker // provided values.
29*14675a02SAndroid Build Coastguard Worker class Flags {
30*14675a02SAndroid Build Coastguard Worker  public:
31*14675a02SAndroid Build Coastguard Worker   virtual ~Flags() = default;
32*14675a02SAndroid Build Coastguard Worker 
33*14675a02SAndroid Build Coastguard Worker   // The period of time in milliseconds between device condition checks. This is
34*14675a02SAndroid Build Coastguard Worker   // used during potentially long blocking calls such as TensorFlow or network
35*14675a02SAndroid Build Coastguard Worker   // I/O, as well as for throttling regular condition checks during plan
36*14675a02SAndroid Build Coastguard Worker   // execution (e.g. before fetching a new example).
37*14675a02SAndroid Build Coastguard Worker   virtual int64_t condition_polling_period_millis() const = 0;
38*14675a02SAndroid Build Coastguard Worker 
39*14675a02SAndroid Build Coastguard Worker   // The period of time in milliseconds allowed for TensorFlow execution to
40*14675a02SAndroid Build Coastguard Worker   // finish after it's been interrupted.
41*14675a02SAndroid Build Coastguard Worker   virtual int64_t tf_execution_teardown_grace_period_millis() const = 0;
42*14675a02SAndroid Build Coastguard Worker 
43*14675a02SAndroid Build Coastguard Worker   // The period of time in milliseconds allowed for TensorFlow execution to
44*14675a02SAndroid Build Coastguard Worker   // finish after the grace period. This allows us to decide if we want long
45*14675a02SAndroid Build Coastguard Worker   // running native execution to be forcibly resolved or continue indefinitely.
46*14675a02SAndroid Build Coastguard Worker   virtual int64_t tf_execution_teardown_extended_period_millis() const = 0;
47*14675a02SAndroid Build Coastguard Worker 
48*14675a02SAndroid Build Coastguard Worker   // The deadline in seconds for the gRPC channel used for communication
49*14675a02SAndroid Build Coastguard Worker   // between the client and server.
50*14675a02SAndroid Build Coastguard Worker   virtual int64_t grpc_channel_deadline_seconds() const = 0;
51*14675a02SAndroid Build Coastguard Worker 
52*14675a02SAndroid Build Coastguard Worker   // Whether to log the error message strings from TensorFlow exceptions.
53*14675a02SAndroid Build Coastguard Worker   virtual bool log_tensorflow_error_messages() const = 0;
54*14675a02SAndroid Build Coastguard Worker 
55*14675a02SAndroid Build Coastguard Worker   // Whether to enable recording to and querying from the Operational Statistics
56*14675a02SAndroid Build Coastguard Worker   // db.
enable_opstats()57*14675a02SAndroid Build Coastguard Worker   virtual bool enable_opstats() const { return true; }
58*14675a02SAndroid Build Coastguard Worker 
59*14675a02SAndroid Build Coastguard Worker   // The number of days for data to live in the OpStatsDb without update.
opstats_ttl_days()60*14675a02SAndroid Build Coastguard Worker   virtual int64_t opstats_ttl_days() const { return 30; }
61*14675a02SAndroid Build Coastguard Worker 
62*14675a02SAndroid Build Coastguard Worker   // The maximum size of the data stored by OpStatsDb.
opstats_db_size_limit_bytes()63*14675a02SAndroid Build Coastguard Worker   virtual int64_t opstats_db_size_limit_bytes() const {
64*14675a02SAndroid Build Coastguard Worker     return 1 * 1024 * 1024;
65*14675a02SAndroid Build Coastguard Worker   }
66*14675a02SAndroid Build Coastguard Worker 
67*14675a02SAndroid Build Coastguard Worker   // The retry delay to use when encountering a transient error during a
68*14675a02SAndroid Build Coastguard Worker   // training run before having received a RetryWindow from the server.
federated_training_transient_errors_retry_delay_secs()69*14675a02SAndroid Build Coastguard Worker   virtual int64_t federated_training_transient_errors_retry_delay_secs() const {
70*14675a02SAndroid Build Coastguard Worker     // 15 minutes
71*14675a02SAndroid Build Coastguard Worker     return 15 * 60;
72*14675a02SAndroid Build Coastguard Worker   }
73*14675a02SAndroid Build Coastguard Worker 
74*14675a02SAndroid Build Coastguard Worker   // The amount of jitter to apply when using the
75*14675a02SAndroid Build Coastguard Worker   // `federated_training_transient_errors_retry_delay_secs` flag. Must be a
76*14675a02SAndroid Build Coastguard Worker   // value between 0 and 1. E.g. a value of 0.2 means that retry delays will
77*14675a02SAndroid Build Coastguard Worker   // fall within [0.8 * target period, 1.2 * target period).
federated_training_transient_errors_retry_delay_jitter_percent()78*14675a02SAndroid Build Coastguard Worker   virtual float federated_training_transient_errors_retry_delay_jitter_percent()
79*14675a02SAndroid Build Coastguard Worker       const {
80*14675a02SAndroid Build Coastguard Worker     return 0.2;
81*14675a02SAndroid Build Coastguard Worker   }
82*14675a02SAndroid Build Coastguard Worker 
83*14675a02SAndroid Build Coastguard Worker   // The retry delay to use when encountering a permanent error during a
84*14675a02SAndroid Build Coastguard Worker   // training run (regardless of whether the client already received a
85*14675a02SAndroid Build Coastguard Worker   // RetryWindow from the server).
federated_training_permanent_errors_retry_delay_secs()86*14675a02SAndroid Build Coastguard Worker   virtual int64_t federated_training_permanent_errors_retry_delay_secs() const {
87*14675a02SAndroid Build Coastguard Worker     // 4 hours
88*14675a02SAndroid Build Coastguard Worker     return 4 * 60 * 60;
89*14675a02SAndroid Build Coastguard Worker   }
90*14675a02SAndroid Build Coastguard Worker 
91*14675a02SAndroid Build Coastguard Worker   // The amount of jitter to apply when using the
92*14675a02SAndroid Build Coastguard Worker   // `federated_training_permanent_errors_retry_delay_secs` flag. Must be a
93*14675a02SAndroid Build Coastguard Worker   // value between 0 and 1. E.g. a value of 0.2 means that retry delays will
94*14675a02SAndroid Build Coastguard Worker   // fall within [0.8 * target period, 1.2 * target period).
federated_training_permanent_errors_retry_delay_jitter_percent()95*14675a02SAndroid Build Coastguard Worker   virtual float federated_training_permanent_errors_retry_delay_jitter_percent()
96*14675a02SAndroid Build Coastguard Worker       const {
97*14675a02SAndroid Build Coastguard Worker     return 0.2;
98*14675a02SAndroid Build Coastguard Worker   }
99*14675a02SAndroid Build Coastguard Worker 
100*14675a02SAndroid Build Coastguard Worker   // The list of error codes that should be considered 'permanent'.
federated_training_permanent_error_codes()101*14675a02SAndroid Build Coastguard Worker   virtual std::vector<int32_t> federated_training_permanent_error_codes()
102*14675a02SAndroid Build Coastguard Worker       const {
103*14675a02SAndroid Build Coastguard Worker     return {
104*14675a02SAndroid Build Coastguard Worker         // The server returns NOT_FOUND if the client checks in with an unknown
105*14675a02SAndroid Build Coastguard Worker         // population name. While this can be resolved without any client
106*14675a02SAndroid Build Coastguard Worker         // changes by creating the population server-side, it is nevertheless
107*14675a02SAndroid Build Coastguard Worker         // wise to treat this as a 'permanent' error for which a longer
108*14675a02SAndroid Build Coastguard Worker         // RetryPeriod is used, because such temporary mismatches in
109*14675a02SAndroid Build Coastguard Worker         // client/server configuration are fairly common and otherwise cause
110*14675a02SAndroid Build Coastguard Worker         // clients to check in unnecessarily frequently.
111*14675a02SAndroid Build Coastguard Worker         static_cast<int32_t>(absl::StatusCode::kNotFound),
112*14675a02SAndroid Build Coastguard Worker         // INVALID_ARGUMENT generally indicates a client-side issue (e.g. a bug
113*14675a02SAndroid Build Coastguard Worker         // in the client's protocol implementation), which is unlikely to be
114*14675a02SAndroid Build Coastguard Worker         // resolved by merely retrying the request.
115*14675a02SAndroid Build Coastguard Worker         static_cast<int32_t>(absl::StatusCode::kInvalidArgument),
116*14675a02SAndroid Build Coastguard Worker         // UNIMPLEMENTED similarly could indicate a client-side issue, or a
117*14675a02SAndroid Build Coastguard Worker         // temporary server issue (e.g. a bug/missing feature implementation in
118*14675a02SAndroid Build Coastguard Worker         // the server). Either way, it is also unlikely to be resolved by merely
119*14675a02SAndroid Build Coastguard Worker         // retrying the request soon.
120*14675a02SAndroid Build Coastguard Worker         static_cast<int32_t>(absl::StatusCode::kUnimplemented)};
121*14675a02SAndroid Build Coastguard Worker   }
122*14675a02SAndroid Build Coastguard Worker 
123*14675a02SAndroid Build Coastguard Worker   // Whether use TFLite for training.
use_tflite_training()124*14675a02SAndroid Build Coastguard Worker   virtual bool use_tflite_training() const { return false; }
125*14675a02SAndroid Build Coastguard Worker 
126*14675a02SAndroid Build Coastguard Worker   // Whether to enable support for downloading plan/initial checkpoint resources
127*14675a02SAndroid Build Coastguard Worker   // via HTTP, while still using gRPC for the main protocol.
enable_grpc_with_http_resource_support()128*14675a02SAndroid Build Coastguard Worker   virtual bool enable_grpc_with_http_resource_support() const { return false; }
129*14675a02SAndroid Build Coastguard Worker 
130*14675a02SAndroid Build Coastguard Worker   // Whether to enable support for downloading eligibility eval plan/initial
131*14675a02SAndroid Build Coastguard Worker   // checkpoint resources via HTTP, while still using gRPC for the main
132*14675a02SAndroid Build Coastguard Worker   // protocol.
enable_grpc_with_eligibility_eval_http_resource_support()133*14675a02SAndroid Build Coastguard Worker   virtual bool enable_grpc_with_eligibility_eval_http_resource_support() const {
134*14675a02SAndroid Build Coastguard Worker     return false;
135*14675a02SAndroid Build Coastguard Worker   }
136*14675a02SAndroid Build Coastguard Worker 
137*14675a02SAndroid Build Coastguard Worker   // When true, TFLite interpreter will use dynamic memory allocation, and
138*14675a02SAndroid Build Coastguard Worker   // release the memory for tensors that are no longer needed.
ensure_dynamic_tensors_are_released()139*14675a02SAndroid Build Coastguard Worker   virtual bool ensure_dynamic_tensors_are_released() const { return true; }
140*14675a02SAndroid Build Coastguard Worker 
141*14675a02SAndroid Build Coastguard Worker   // When the value is above zero, any tensor size (bytes) above the threshold
142*14675a02SAndroid Build Coastguard Worker   // will be considered as a large tensor, and dynamic allocation is applied on
143*14675a02SAndroid Build Coastguard Worker   // them.
large_tensor_threshold_for_dynamic_allocation()144*14675a02SAndroid Build Coastguard Worker   virtual int32_t large_tensor_threshold_for_dynamic_allocation() const {
145*14675a02SAndroid Build Coastguard Worker     return 1000;
146*14675a02SAndroid Build Coastguard Worker   }
147*14675a02SAndroid Build Coastguard Worker 
148*14675a02SAndroid Build Coastguard Worker   // When true, the TFLite runtime graph-reordering optimization that clusters
149*14675a02SAndroid Build Coastguard Worker   // delegate nodes together is disabled.
disable_tflite_delegate_clustering()150*14675a02SAndroid Build Coastguard Worker   virtual bool disable_tflite_delegate_clustering() const { return false; }
151*14675a02SAndroid Build Coastguard Worker 
152*14675a02SAndroid Build Coastguard Worker   // When true, http request body won't be compressed.
disable_http_request_body_compression()153*14675a02SAndroid Build Coastguard Worker   virtual bool disable_http_request_body_compression() const { return false; }
154*14675a02SAndroid Build Coastguard Worker 
155*14675a02SAndroid Build Coastguard Worker   // When true, HTTP Federated Compute protocol is used.
use_http_federated_compute_protocol()156*14675a02SAndroid Build Coastguard Worker   virtual bool use_http_federated_compute_protocol() const { return false; }
157*14675a02SAndroid Build Coastguard Worker 
158*14675a02SAndroid Build Coastguard Worker   // When true, the client computes the task identity to pass in
159*14675a02SAndroid Build Coastguard Worker   // SelectorContext.
enable_computation_id()160*14675a02SAndroid Build Coastguard Worker   virtual bool enable_computation_id() const { return false; }
161*14675a02SAndroid Build Coastguard Worker 
162*14675a02SAndroid Build Coastguard Worker   // The waiting period for issuing cancellation requests before checking
163*14675a02SAndroid Build Coastguard Worker   // whether the client should be interrupted.
waiting_period_sec_for_cancellation()164*14675a02SAndroid Build Coastguard Worker   virtual int32_t waiting_period_sec_for_cancellation() const { return 10; }
165*14675a02SAndroid Build Coastguard Worker 
166*14675a02SAndroid Build Coastguard Worker   // If true, the client supports the Federated Select feature. If not
167*14675a02SAndroid Build Coastguard Worker   // then any Federated Select-specific example query will fail with an error
enable_federated_select()168*14675a02SAndroid Build Coastguard Worker   virtual bool enable_federated_select() const { return false; }
169*14675a02SAndroid Build Coastguard Worker 
170*14675a02SAndroid Build Coastguard Worker   // The max size in bytes of resources that the ResourceCache is allowed to
171*14675a02SAndroid Build Coastguard Worker   // store. If greater than 0, the client will attempt to cache resources sent
172*14675a02SAndroid Build Coastguard Worker   // by uri via the hybrid grpc-with-http-resources and the full http stack. If
173*14675a02SAndroid Build Coastguard Worker   // this value is reduced from some previous greater value, the cache dir will
174*14675a02SAndroid Build Coastguard Worker   // be reduced appropriately the next time it is initialized at the start of
175*14675a02SAndroid Build Coastguard Worker   // the next run.
max_resource_cache_size_bytes()176*14675a02SAndroid Build Coastguard Worker   virtual int64_t max_resource_cache_size_bytes() const { return 0; }
177*14675a02SAndroid Build Coastguard Worker 
178*14675a02SAndroid Build Coastguard Worker   // If true, an error during the initialization of the resource cache will
179*14675a02SAndroid Build Coastguard Worker   // publish a fatal initialization error instead of a nonfatal initialization
180*14675a02SAndroid Build Coastguard Worker   // error and halt execution.
resource_cache_initialization_error_is_fatal()181*14675a02SAndroid Build Coastguard Worker   virtual bool resource_cache_initialization_error_is_fatal() const {
182*14675a02SAndroid Build Coastguard Worker     return false;
183*14675a02SAndroid Build Coastguard Worker   }
184*14675a02SAndroid Build Coastguard Worker 
185*14675a02SAndroid Build Coastguard Worker   // The number of threads that TFLite interpreter will use.
num_threads_for_tflite()186*14675a02SAndroid Build Coastguard Worker   virtual int32_t num_threads_for_tflite() const { return 1; }
187*14675a02SAndroid Build Coastguard Worker 
188*14675a02SAndroid Build Coastguard Worker   // If true, Opstats initialization errors will be logged via
189*14675a02SAndroid Build Coastguard Worker   // PhaseLogger.LogNonfatalInitializationError(). Execution will still be
190*14675a02SAndroid Build Coastguard Worker   // allowed to continue with a no-op implementation like before.
log_opstats_initialization_errors()191*14675a02SAndroid Build Coastguard Worker   virtual bool log_opstats_initialization_errors() const { return false; }
192*14675a02SAndroid Build Coastguard Worker 
193*14675a02SAndroid Build Coastguard Worker   // If true, enables the last_successful_contribution option in the opstats
194*14675a02SAndroid Build Coastguard Worker   // selection criteria which returns an opstats entry for the last successful
195*14675a02SAndroid Build Coastguard Worker   // contribution for the currently executing task.
opstats_last_successful_contribution_criteria()196*14675a02SAndroid Build Coastguard Worker   virtual bool opstats_last_successful_contribution_criteria() const {
197*14675a02SAndroid Build Coastguard Worker     return false;
198*14675a02SAndroid Build Coastguard Worker   }
199*14675a02SAndroid Build Coastguard Worker 
200*14675a02SAndroid Build Coastguard Worker   // If true, enables support for the `TensorflowSpec.constant_inputs` field. If
201*14675a02SAndroid Build Coastguard Worker   // false, then the field will be ignored.
support_constant_tf_inputs()202*14675a02SAndroid Build Coastguard Worker   virtual bool support_constant_tf_inputs() const { return false; }
203*14675a02SAndroid Build Coastguard Worker 
204*14675a02SAndroid Build Coastguard Worker   // If true, enables an Example Query plan engine to be invoked for
205*14675a02SAndroid Build Coastguard Worker   // non-TensorFlow tasks.
enable_example_query_plan_engine()206*14675a02SAndroid Build Coastguard Worker   virtual bool enable_example_query_plan_engine() const { return false; }
207*14675a02SAndroid Build Coastguard Worker 
208*14675a02SAndroid Build Coastguard Worker   // If true, the HTTP federated protocol supports multiple task assignments.
http_protocol_supports_multiple_task_assignments()209*14675a02SAndroid Build Coastguard Worker   virtual bool http_protocol_supports_multiple_task_assignments() const {
210*14675a02SAndroid Build Coastguard Worker     return false;
211*14675a02SAndroid Build Coastguard Worker   }
212*14675a02SAndroid Build Coastguard Worker };
213*14675a02SAndroid Build Coastguard Worker }  // namespace client
214*14675a02SAndroid Build Coastguard Worker }  // namespace fcp
215*14675a02SAndroid Build Coastguard Worker 
216*14675a02SAndroid Build Coastguard Worker #endif  // FCP_CLIENT_FLAGS_H_
217