xref: /aosp_15_r20/external/federated-compute/fcp/client/fl_runner.h (revision 14675a029014e728ec732f129a32e299b2da0601)
1 /*
2  * Copyright 2020 Google LLC
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef FCP_CLIENT_FL_RUNNER_H_
17 #define FCP_CLIENT_FL_RUNNER_H_
18 
19 #include <string>
20 
21 #include "absl/status/statusor.h"
22 #include "fcp/base/monitoring.h"
23 #include "fcp/client/engine/engine.pb.h"
24 #include "fcp/client/event_publisher.h"
25 #include "fcp/client/federated_protocol.h"
26 #include "fcp/client/federated_select.h"
27 #include "fcp/client/files.h"
28 #include "fcp/client/fl_runner.pb.h"
29 #include "fcp/client/flags.h"
30 #include "fcp/client/http/http_client.h"
31 #include "fcp/client/interruptible_runner.h"
32 #include "fcp/client/log_manager.h"
33 #include "fcp/client/opstats/opstats_logger.h"
34 #include "fcp/client/phase_logger.h"
35 #include "fcp/client/simple_task_environment.h"
36 #include "fcp/protos/plan.pb.h"
37 #include "tensorflow/core/framework/tensor.h"
38 
39 namespace fcp {
40 namespace client {
41 
42 inline constexpr absl::string_view kTensorflowCheckpointAggregand =
43     "tensorflow_checkpoint";  // Presumably an aggregand key; confirm at use site.
44 
45 // Prod entry point for running a federated computation. Concurrent calls, with
46 // the same SimpleTaskEnvironment::GetBaseDir(), are not supported.
47 //
48 // This is a long running blocking call that - for a successful run -
49 // encompasses connecting to a server, downloading and running a computation,
50 // uploading results, and storing logs about the run in an operational stats DB.
51 // During that call, the function will call back (from both the calling thread
52 // and from newly created threads) into the dependencies injected here to query
53 // for examples, check whether it should abort, publish events / logs for
54 // telemetry, create files, and query feature flags.
55 //
56 // Arguments:
57 // - federated_service_uri, api_key: used to connect to the Federated server.
58 // - test_cert_path: a file path to a CA certificate to be used in tests. Should
59 //     be empty for production use; when used in tests, the URI must use the
60 //     https+test:// scheme.
61 // - session_name: A client-side identifier of the type of work this computation
62 //     performs; used to annotate log entries in the operational stats DB.
63 // - population_name: a string provided to the Federated server to identify
64 //     what population this device is checking in for.
65 // - client_version: A platform-specific identifier that is used by the server
66 //     to serve versioned computations - that is, versions of a computation that
67 //     have been tested and found to be compatible with this device's version -
68 //     or reject the device.
69 // - attestation_measurement: An opaque string from a "measurement" that can be
70 //     used by the server to attest the device integrity.
71 // - retry_token: not described here. TODO(review): document its contract.
72 //
73 // Returns:
74 // On success, the returned FLRunnerResult contains information on when the
75 // function should be called again for this session.
76 absl::StatusOr<FLRunnerResult> RunFederatedComputation(
77     SimpleTaskEnvironment* env_deps, EventPublisher* event_publisher,
78     Files* files, LogManager* log_manager, const Flags* flags,
79     const std::string& federated_service_uri, const std::string& api_key,
80     const std::string& test_cert_path, const std::string& session_name,
81     const std::string& population_name, const std::string& retry_token,
82     const std::string& client_version,
83     const std::string& attestation_measurement);
84 
85 // This is exposed for use in tests that require a mocked FederatedProtocol and
86 // OpStatsLogger. Otherwise, this is used internally by the other
87 // RunFederatedComputation overload once the FederatedProtocol and
88 // OpStatsLogger objects have been created.
89 absl::StatusOr<FLRunnerResult> RunFederatedComputation(
90     SimpleTaskEnvironment* env_deps, PhaseLogger& phase_logger,
91     EventPublisher* event_publisher, Files* files, LogManager* log_manager,
92     ::fcp::client::opstats::OpStatsLogger* opstats_logger, const Flags* flags,
93     FederatedProtocol* federated_protocol,
94     FederatedSelectManager* fedselect_manager,
95     const fcp::client::InterruptibleRunner::TimingConfig& timing_config,
96     const absl::Time reference_time, const std::string& session_name,
97     const std::string& population_name);
98 
99 // This is exposed for use in compatibility tests only. Prod code should call
100 // RunFederatedComputation instead.
101 FLRunnerTensorflowSpecResult RunPlanWithTensorflowSpecForTesting(
102     SimpleTaskEnvironment* env_deps, EventPublisher* event_publisher,
103     Files* files, LogManager* log_manager, const Flags* flags,
104     const google::internal::federated::plan::ClientOnlyPlan& client_plan,
105     const std::string& checkpoint_input_filename,
106     const fcp::client::InterruptibleRunner::TimingConfig& timing_config,
107     const absl::Time run_plan_start_time, const absl::Time reference_time);
108 
109 }  // namespace client
110 }  // namespace fcp
111 
112 #endif  // FCP_CLIENT_FL_RUNNER_H_
113