xref: /aosp_15_r20/external/federated-compute/fcp/client/http/http_client.h (revision 14675a029014e728ec732f129a32e299b2da0601)
1 /*
2  * Copyright 2021 Google LLC
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef FCP_CLIENT_HTTP_HTTP_CLIENT_H_
17 #define FCP_CLIENT_HTTP_HTTP_CLIENT_H_
18 
19 #include <cstdint>
20 #include <memory>
21 #include <optional>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 #include "absl/base/attributes.h"
27 #include "absl/container/flat_hash_map.h"
28 #include "absl/status/status.h"
29 #include "absl/status/statusor.h"
30 #include "absl/strings/string_view.h"
31 
32 namespace fcp {
33 namespace client {
34 namespace http {
35 
36 using Header = std::pair<std::string, std::string>;  // one name/value pair
37 // This is a vector of pairs and not a map since multiple headers with the same
38 // name are allowed (see RFC2616 section 4.2).
39 using HeaderList = std::vector<Header>;
40 
41 class HttpRequest;          // forward declaration; defined below
42 class HttpRequestCallback;  // forward declaration; defined below
43 class HttpRequestHandle;    // forward declaration; defined below
44 class HttpResponse;         // forward declaration; defined below
45 
46 // An interface that allows the callers to make HTTP requests and receive their
47 // responses.
48 //
49 // Platforms will be required to pass an instance of this class to
50 // `RunFederatedComputation(...)`, and such instances must remain alive for at
51 // least the duration of those calls.
52 //
53 // Instances of this class must support being called from any thread. In such
54 // cases implementations should ideally handle multiple requests in parallel, at
55 // least up to an implementation-defined max number of parallel requests (i.e.
56 // ideally two `PerformRequests` calls on different threads can be handled in
57 // parallel, rather than having the 2nd call block until the 1st call is
58 // completed).
59 //
60 // Besides the requirements documented further below, the following high-level
61 // behavior is required of any `HttpClient` implementation:
62 // - Underlying protocols:
63 //   * Implementations must support at least HTTP/1.1 over TLS 1.2.
64 //   * Implementations are also allowed to serve requests using HTTP/2, QUIC or
65 //     other newer protocols.
66 //   * Implementations must support both IPv4 and IPv6 (but they are allowed to
67 //     fall back to IPv4).
68 // - Certificate validation:
69 //   * Implementations are responsible for TLS certificate validation and for
70 //     maintaining an up-to-date set of root certificates as well as an
71 //     up-to-date HTTP/TLS implementation.
72 //   * Implementations should not include user-added CAs, and should consider
73 //     restricting the set of CAs further to only those needed for connecting to
74 //     the expected endpoints.
75 // - Cookies:
76 //   * Implementations must not supply any cookies in requests (beyond those
77 //     that may be specified in the `HttpRequest::extra_headers()` method).
78 //   * Implementations must not store any cookies returned by the server.
79 //   * Instead, they must return any server-specified "Set-Cookie" response
80 //     header via `HttpResponse::headers()`.
81 // - Redirects:
82 //   *  Implementations must follow HTTP redirect responses, up to an
83 //      implementation-defined maximum.
84 //   *  In such cases the response headers & body returned via the interfaces
85 //      below should be those of the final response.
86 //   *  See `HttpRequestCallback` docs below for more details.
87 // - Caching:
88 //   *  Implementations should not implement a cache as it is expected that
89 //      naive HTTP-level caching will not be effective (and since a cache may
90 //      ultimately be implemented over this interface, in the Federated Compute
91 //      library itself).
92 //   *  If implementations do implement one, however, they are expected to abide
93 //      by the standard HTTP caching rules (see the `HttpRequest::Method` docs
94 //      for more details).
95 // - Response body decompression & decoding:
96 //   *  If no "Accept-Encoding" request header is explicitly specified in
97 //      `HttpRequest::extra_headers()`, then implementations must advertise an
98 //      "Accept-Encoding" request header themselves whose value includes at
99 //      least "gzip" (additional encodings are allowed to be specified in
100 //      addition to "gzip"), and must transparently decompress any compressed
101 //      server responses before returning the data via these interfaces.
102 //      Implementations are also allowed to advertise/support additional
103 //      encoding methods.
104 //   *  In such cases where no "Accept-Encoding" header is specified,
105 //      implementations must remove the "Content-Encoding" and
106 //      "Content-Length" headers from headers returned via
107 //      `HttpResponse::headers()` (since those wouldn't reflect the payload
108 //      delivered via this interface).
109 //   *  However, if an "Accept-Encoding" request header *is* explicitly
110 //      specified, then implementations must use that header verbatim and they
111 //      must not decompress the response (even if they natively support the
112 //      compression method), and they must leave the "Content-Encoding" and
113 //      "Content-Length" headers intact.
114 //   *  This ensures that the caller of this interface can take full control of
115 //      the decompression and/or choose to store decompressed payloads on disk
116 //      if it so chooses.
117 //   *  Implementations must transparently decode server responses served with
118 //      "Transfer-Encoding: chunked". In such cases they must remove the
119 //      "Transfer-Encoding" response header.
120 // - Request body compression & encoding:
121 //   *  If implementations receive a "Content-Encoding" request header, this
122 //      means that the request body stream they receive has already been
123 //      compressed. The implementation must leave the header and request body
124 //      intact in such cases (i.e. not re-compress it).
125 //   *  If implementations receive a "Content-Length" request header, they must
126 //      use it verbatim and they should then assume that the request body will
127 //      be of exactly that size.
128 //   *  If they do not receive such a header then they must use the
129 //      "Transfer-Encoding: chunked" mechanism to transmit the request body
130 //      (i.e. they shouldn't specify a "Content-Length" header and they should
131 //      transmit the body in chunks), or use an equivalent method of streaming
132 //      the data (such as HTTP/2's data streaming).
133 class HttpClient {
134  public:
135   virtual ~HttpClient() = default;
136 
137   // Enqueues an HTTP request, without starting it yet. To start the request the
138   // `HttpRequestHandle` must be passed to `PerformRequests`. Each
139   // `HttpRequestHandle` must be passed to at most one `PerformRequests` call.
140   //
141   // The `HttpClient` implementation assumes ownership of the `HttpRequest`
142   // object, and the implementation must delete the object when the
143   // `HttpRequestHandle` is deleted.
144   ABSL_MUST_USE_RESULT
145   virtual std::unique_ptr<HttpRequestHandle> EnqueueRequest(
146       std::unique_ptr<HttpRequest> request) = 0;
147 
148   // Performs the given requests, delivering each request's results to the
149   // `HttpRequestCallback` it is paired with while this method is blocked. This
150   // method must block until all requests have finished or have been cancelled,
151   // and until all corresponding request callbacks have returned.
152   //
153   // By decoupling the enqueueing and starting of (groups of) requests,
154   // implementations may be able to handle concurrent requests more optimally
155   // (e.g. by issuing them over a shared HTTP connection). Having separate
156   // per-request `HttpRequestHandle` objects also makes it easier to support
157   // canceling specific requests, releasing resources for specific requests,
158   // accessing stats for specific requests, etc.
159   //
160   // The `HttpRequestHandle` and `HttpRequestCallback` instances must outlive
161   // the call to `PerformRequests`, but may be deleted any time after this call
162   // has returned.
163   //
164   // Returns an `INVALID_ARGUMENT` error if an `HttpRequestHandle` was already
165   // passed to another `PerformRequests` call, or if an `HttpRequestHandle`'s
166   // `Cancel` method was already called before the handle was passed to this
167   // call.
168   virtual absl::Status PerformRequests(
169       std::vector<std::pair<HttpRequestHandle*, HttpRequestCallback*>>
170           requests) = 0;
171 };
172 
173 // An HTTP request for a single resource. Implemented by the caller of
174 // `HttpClient`.
175 //
176 // Once instances are passed to `EnqueueRequest`, their lifetime is managed by
177 // the `HttpClient` implementation. Implementations must tie the `HttpRequest`
178 // instance lifetime to the lifetime of the `HttpRequestHandle` they return
179 // (i.e. they should delete the `HttpRequest` from the `HttpRequestHandle`
180 // destructor).
181 //
182 // Methods of this class may get called from any thread (and subsequent calls
183 // are not required to all happen on the same thread).
184 class HttpRequest {
185  public:
186   // Note: the request methods imply a set of standard request properties such
187   // as cacheability, safety, and idempotency. See
188   // https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods.
189   //
190   // The caller of `HttpClient` may implement its own caching layer in the
191   // future, so implementations are not expected to cache cacheable requests
192   // (although they are technically allowed to).
193   //
194   // Implementations should not automatically retry requests, even if the
195   // request method implies it is safe or idempotent. The caller of `HttpClient`
196   // will own the responsibility for retrying requests.
197   enum class Method { kHead, kGet, kPost, kPut, kPatch, kDelete };
198 
199   // Must not be called until any corresponding `HttpRequestHandle` has been
200   // deleted.
201   virtual ~HttpRequest() = default;
202 
203   // The URI to request. Will always have an "https://" scheme (but this may be
204   // extended in the future).
205   virtual absl::string_view uri() const = 0;
206 
207   // The HTTP method to use for this request.
208   virtual Method method() const = 0;
209 
210   // Extra request headers to include with this request, in addition to any
211   // headers specified by the `HttpClient` implementation.
212   //
213   // See the `HttpClient` comment for the expected behavior w.r.t. a few
214   // specific headers.
215   virtual const HeaderList& extra_headers() const = 0;
216 
217   // Returns true if the request has a request body (which can be read using
218   // `ReadBody`). If the request body payload size is known ahead of time, then
219   // the "Content-Length" header will be set in `extra_headers()`. If it isn't
220   // known yet then the `HttpClient` implementation should use the
221   // "Transfer-Encoding: chunked" encoding to transmit the request body to the
222   // server in chunks (or use an equivalent method of streaming the data, e.g.
223   // if the connection uses HTTP/2). See the `HttpClient` comment for more
224   // details.
225   virtual bool HasBody() const = 0;
226 
227   // Requests that up to `requested` bytes of the request body be read into
228   // `buffer`, and that the actual number of bytes read is returned. The caller
229   // retains ownership of the buffer.
230   //
231   // Callees must return at least 1 byte, but may otherwise return less than the
232   // requested amount of data, if more data isn't available yet. Callees should
233   // return `OUT_OF_RANGE` when the end of data has been reached, in which case
234   // `buffer` should not be modified.
235   //
236   // Callees should return data ASAP, as delaying this for too long may cause
237   // the network stream to fall idle/run out of data to transmit.
238   //
239   // May also return other errors, in which case the request will be ended and
240   // `HttpRequestCallback::OnResponseError` will be called with the same error.
241   virtual absl::StatusOr<int64_t> ReadBody(char* buffer, int64_t requested) = 0;
242 };
243 
244 // A handle to a pending `HttpRequest`, allowing a caller of `HttpClient` to
245 // access stats for the request or to cancel ongoing requests. Implemented by
246 // the `HttpClient` implementer.
247 //
248 // The lifetimes of instances of this class are owned by the caller of
249 // `HttpClient`.
250 //
251 // Methods of this class may get called from any thread (and subsequent calls
252 // are not required to all happen on the same thread).
253 class HttpRequestHandle {
254  public:
255   // When this is called, `HttpClient` implementations should delete all their
256   // owned resources as well as the associated `HttpRequest`.
257   virtual ~HttpRequestHandle() = default;
258 
259   // The total amount of data sent/received over the network for this request up
260   // to this point. These numbers should reflect as close as possible the amount
261   // of bytes sent "over the wire". This means, for example, that if the data is
262   // compressed or if a `Transfer-Encoding` is used, the numbers should reflect
263   // the compressed and/or encoded size of the data (if the implementation is
264   // able to account for that). Implementations are allowed to account for the
265   // overhead of TLS encoding in these numbers, but are not required to (since
266   // many HTTP libraries also do not provide stats at that level of
267   // granularity).
268   //
269   // If the request was served from a cache then this should reflect only the
270   // actual bytes sent over the network (e.g. 0 if returned from disk directly,
271   // or if a cache validation request was sent only those bytes used by the
272   // validation request/response).
273   //
274   // If the request involved redirects, the numbers returned here should include
275   // the bytes sent/received for those redirects, if the implementation supports
276   // this. Otherwise they are allowed to reflect only the final
277   // request/response's bytes sent/received.
278   //
279   // Implementations should strive to return numbers that are as up-to-date as
280   // possible from this method (e.g. ideally the 'sent' number should reflect
281   // the amount of request body data that has been uploaded so far, even if the
282   // upload hasn't completed fully yet; similarly the 'received' number should
283   // reflect the amount of response body data received so far, even if the
284   // response hasn't been fully received yet).
285   //
286   // The numbers returned here are not required to increase monotonically
287   // between each call to the method. E.g. implementations are allowed to return
288   // best-available estimates while the request is still in flight, and then
289   // revise the numbers down to a more accurate number once the request has been
290   // completed.
291   struct SentReceivedBytes {
292     int64_t sent_bytes;
293     int64_t received_bytes;
294   };
295   virtual SentReceivedBytes TotalSentReceivedBytes() const = 0;
296 
297   // Used to indicate that the request should be cancelled and that
298   // implementations may release resources associated with this request (e.g.
299   // the socket used by the request).
300   //
301   // Callers are still only allowed to delete this instance after any
302   // corresponding `PerformRequests()` call has completed, and not before.
303   //
304   // If a `PerformRequests` call is ongoing for this handle, then the
305   // corresponding `HttpRequestCallback` instance may still receive further
306   // method invocations after this call returns (e.g. because an invocation may
307   // already have been in flight).
308   //
309   // If a `PerformRequests` call is ongoing for this handle, and if the
310   // `HttpRequestCallback::OnResponseStarted` method was not called yet, then
311   // the `HttpRequestCallback::OnResponseError` method must be called with
312   // status `CANCELLED`.
313   //
314   // Otherwise, if a `PerformRequests` call is ongoing for this handle, and if
315   // the `HttpRequestCallback::OnResponseCompleted` method was not called yet,
316   // then the `HttpRequestCallback::OnResponseBodyError` method must be called
317   // with status `CANCELLED`.
318   virtual void Cancel() = 0;
319 };
320 
321 // The callback interface that `HttpClient` implementations must use to deliver
322 // the response to a `HttpRequest`. Implemented by the caller of `HttpClient`.
323 //
324 // The lifetimes of instances of this class are owned by the caller of
325 // `HttpClient`. Instances must remain alive for at least as long as their
326 // corresponding `PerformRequests` call.
327 //
328 // Methods of this class may get called from any thread (incl. concurrently),
329 // but callers of this class must always call the callback methods for a
330 // specific `HttpRequest` in the order specified in each method's documentation.
331 // Implementations of this class therefore likely should use internal
332 // synchronization.
333 //
334 // For example, a call to `OnResponseBody` for a given `HttpRequest` A will
335 // always be preceded by a completed call to `OnResponseStarted` for that same
336 // request A. However, callbacks for different `HttpRequest` objects may happen
337 // concurrently, so for example, `OnResponseStarted` may be called concurrently
338 // for two different requests A and B. This latter scenario means that if the
339 // same `HttpRequestCallback` object is used to handle callbacks for both
340 // requests, then the object has to handle concurrent calls correctly.
341 class HttpRequestCallback {
342  public:
343   virtual ~HttpRequestCallback() = default;
344 
345   // Called when the final HTTP response headers have been received (i.e. after
346   // any redirects have been followed but before the response body may have been
347   // received fully) for the given `HttpRequest`. The response data can be
348   // accessed via the given `HttpResponse`, which will remain alive for the
349   // lifetime of the corresponding `HttpRequestHandle`.
350   //
351   // Note that all the data in the `HttpResponse` object should reflect the
352   // last/final response (i.e. it shouldn't reflect any already-followed
353   // redirects).
354   //
355   // If the response has a body then after this method is called
356   // `OnResponseBody` will be called one or more times to deliver the response
357   // body (or `OnResponseBodyError` if an error occurs).
358   //
359   // Note that responses with an HTTP status code other than 200 ("OK") may
360   // still have response bodies, and implementations must deliver these via the
361   // `OnResponseBody` callback, just as they should for a successful response.
362   //
363   // If this method returns an error then the `HttpClient` implementation should
364   // consider the `HttpRequest` cancelled. No further methods must be called on
365   // this `HttpRequestCallback` instance for the given `HttpRequest` in this
366   // case.
367   virtual absl::Status OnResponseStarted(const HttpRequest& request,
368                                          const HttpResponse& response) = 0;
369 
370   // Called when the request encountered an error or timed out, before receiving
371   // the response headers completely. No further methods must be called on this
372   // `HttpRequestCallback` instance for the given `HttpRequest` after this
373   // method is called.
374   //
375   // If the implementation is able to discern that the error may have been
376   // transient, it should report `UNAVAILABLE`.
377   //
378   // If more than the implementation's defined max number of redirects occurred
379   // (without reaching the final response), then implementations should report
380   // `OUT_OF_RANGE` here.
381   //
382   // If the implementation hit an implementation-specific timeout (even though
383   // implementations are discouraged from imposing such timeouts), then this
384   // should be `DEADLINE_EXCEEDED`.
385   //
386   // If the `HttpRequestHandle::Cancel` method was called before
387   // `OnResponseStarted` was called for the given `HttpRequest`, then this
388   // method will be called with a `CANCELLED` status.
389   //
390   // If the request's `HttpRequest::ReadBody` returned an unexpected error,
391   // then this method will be called with that error.
392   virtual void OnResponseError(const HttpRequest& request,
393                                const absl::Status& error) = 0;
394 
395   // Called (possibly multiple times per request) when a block of response data
396   // is available in `data`. This method must only be called after
397   // `OnResponseStarted` was called for the given `HttpRequest`.
398   //
399   // Callees must process the data ASAP, as delaying this for too long may
400   // prevent additional data from arriving on the network stream.
401   //
402   // If this method returns an error then the `HttpClient` implementation should
403   // consider the `HttpRequest` cancelled. No further methods must be called on
404   // this `HttpRequestCallback` instance for the given `HttpRequest` in this
405   // case.
406   virtual absl::Status OnResponseBody(const HttpRequest& request,
407                                       const HttpResponse& response,
408                                       absl::string_view data) = 0;
409 
410   // Called when the request encountered an error or timed out while receiving
411   // the response body (i.e. after `OnResponseStarted` was called). No further
412   // methods must be called on this `HttpRequestCallback` instance for the given
413   // `HttpRequest` after this method is called.
414   //
415   // If the implementation is able to discern that the error may have been
416   // transient, it should report `UNAVAILABLE`.
417   //
418   // If the implementation hit an implementation-specific timeout (even though
419   // implementations are discouraged from imposing such timeouts), then this
420   // should be `DEADLINE_EXCEEDED`.
421   //
422   // If the `HttpRequestHandle::Cancel` method was called before
423   // `OnResponseCompleted` was called for the given `HttpRequest`, then this
424   // method will be called with a `CANCELLED` status.
425   virtual void OnResponseBodyError(const HttpRequest& request,
426                                    const HttpResponse& response,
427                                    const absl::Status& error) = 0;
428 
429   // Called when the request has completed successfully (i.e. the response
430   // headers were delivered, and if there was a response body then it was also
431   // delivered successfully). Must not be called if one of the error callbacks
432   // was already called for the given `HttpRequest`, and no further methods must
433   // be called on this `HttpRequestCallback` instance for the given
434   // `HttpRequest` after this method is called.
435   virtual void OnResponseCompleted(const HttpRequest& request,
436                                    const HttpResponse& response) = 0;
437 };
438 
439 // A response to a given `HttpRequest`. Implemented by the `HttpClient`
440 // implementer.
441 //
442 // The lifetimes of instances of this class are managed by the `HttpClient`
443 // implementer. Instances of this class must remain alive for at least as long
444 // as the corresponding `HttpRequestHandle` is alive.
445 //
446 // Note that all the data in this object should be for the last/final response.
447 // I.e. any responses corresponding to redirects should not be reflected here.
448 class HttpResponse {
449  public:
450   virtual ~HttpResponse() = default;
451 
452   // The HTTP status code returned by the server (e.g. 200).
453   virtual int code() const = 0;
454 
455   // The response headers. Implementations are allowed to either coalesce
456   // repeated headers using commas (as per RFC2616 section 4.2), or to return
457   // them as separate entries.
458   //
459   // See the `HttpClient` comment for the expected behavior w.r.t. specific
460   // headers (e.g. "Set-Cookie", "Content-Encoding" and "Transfer-Encoding").
461   virtual const HeaderList& headers() const = 0;
462 };
463 
464 }  // namespace http
465 }  // namespace client
466 }  // namespace fcp
467 
468 #endif  // FCP_CLIENT_HTTP_HTTP_CLIENT_H_
469