/*
 * Copyright 2021 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef FCP_CLIENT_HTTP_HTTP_CLIENT_H_
#define FCP_CLIENT_HTTP_HTTP_CLIENT_H_

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"

namespace fcp {
namespace client {
namespace http {

// A single HTTP header, stored as a pair of strings (presumably the header
// name first and its value second -- confirm against implementations).
using Header = std::pair<std::string, std::string>;
// This is a vector of pairs and not a map since multiple request headers with
// the same name are allowed (see RFC2616 section 4.2).
using HeaderList = std::vector<Header>;

class HttpRequest;          // forward declaration
class HttpRequestCallback;  // forward declaration
class HttpRequestHandle;    // forward declaration
class HttpResponse;         // forward declaration

// An interface that allows the callers to make HTTP requests and receive their
// responses.
//
// Platforms will be required to pass an instance of this class to
// `RunFederatedComputation(...)`, and such instances must remain alive for at
// least the duration of those calls.
//
// Instances of this class must support being called from any thread.
// In such cases implementations should ideally handle multiple requests in
// parallel, at least up to an implementation-defined max number of parallel
// requests (i.e. ideally two `PerformRequests` calls on different threads can
// be handled in parallel, rather than having the 2nd call block until the 1st
// call is completed).
//
// Besides the requirements documented further below, the following high-level
// behavior is required of any `HttpClient` implementation:
// - Underlying protocols:
//   * Implementations must support at least HTTP/1.1 over TLS 1.2.
//   * Implementations are also allowed to serve requests using HTTP/2, QUIC or
//     other newer protocols.
//   * Implementations must support both IPv4 and IPv6 (but they are allowed to
//     fall back to IPv4).
// - Certificate validation:
//   * Implementations are responsible for TLS certificate validation and for
//     maintaining an up-to-date set of root certificates as well as an
//     up-to-date HTTP/TLS implementation.
//   * Implementations should not include user-added CAs, and should consider
//     restricting the set of CAs further to only those needed for connecting to
//     the expected endpoints.
// - Cookies:
//   * Implementations must not supply any cookies in requests (beyond those
//     that may be specified in the `HttpRequest::extra_headers()` method).
//   * Implementations must not store any cookies returned by the server.
//   * Instead, they must return any server-specified "Set-Cookie" response
//     header via `HttpResponse::headers()`.
// - Redirects:
//   * Implementations must follow HTTP redirect responses, up to an
//     implementation-defined maximum.
//   * In such cases the response headers & body returned via the interfaces
//     below should be those of the final response.
//   * See `HttpRequestCallback` docs below for more details.
// - Caching:
//   * Implementations should not implement a cache as it is expected that
//     naive HTTP-level caching will not be effective (and since a cache may
//     ultimately be implemented over this interface, in the Federated Compute
//     library itself).
//   * If implementations do implement one, however, they are expected to abide
//     by the standard HTTP caching rules (see the `HttpRequest::Method` docs
//     for more details).
// - Response body decompression & decoding:
//   * If no "Accept-Encoding" request header is explicitly specified in
//     `HttpRequest::extra_headers()`, then implementations must advertise an
//     "Accept-Encoding" request header themselves whose value includes at
//     least "gzip" (additional encodings are allowed to be specified in
//     addition to "gzip"), and must transparently decompress any compressed
//     server responses before returning the data via these interfaces.
//     Implementations are also allowed to advertise/support additional
//     encoding methods.
//   * In such cases where no "Accept-Encoding" header is specified,
//     implementations must remove the "Content-Encoding" and
//     "Content-Length" headers from headers returned via
//     `HttpResponse::headers()` (since those wouldn't reflect the payload
//     delivered via this interface).
//   * However, if an "Accept-Encoding" request header *is* explicitly
//     specified, then implementations must use that header verbatim and they
//     must not decompress the response (even if they natively support the
//     compression method), and they must leave the "Content-Encoding" and
//     "Content-Length" headers intact.
//   * This ensures that the caller of this interface can take full control of
//     the decompression and/or choose to store decompressed payloads on disk
//     if it so chooses.
//   * Implementations must transparently decode server responses served with
//     "Transfer-Encoding: chunked".
In such cases they must remove the 119 // "Transfer-Encoding" response header. 120 // - Request body compression & encoding: 121 // * If implementations receive a "Content-Encoding" request header, this 122 // means that the request body stream they receive has already been 123 // compressed. The implementation must leave the header and request body 124 // intact in such cases (i.e. not re-compress it). 125 // * If implementations receive a "Content-Length" request header, they must 126 // use it verbatim and they should then assume that the request body will 127 // be of exactly that size. 128 // * If they do not receive such a header then they must use the 129 // "Transfer-encoding: chunked" mechanism to transmit the request body 130 // (i.e. they shouldn't specify a "Content-Length" header and they should 131 // transmit the body in chunks), or use an equivalent method of streaming 132 // the data (such as HTTP/2's data streaming). 133 class HttpClient { 134 public: 135 virtual ~HttpClient() = default; 136 137 // Enqueues an HTTP request, without starting it yet. To start the request the 138 // `HttpRequestHandle` must be passed to `PerformRequests`. Each 139 // `HttpRequestHandle` must be passed to at most one `PerformRequests` call. 140 // 141 // The `HttpClient` implementation assumes ownership of the `HttpRequest` 142 // object, and the implementation must delete the object when the 143 // `HttpRequestHandle` is deleted. 144 ABSL_MUST_USE_RESULT 145 virtual std::unique_ptr<HttpRequestHandle> EnqueueRequest( 146 std::unique_ptr<HttpRequest> request) = 0; 147 148 // Performs the given requests. Results will be returned to each 149 // corresponding `HttpRequestCallback` while this method is blocked. This 150 // method must block until all requests have finished or have been cancelled, 151 // and until all corresponding request callbacks have returned. 
152 // 153 // By decoupling the enqueueing and starting of (groups of) requests, 154 // implementations may be able to handle concurrent requests more optimally 155 // (e.g. by issuing them over a shared HTTP connection). Having separate 156 // per-request `HttpRequestHandle` objects also makes it easier to support 157 // canceling specific requests, releasing resources for specific requests, 158 // accessing stats for specific requests, etc. 159 // 160 // The `HttpRequestHandle` and `HttpRequestCallback` instances must outlive 161 // the call to `PerformRequests`, but may be deleted any time after this call 162 // has returned. 163 // 164 // Returns an `INVALID_ARGUMENT` error if a `HttpRequestHandle` was previously 165 // already passed to another `PerformRequests` call, or if an 166 // `HttpRequestHandle`'s `Cancel` method was already called before being 167 // passed to this call. 168 virtual absl::Status PerformRequests( 169 std::vector<std::pair<HttpRequestHandle*, HttpRequestCallback*>> 170 requests) = 0; 171 }; 172 173 // An HTTP request for a single resource. Implemented by the caller of 174 // `HttpClient`. 175 // 176 // Once instances are passed to `EnqueueRequest`, their lifetime is managed by 177 // the `HttpClient` implementation. Implementations must tie the `HttpRequest` 178 // instance lifetime to the lifetime of the `HttpRequestHandle` they return 179 // (i.e. they should delete the `HttpRequest` from the `HttpRequestHandle` 180 // destructor). 181 // 182 // Methods of this class may get called from any thread (and subsequent calls 183 // are not required to all happen on the same thread). 184 class HttpRequest { 185 public: 186 // Note: the request methods imply a set of standard request properties such 187 // as cacheability, safety, and idempotency. See 188 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods. 
189 // 190 // The caller of `HttpClient` may implement its own caching layer in the 191 // future, so implementations are not expected to cache cacheable requests 192 // (although they are technically allowed to). 193 // 194 // Implementations should not automatically retry requests, even if the 195 // request method implies it is safe or idempotent. The caller of `HttpClient` 196 // will own the responsibility for retrying requests. 197 enum class Method { kHead, kGet, kPost, kPut, kPatch, kDelete }; 198 199 // Must not be called until any corresponding `HttpRequestHandle` has been 200 // deleted. 201 virtual ~HttpRequest() = default; 202 203 // The URI to request. Will always have an "https://" scheme (but this may be 204 // extended in the future). 205 virtual absl::string_view uri() const = 0; 206 207 // The HTTP method to use for this request. 208 virtual Method method() const = 0; 209 210 // Extra request headers to include with this request, in addition to any 211 // headers specified by the `HttpClient` implementation. 212 // 213 // See the `HttpClient` comment for the expected behavior w.r.t. a few 214 // specific headers. 215 virtual const HeaderList& extra_headers() const = 0; 216 217 // Returns true if the request has a request body (which can be read using 218 // `ReadBody`). If the request body payload size is known ahead of time, then 219 // the "Content-Length" header will be set in `extra_headers()`. If it isn't 220 // known yet then the `HttpClient` implementation should use the 221 // "Transfer-Encoding: chunked" encoding to transmit the request body to the 222 // server in chunks (or use an equivalent method of streaming the data, e.g. 223 // if the connection uses HTTP/2). See the `HttpClient` comment for more 224 // details. 225 virtual bool HasBody() const = 0; 226 227 // HttpRequests that up to `requested` bytes of the request body be read into 228 // `buffer`, and that the actual amount of bytes read is returned. 
The caller 229 // retains ownership of the buffer. 230 // 231 // Callees must return at least 1 byte, but may otherwise return less than the 232 // requested amount of data, if more data isn't available yet. Callees should 233 // return `OUT_OF_RANGE` when the end of data has been reached, in which case 234 // `buffer` should not be modified. 235 // 236 // Callees should return data ASAP, as delaying this for too long may cause 237 // the network stream to fall idle/run out of data to transmit. 238 // 239 // May also return other errors, in which case the request will be ended and 240 // `HttpRequestCallback::OnResponseError` will be called with the same error. 241 virtual absl::StatusOr<int64_t> ReadBody(char* buffer, int64_t requested) = 0; 242 }; 243 244 // A handle to a pending `HttpRequest`, allowing a caller of `HttpClient` to 245 // access stats for the request or to cancel ongoing requests. Implemented by 246 // the `HttpClient` implementer. 247 // 248 // The lifetimes of instances of this class are owned by the caller of 249 // `HttpClient`. 250 // 251 // Methods of this class may get called from any thread (and subsequent calls 252 // are not required to all happen on the same thread). 253 class HttpRequestHandle { 254 public: 255 // When this is called, `HttpClient` implementations should delete all their 256 // owned resources as well as the associated `HttpRequest`. 257 virtual ~HttpRequestHandle() = default; 258 259 // The total amount of data sent/received over the network for this request up 260 // to this point. These numbers should reflect as close as possible the amount 261 // of bytes sent "over the wire". This means, for example, that if the data is 262 // compressed or if a `Transfer-Encoding` is used, the numbers should reflect 263 // the compressed and/or encoded size of the data (if the implementation is 264 // able to account for that). 
Implementations are allowed to account for the 265 // overhead of TLS encoding in these numbers, but are not required to (since 266 // many HTTP libraries also do not provide stats at that level of 267 // granularity). 268 // 269 // If the request was served from a cache then this should reflect only the 270 // actual bytes sent over the network (e.g. 0 if returned from disk directly, 271 // or if a cache validation request was sent only those bytes used by the 272 // validation request/response). 273 // 274 // If the request involved redirects, the numbers returned here should include 275 // the bytes sent/received for those redirects, if the implementation supports 276 // this. Otherwise they are allowed to reflect only the final 277 // request/response's bytes sent/received. 278 // 279 // Implementations should strive to return as up-to-date numbers are possible 280 // from these methods (e.g. ideally the 'sent' number should reflect the 281 // amount of request body data that has been uploaded so far, even if the 282 // upload hasn't completed fully yet; similarly the 'received' number should 283 // reflect the amount of response body data received so far, even if the 284 // response hasn't been fully received yet). 285 // 286 // The numbers returned here are not required to increase monotonically 287 // between each call to the method. E.g. implementations are allowed to return 288 // best-available estimates while the request is still in flight, and then 289 // revise the numbers down to a more accurate number once the request has been 290 // completed. 291 struct SentReceivedBytes { 292 int64_t sent_bytes; 293 int64_t received_bytes; 294 }; 295 virtual SentReceivedBytes TotalSentReceivedBytes() const = 0; 296 297 // Used to indicate that the request should be cancelled and that 298 // implementations may release resources associated with this request (e.g. 299 // the socket used by the request). 
300 // 301 // Callers are still only allowed to delete this instance once after any 302 // corresponding `PerformRequests()` call has completed, and not before. 303 // 304 // If a `PerformRequests` call is ongoing for this handle, then the 305 // corresponding `HttpRequestCallback` instance may still receive further 306 // method invocations after this call returns (e.g. because an invocation may 307 // already have been in flight). 308 // 309 // If a `PerformRequests` call is ongoing for this handle, and if the 310 // `HttpRequestCallback::OnResponseStarted` method was not called yet, then 311 // the `HttpRequestCallback::OnResponseError` method must be called with 312 // status `CANCELLED`. 313 // 314 // Otherwise, if a `PerformRequests` call is ongoing for this handle, and if 315 // the `HttpRequestCallback::OnResponseCompleted` method was not called yet, 316 // then the `HttpRequestCallback::OnResponseBodyError` method must be called 317 // with status `CANCELLED`. 318 virtual void Cancel() = 0; 319 }; 320 321 // The callback interface that `HttpClient` implementations must use to deliver 322 // the response to a `HttpRequest`. Implemented by the caller of `HttpClient`. 323 // 324 // The lifetimes of instances of this class are owned by the caller of 325 // `HttpClient`. Instances must remain alive for at least as long as their 326 // corresponding `PerformRequests` call. 327 // 328 // Methods of this class may get called from any thread (incl. concurrently), 329 // but callers of this class must always call the callback methods for a 330 // specific `HttpRequest` in the order specified in each method's documentation. 331 // Implementations of this class therefore likely should use internal 332 // synchronization. 333 // 334 // For example, a call to `OnResponseBody` for a given `HttpRequest` A will 335 // always be preceded by a completed call to `OnResponseStarted` for that same 336 // request A. 
// However, callbacks for different `HttpRequest` objects may happen
// concurrently, so for example, `OnResponseStarted` may be called concurrently
// for two different requests A and B. This latter scenario means that if the
// same `HttpRequestCallback` object is used to handle callbacks for both
// requests, then the object has to handle concurrent calls correctly.
class HttpRequestCallback {
 public:
  virtual ~HttpRequestCallback() = default;

  // Called when the final HTTP response headers have been received (i.e. after
  // any redirects have been followed but before the response body may have been
  // received fully) for the given `HttpRequest`. The response data can be
  // accessed via the given `HttpResponse`, which will remain alive for the
  // lifetime of the corresponding `HttpRequestHandle`.
  //
  // Note that all the data in the `HttpResponse` object should reflect the
  // last/final response (i.e. it shouldn't reflect any already-followed
  // redirects).
  //
  // If the response has a body then after this method is called
  // `OnResponseBody` will be called one or more times to deliver the response
  // body (or `OnResponseBodyError` if an error occurs).
  //
  // Note that responses with an HTTP status code other than 200 ("OK") may
  // still have response bodies, and implementations must deliver these via the
  // `OnResponseBody` callback, just as they should for a successful response.
  //
  // If this method returns an error then the `HttpClient` implementation should
  // consider the `HttpRequest` canceled. No further methods may be called on
  // this `HttpRequestCallback` instance for the given `HttpRequest` in this
  // case.
  virtual absl::Status OnResponseStarted(const HttpRequest& request,
                                         const HttpResponse& response) = 0;

  // Called when the request encountered an error or timed out, before receiving
  // the response headers completely. No further methods may be called on this
  // `HttpRequestCallback` instance for the given `HttpRequest` after this
  // method is called.
  //
  // If the implementation is able to discern that the error may have been
  // transient, they should report `UNAVAILABLE` via `error`.
  //
  // If more than the implementation's defined max number of redirects occurred
  // (without reaching the final response), then implementations should report
  // `OUT_OF_RANGE` here.
  //
  // If the implementation hit an implementation-specific timeout (even though
  // implementations are discouraged from imposing such timeouts), then this
  // should be `DEADLINE_EXCEEDED`.
  //
  // If the `HttpRequestHandle::Cancel` method was called before
  // `OnResponseStarted` was called for the given `HttpRequest`, then this
  // method will be called with a `CANCELLED` status.
  //
  // If the request's `HttpRequest::ReadBody` returned an unexpected error,
  // then this method will be called with that error.
  virtual void OnResponseError(const HttpRequest& request,
                               const absl::Status& error) = 0;

  // Called (possibly multiple times per request) when a block of response data
  // is available in `data`. This method must only be called after
  // `OnResponseStarted` was called for the given `HttpRequest`.
  //
  // Callees must process the data ASAP, as delaying this for too long may
  // prevent additional data from arriving on the network stream.
  //
  // If this method returns an error then the `HttpClient` implementation should
  // consider the `HttpRequest` canceled. No further methods may be called on
  // this `HttpRequestCallback` instance for the given `HttpRequest` in this
  // case.
  virtual absl::Status OnResponseBody(const HttpRequest& request,
                                      const HttpResponse& response,
                                      absl::string_view data) = 0;

  // Called when the request encountered an error or timed out while receiving
  // the response body (i.e. after `OnResponseStarted` was called). No further
  // methods may be called on this `HttpRequestCallback` instance for the given
  // `HttpRequest` after this method is called.
  //
  // If the implementation is able to discern that the error may have been
  // transient, they should report `UNAVAILABLE` via `error`.
  //
  // If the implementation hit an implementation-specific timeout (even though
  // implementations are discouraged from imposing such timeouts), then this
  // should be `DEADLINE_EXCEEDED`.
  //
  // If the `HttpRequestHandle::Cancel` method was called before
  // `OnResponseCompleted` was called for the given `HttpRequest`, then this
  // method will be called with a `CANCELLED` status.
  virtual void OnResponseBodyError(const HttpRequest& request,
                                   const HttpResponse& response,
                                   const absl::Status& error) = 0;

  // Called when the request has completed successfully (i.e. the response
  // headers were delivered, and if there was a response body then it was also
  // delivered successfully). Must not be called if one of the error callbacks
  // was already called for the given `HttpRequest`, and no further methods may
  // be called on this `HttpRequestCallback` instance for the given
  // `HttpRequest` after this method is called.
  virtual void OnResponseCompleted(const HttpRequest& request,
                                   const HttpResponse& response) = 0;
};

// A response to a given `HttpRequest`. Implemented by the `HttpClient`
// implementer.
//
// The lifetimes of instances of this class are managed by the `HttpClient`
// implementer. Instances of this class must remain alive for at least as long
// as the corresponding `HttpRequestHandle` is alive.
//
// Note that all the data in this object should be for the last/final response.
// I.e. any responses corresponding to redirects should not be reflected here.
class HttpResponse {
 public:
  virtual ~HttpResponse() = default;

  // The response code returned by the server (e.g. 200).
  virtual int code() const = 0;

  // The response headers. Implementations are allowed to either coalesce
  // repeated headers using commas (as per RFC2616 section 4.2), or to return
  // them as separate entries.
  //
  // See `HttpClient` comment for the expected behavior w.r.t. a few specific
  // headers.
  virtual const HeaderList& headers() const = 0;
};

}  // namespace http
}  // namespace client
}  // namespace fcp

#endif  // FCP_CLIENT_HTTP_HTTP_CLIENT_H_