1 // 2 // 3 // Copyright 2018 gRPC authors. 4 // 5 // Licensed under the Apache License, Version 2.0 (the "License"); 6 // you may not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // 17 // 18 19 #ifndef GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_DATA_STORE_H 20 #define GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_DATA_STORE_H 21 22 #include <grpc/support/port_platform.h> 23 24 #include <stddef.h> 25 #include <stdint.h> 26 27 #include <memory> 28 #include <set> 29 #include <string> 30 #include <unordered_map> 31 #include <utility> 32 33 #include <grpcpp/support/config.h> 34 35 namespace grpc { 36 namespace load_reporter { 37 38 // The load data storage is organized in hierarchy. The LoadDataStore is the 39 // top-level data store. In LoadDataStore, for each host we keep a 40 // PerHostStore, in which for each balancer we keep a PerBalancerStore. Each 41 // PerBalancerStore maintains a map of load records, mapping from LoadRecordKey 42 // to LoadRecordValue. The LoadRecordValue contains a map of customized call 43 // metrics, mapping from a call metric name to the CallMetricValue. 44 45 // The value of a customized call metric. 46 class CallMetricValue { 47 public: 48 explicit CallMetricValue(uint64_t num_calls = 0, 49 double total_metric_value = 0) num_calls_(num_calls)50 : num_calls_(num_calls), total_metric_value_(total_metric_value) {} 51 MergeFrom(CallMetricValue other)52 void MergeFrom(CallMetricValue other) { 53 num_calls_ += other.num_calls_; 54 total_metric_value_ += other.total_metric_value_; 55 } 56 57 // Getters. num_calls()58 uint64_t num_calls() const { return num_calls_; } total_metric_value()59 double total_metric_value() const { return total_metric_value_; } 60 61 private: 62 // The number of calls that finished with this metric. 63 uint64_t num_calls_ = 0; 64 // The sum of metric values across all the calls that finished with this 65 // metric. 66 double total_metric_value_ = 0; 67 }; 68 69 // The key of a load record. 70 class LoadRecordKey { 71 public: LoadRecordKey(std::string lb_id,std::string lb_tag,std::string user_id,std::string client_ip_hex)72 LoadRecordKey(std::string lb_id, std::string lb_tag, std::string user_id, 73 std::string client_ip_hex) 74 : lb_id_(std::move(lb_id)), 75 lb_tag_(std::move(lb_tag)), 76 user_id_(std::move(user_id)), 77 client_ip_hex_(std::move(client_ip_hex)) {} 78 79 // Parses the input client_ip_and_token to set client IP, LB ID, and LB tag. 80 LoadRecordKey(const std::string& client_ip_and_token, std::string user_id); 81 ToString()82 std::string ToString() const { 83 return "[lb_id_=" + lb_id_ + ", lb_tag_=" + lb_tag_ + 84 ", user_id_=" + user_id_ + ", client_ip_hex_=" + client_ip_hex_ + 85 "]"; 86 } 87 88 bool operator==(const LoadRecordKey& other) const { 89 return lb_id_ == other.lb_id_ && lb_tag_ == other.lb_tag_ && 90 user_id_ == other.user_id_ && client_ip_hex_ == other.client_ip_hex_; 91 } 92 93 // Gets the client IP bytes in network order (i.e., big-endian). 94 std::string GetClientIpBytes() const; 95 96 // Getters. lb_id()97 const std::string& lb_id() const { return lb_id_; } lb_tag()98 const std::string& lb_tag() const { return lb_tag_; } user_id()99 const std::string& user_id() const { return user_id_; } client_ip_hex()100 const std::string& client_ip_hex() const { return client_ip_hex_; } 101 102 struct Hasher { hash_combineHasher103 void hash_combine(size_t* seed, const std::string& k) const { 104 *seed ^= std::hash<std::string>()(k) + 0x9e3779b9 + (*seed << 6) + 105 (*seed >> 2); 106 } 107 operatorHasher108 size_t operator()(const LoadRecordKey& k) const { 109 size_t h = 0; 110 hash_combine(&h, k.lb_id_); 111 hash_combine(&h, k.lb_tag_); 112 hash_combine(&h, k.user_id_); 113 hash_combine(&h, k.client_ip_hex_); 114 return h; 115 } 116 }; 117 118 private: 119 std::string lb_id_; 120 std::string lb_tag_; 121 std::string user_id_; 122 std::string client_ip_hex_; 123 }; 124 125 // The value of a load record. 126 class LoadRecordValue { 127 public: 128 explicit LoadRecordValue(uint64_t start_count = 0, uint64_t ok_count = 0, 129 uint64_t error_count = 0, uint64_t bytes_sent = 0, 130 uint64_t bytes_recv = 0, uint64_t latency_ms = 0) start_count_(start_count)131 : start_count_(start_count), 132 ok_count_(ok_count), 133 error_count_(error_count), 134 bytes_sent_(bytes_sent), 135 bytes_recv_(bytes_recv), 136 latency_ms_(latency_ms) {} 137 138 LoadRecordValue(std::string metric_name, uint64_t num_calls, 139 double total_metric_value); 140 MergeFrom(const LoadRecordValue & other)141 void MergeFrom(const LoadRecordValue& other) { 142 start_count_ += other.start_count_; 143 ok_count_ += other.ok_count_; 144 error_count_ += other.error_count_; 145 bytes_sent_ += other.bytes_sent_; 146 bytes_recv_ += other.bytes_recv_; 147 latency_ms_ += other.latency_ms_; 148 for (const auto& p : other.call_metrics_) { 149 const std::string& key = p.first; 150 const CallMetricValue& value = p.second; 151 call_metrics_[key].MergeFrom(value); 152 } 153 } 154 GetNumCallsInProgressDelta()155 int64_t GetNumCallsInProgressDelta() const { 156 return static_cast<int64_t>(start_count_ - ok_count_ - error_count_); 157 } 158 ToString()159 std::string ToString() const { 160 return "[start_count_=" + std::to_string(start_count_) + 161 ", ok_count_=" + std::to_string(ok_count_) + 162 ", error_count_=" + std::to_string(error_count_) + 163 ", bytes_sent_=" + std::to_string(bytes_sent_) + 164 ", bytes_recv_=" + std::to_string(bytes_recv_) + 165 ", latency_ms_=" + std::to_string(latency_ms_) + ", " + 166 std::to_string(call_metrics_.size()) + " other call metric(s)]"; 167 } 168 InsertCallMetric(const std::string & metric_name,const CallMetricValue & metric_value)169 bool InsertCallMetric(const std::string& metric_name, 170 const CallMetricValue& metric_value) { 171 return call_metrics_.insert({metric_name, metric_value}).second; 172 } 173 174 // Getters. start_count()175 uint64_t start_count() const { return start_count_; } ok_count()176 uint64_t ok_count() const { return ok_count_; } error_count()177 uint64_t error_count() const { return error_count_; } bytes_sent()178 uint64_t bytes_sent() const { return bytes_sent_; } bytes_recv()179 uint64_t bytes_recv() const { return bytes_recv_; } latency_ms()180 uint64_t latency_ms() const { return latency_ms_; } call_metrics()181 const std::unordered_map<std::string, CallMetricValue>& call_metrics() const { 182 return call_metrics_; 183 } 184 185 private: 186 uint64_t start_count_ = 0; 187 uint64_t ok_count_ = 0; 188 uint64_t error_count_ = 0; 189 uint64_t bytes_sent_ = 0; 190 uint64_t bytes_recv_ = 0; 191 uint64_t latency_ms_ = 0; 192 std::unordered_map<std::string, CallMetricValue> call_metrics_; 193 }; 194 195 // Stores the data associated with a particular LB ID. 196 class PerBalancerStore { 197 public: 198 using LoadRecordMap = 199 std::unordered_map<LoadRecordKey, LoadRecordValue, LoadRecordKey::Hasher>; 200 PerBalancerStore(std::string lb_id,std::string load_key)201 PerBalancerStore(std::string lb_id, std::string load_key) 202 : lb_id_(std::move(lb_id)), load_key_(std::move(load_key)) {} 203 204 // Merge a load record with the given key and value if the store is not 205 // suspended. 206 void MergeRow(const LoadRecordKey& key, const LoadRecordValue& value); 207 208 // Suspend this store, so that no detailed load data will be recorded. 209 void Suspend(); 210 // Resume this store from suspension. 211 void Resume(); 212 // Is this store suspended or not? IsSuspended()213 bool IsSuspended() const { return suspended_; } 214 IsNumCallsInProgressChangedSinceLastReport()215 bool IsNumCallsInProgressChangedSinceLastReport() const { 216 return num_calls_in_progress_ != last_reported_num_calls_in_progress_; 217 } 218 219 uint64_t GetNumCallsInProgressForReport(); 220 ToString()221 std::string ToString() { 222 return "[PerBalancerStore lb_id_=" + lb_id_ + " load_key_=" + load_key_ + 223 "]"; 224 } 225 ClearLoadRecordMap()226 void ClearLoadRecordMap() { load_record_map_.clear(); } 227 228 // Getters. lb_id()229 const std::string& lb_id() const { return lb_id_; } load_key()230 const std::string& load_key() const { return load_key_; } load_record_map()231 const LoadRecordMap& load_record_map() const { return load_record_map_; } 232 233 private: 234 std::string lb_id_; 235 // TODO(juanlishen): Use bytestring protobuf type? 236 std::string load_key_; 237 LoadRecordMap load_record_map_; 238 uint64_t num_calls_in_progress_ = 0; 239 uint64_t last_reported_num_calls_in_progress_ = 0; 240 bool suspended_ = false; 241 }; 242 243 // Stores the data associated with a particular host. 244 class PerHostStore { 245 public: 246 // When a report stream is created, a PerBalancerStore is created for the 247 // LB ID (guaranteed unique) associated with that stream. If it is the only 248 // active store, adopt all the orphaned stores. If it is the first created 249 // store, adopt the store of kInvalidLbId. 250 void ReportStreamCreated(const std::string& lb_id, 251 const std::string& load_key); 252 253 // When a report stream is closed, the PerBalancerStores assigned to the 254 // associate LB ID need to be re-assigned to other active balancers, 255 // ideally with the same load key. If there is no active balancer, we have 256 // to suspend those stores and drop the incoming load data until they are 257 // resumed. 258 void ReportStreamClosed(const std::string& lb_id); 259 260 // Returns null if not found. Caller doesn't own the returned store. 261 PerBalancerStore* FindPerBalancerStore(const std::string& lb_id) const; 262 263 // Returns null if lb_id is not found. The returned pointer points to the 264 // underlying data structure, which is not owned by the caller. 265 const std::set<PerBalancerStore*>* GetAssignedStores( 266 const std::string& lb_id) const; 267 268 private: 269 // Creates a PerBalancerStore for the given LB ID, assigns the store to 270 // itself, and records the LB ID to the load key. 271 void SetUpForNewLbId(const std::string& lb_id, const std::string& load_key); 272 273 void AssignOrphanedStore(PerBalancerStore* orphaned_store, 274 const std::string& new_receiver); 275 276 std::unordered_map<std::string, std::set<std::string>> 277 load_key_to_receiving_lb_ids_; 278 279 // Key: LB ID. The key set includes all the LB IDs that have been 280 // allocated for reporting streams so far. 281 // Value: the unique pointer to the PerBalancerStore of the LB ID. 282 std::unordered_map<std::string, std::unique_ptr<PerBalancerStore>> 283 per_balancer_stores_; 284 285 // Key: LB ID. The key set includes the LB IDs of the balancers that are 286 // currently receiving report. 287 // Value: the set of raw pointers to the PerBalancerStores assigned to the LB 288 // ID. Note that the sets in assigned_stores_ form a division of the value set 289 // of per_balancer_stores_. 290 std::unordered_map<std::string, std::set<PerBalancerStore*>> assigned_stores_; 291 }; 292 293 // Thread-unsafe two-level bookkeeper of all the load data. 294 // Note: We never remove any store objects from this class, as per the 295 // current spec. That's because premature removal of the store objects 296 // may lead to loss of critical information, e.g., mapping from lb_id to 297 // load_key, and the number of in-progress calls. Such loss will cause 298 // information inconsistency when the balancer is re-connected. Keeping 299 // all the stores should be fine for PerHostStore, since we assume there 300 // should only be a few hostnames. But it's a potential problem for 301 // PerBalancerStore. 302 class LoadDataStore { 303 public: 304 // Returns null if not found. Caller doesn't own the returned store. 305 PerBalancerStore* FindPerBalancerStore(const std::string& hostname, 306 const std::string& lb_id) const; 307 308 // Returns null if hostname or lb_id is not found. The returned pointer points 309 // to the underlying data structure, which is not owned by the caller. 310 const std::set<PerBalancerStore*>* GetAssignedStores(const string& hostname, 311 const string& lb_id); 312 313 // If a PerBalancerStore can be found by the hostname and LB ID in 314 // LoadRecordKey, the load data will be merged to that store. Otherwise, 315 // only track the number of the in-progress calls for this unknown LB ID. 316 void MergeRow(const std::string& hostname, const LoadRecordKey& key, 317 const LoadRecordValue& value); 318 319 // Is the given lb_id a tracked unknown LB ID (i.e., the LB ID was associated 320 // with some received load data but unknown to this load data store)? IsTrackedUnknownBalancerId(const std::string & lb_id)321 bool IsTrackedUnknownBalancerId(const std::string& lb_id) const { 322 return unknown_balancer_id_trackers_.find(lb_id) != 323 unknown_balancer_id_trackers_.end(); 324 } 325 326 // Wrapper around PerHostStore::ReportStreamCreated. 327 void ReportStreamCreated(const std::string& hostname, 328 const std::string& lb_id, 329 const std::string& load_key); 330 331 // Wrapper around PerHostStore::ReportStreamClosed. 332 void ReportStreamClosed(const std::string& hostname, 333 const std::string& lb_id); 334 335 private: 336 // Buffered data that was fetched from Census but hasn't been sent to 337 // balancer. We need to keep this data ourselves because Census will 338 // delete the data once it's returned. 339 std::unordered_map<std::string, PerHostStore> per_host_stores_; 340 341 // Tracks the number of in-progress calls for each unknown LB ID. 342 std::unordered_map<std::string, uint64_t> unknown_balancer_id_trackers_; 343 }; 344 345 } // namespace load_reporter 346 } // namespace grpc 347 348 #endif // GRPC_SRC_CPP_SERVER_LOAD_REPORTER_LOAD_DATA_STORE_H 349