1 // Copyright 2023 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 #pragma once
16 #include <pw_chrono/system_clock.h>
17 
18 #include <algorithm>
19 #include <array>
20 #include <functional>
21 #include <optional>
22 #include <utility>
23 #include <vector>
24 
25 #include "pw_bluetooth_sapphire/internal/host/common/assert.h"
26 
27 namespace bt::internal {
28 
29 // This class is not thread-safe.
30 // TODO(fxbug.dev/42150683): Store each retirement's timestamp in order to
31 // provide information like how much time the log depth represents and overall
32 // throughput (bytes/sec and packets/sec)
33 class RetireLog final {
34  public:
35   // Create a bounded buffer intended to hold recently retired PipelineMonitor
36   // tokens. It supports efficient querying of statistics about logged events.
37   // |min_depth| specifies the number of entries that must be logged before
38   // queries return meaningful results and must be non-zero. |max_depth| limits
39   // the number of recent entries that are kept in memory. Each entry is
40   // represented by Retired, so the log memory use is roughly sizeof(Retired) *
41   // max_depth. The designed range is between 100 and 100,000 entries deep.
42   explicit RetireLog(size_t min_depth, size_t max_depth);
43 
44   ~RetireLog() = default;
45 
46   // Add an entry to the log. If depth() is less than max_depth, the log is
47   // expanded. Otherwise, the oldest entry is replaced. This is a fast operation
48   // that does not allocate.
49   void Retire(size_t byte_count, pw::chrono::SystemClock::duration age);
50 
51   // The current number of log entries.
depth()52   [[nodiscard]] size_t depth() const { return buffer_.size(); }
53 
54   // Compute the quantiles at cut points specified in |partitions| as numbers
55   // between 0 and 1. Each partition specifies a point in the distribution of
56   // |bytes_count|s in the log. Returns an array of |byte_count| entries
57   // corresponding to those points, if |depth()| is at least min_depth as
58   // provided to the ctor. Otherwise, returns std::nullopt.
59   //
60   // Cut points may, but do not need to, be evenly distributed, e.g. {.25, .5,
61   // .75} for quartiles. If a cut point is "between" log entry values, the
62   // nearest value is chosen without interpolation (e.g. for 0.5 with an even
63   // log depth, a biased median is returned rather than the average of the true
64   // median samples).
65   //
66   // TODO(fxbug.dev/42150683): Add a |max_age| parameter to window to only
67   // samples that are recent enough to be relevant
68   template <size_t NumQuantiles>
69   [[nodiscard]] std::optional<std::array<size_t, NumQuantiles>>
ComputeByteCountQuantiles(std::array<double,NumQuantiles> partitions)70   ComputeByteCountQuantiles(std::array<double, NumQuantiles> partitions) const {
71     return ComputeQuantiles(partitions, &Retired::byte_count);
72   }
73 
74   // Similar to ComputeByteCountQuantiles, but for the |age| durations logged in
75   // |Retire|.
76   template <size_t NumQuantiles>
77   [[nodiscard]] std::optional<
78       std::array<pw::chrono::SystemClock::duration, NumQuantiles>>
ComputeAgeQuantiles(std::array<double,NumQuantiles> partitions)79   ComputeAgeQuantiles(std::array<double, NumQuantiles> partitions) const {
80     return ComputeQuantiles(partitions, &Retired::age);
81   }
82 
83  private:
84   struct Retired {
85     size_t byte_count;
86     pw::chrono::SystemClock::duration age;
87   };
88 
89   // Helper function to build the index_sequence
90   template <typename ArrayT, typename PointerT>
ComputeQuantiles(ArrayT partitions,PointerT element_ptr)91   [[nodiscard]] auto ComputeQuantiles(ArrayT partitions,
92                                       PointerT element_ptr) const {
93     return ComputeQuantilesImpl(
94         partitions, element_ptr, std::make_index_sequence<partitions.size()>());
95   }
96 
97   // Computes the indexes to sample the retire log as if it were sorted, then
98   // returns those samples in the same order as specified. |element_ptr|
99   // specifies which field in Retired to sort by. The log isn't actually fully
100   // sorted; instead, a k-selection algorithm is used for each sample. Assuming
101   // partitions.size() << depth(), this runs on average linearly to their
102   // product.
103   template <size_t NumQuantiles, typename ElementT, size_t... Indexes>
104   [[nodiscard]] std::optional<std::array<ElementT, NumQuantiles>>
ComputeQuantilesImpl(std::array<double,NumQuantiles> partitions,ElementT Retired::* element_ptr,std::index_sequence<Indexes...>)105   ComputeQuantilesImpl(std::array<double, NumQuantiles> partitions,
106                        ElementT Retired::* element_ptr,
107                        std::index_sequence<Indexes...> /*unused*/) const {
108     if (depth() < min_depth_) {
109       return std::nullopt;
110     }
111 
112     // Computing quantiles is done in-place with k-selection routines, so use a
113     // working copy that is reused across invocations to prevent re-allocation
114     // with each call
115     std::vector<ElementT>& elements =
116         std::get<std::vector<ElementT>>(quantile_scratchpads_);
117     elements.resize(depth());
118     std::transform(buffer_.begin(),
119                    buffer_.end(),
120                    elements.begin(),
121                    std::mem_fn(element_ptr));
122 
123     // The k-selection we use is std::nth_element, which conveniently does a
124     // partial sort about k. By pre-sorting values of k, each invocation of
125     // nth_element selects only from the elements that are greater than or equal
126     // to the previous selection. The values are sorted with their corresponding
127     // indexes to remember the original order for the output
128     std::array partitions_and_indexes = {
129         std::pair{partitions[Indexes], Indexes}...};
130     std::sort(partitions_and_indexes.begin(), partitions_and_indexes.end());
131 
132     std::array<ElementT, NumQuantiles> quantiles;  // output
133     auto unsorted_begin = elements.begin();
134     for (auto [partition, index] : partitions_and_indexes) {
135       PW_CHECK(partition >= 0.);
136       PW_CHECK(partition <= 1.);
137       // Even though the last element is at index depth()-1, use depth() here
138       // instead to ensure the final (max) element has sufficient range
139       // representation.
140       const size_t cut_point =
141           static_cast<size_t>(static_cast<double>(depth()) * partition);
142       PW_CHECK(cut_point <= depth());
143 
144       // In the case that partition = 1.0, cut_point = depth(). Saturate to the
145       // final (max) element.
146       const size_t selection_offset = std::min(cut_point, depth() - 1);
147       const auto cut_iter = std::next(elements.begin(), selection_offset);
148       std::nth_element(unsorted_begin, cut_iter, elements.end());
149       // Post-condition: the element at cut_iter is what it would be if
150       // |elements| were sorted, with all preceding elements no greater than
151       // *cut_iter and all successive elements no less than *cut_iter.
152       quantiles[index] = *cut_iter;
153 
154       // Technically the next unsorted element is at cut_iter+1 but moving
155       // unsorted_begin past cut_iter causes problems if multiple values of
156       // |partition| are the same.
157       unsorted_begin = cut_iter;
158     }
159     return quantiles;
160   }
161 
162   const size_t min_depth_;
163   const size_t max_depth_;
164 
165   // Circular buffer of recently retired entries, kept in increasing retirement
166   // time order starting at the oldest at |next_insertion_index_|. Bounded to
167   // |max_depth_| in size.
168   std::vector<Retired> buffer_;
169   size_t next_insertion_index_ = 0;
170 
171   // Used by ComputeQuantilesImpl to store type-dependent k-selection
172   // computation working data. We could probably save memory by using the same
173   // scratchpad for both byte_count and age, but it's not worth the extra code
174   // complexity at this time.
175   mutable std::tuple<std::vector<size_t>,
176                      std::vector<pw::chrono::SystemClock::duration>>
177       quantile_scratchpads_;
178 };
179 
180 }  // namespace bt::internal
181