xref: /aosp_15_r20/external/perfetto/src/trace_processor/db/column/arrangement_overlay.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/db/column/arrangement_overlay.h"
18 
19 #include <algorithm>
20 #include <cstdint>
21 #include <memory>
22 #include <optional>
23 #include <unordered_set>
24 #include <utility>
25 #include <vector>
26 
27 #include "perfetto/base/logging.h"
28 #include "perfetto/trace_processor/basic_types.h"
29 #include "src/trace_processor/containers/bit_vector.h"
30 #include "src/trace_processor/db/column/data_layer.h"
31 #include "src/trace_processor/db/column/types.h"
32 #include "src/trace_processor/tp_metatrace.h"
33 
34 #include "protos/perfetto/trace_processor/metatrace_categories.pbzero.h"
35 
36 namespace perfetto::trace_processor::column {
37 
Flatten(uint32_t * start,const uint32_t * end,uint32_t stride)38 void ArrangementOverlay::Flatten(uint32_t* start,
39                                  const uint32_t* end,
40                                  uint32_t stride) {
41   for (uint32_t* it = start; it < end; it += stride) {
42     *it = (*arrangement_)[*it];
43   }
44 }
45 
ChainImpl(std::unique_ptr<DataLayerChain> inner,const std::vector<uint32_t> * arrangement,Indices::State arrangement_state,bool does_arrangement_order_storage)46 ArrangementOverlay::ChainImpl::ChainImpl(
47     std::unique_ptr<DataLayerChain> inner,
48     const std::vector<uint32_t>* arrangement,
49     Indices::State arrangement_state,
50     bool does_arrangement_order_storage)
51     : inner_(std::move(inner)),
52       arrangement_(arrangement),
53       arrangement_state_(arrangement_state),
54       does_arrangement_order_storage_(does_arrangement_order_storage) {}
55 
SingleSearch(FilterOp op,SqlValue sql_val,uint32_t index) const56 SingleSearchResult ArrangementOverlay::ChainImpl::SingleSearch(
57     FilterOp op,
58     SqlValue sql_val,
59     uint32_t index) const {
60   return inner_->SingleSearch(op, sql_val, (*arrangement_)[index]);
61 }
62 
ValidateSearchConstraints(FilterOp op,SqlValue value) const63 SearchValidationResult ArrangementOverlay::ChainImpl::ValidateSearchConstraints(
64     FilterOp op,
65     SqlValue value) const {
66   return inner_->ValidateSearchConstraints(op, value);
67 }
68 
SearchValidated(FilterOp op,SqlValue sql_val,Range in) const69 RangeOrBitVector ArrangementOverlay::ChainImpl::SearchValidated(
70     FilterOp op,
71     SqlValue sql_val,
72     Range in) const {
73   PERFETTO_TP_TRACE(metatrace::Category::DB,
74                     "ArrangementOverlay::ChainImpl::Search");
75 
76   if (does_arrangement_order_storage_ && op != FilterOp::kGlob &&
77       op != FilterOp::kRegex) {
78     OrderedIndices indices{arrangement_->data() + in.start, in.size(),
79                            arrangement_state_};
80     if (op == FilterOp::kNe) {
81       // Do an equality search and "invert" the range.
82       Range inner_res =
83           inner_->OrderedIndexSearchValidated(FilterOp::kEq, sql_val, indices);
84       BitVector bv(in.start);
85       bv.Resize(in.start + inner_res.start, true);
86       bv.Resize(in.start + inner_res.end, false);
87       bv.Resize(in.end, true);
88       return RangeOrBitVector(std::move(bv));
89     }
90     Range inner_res = inner_->OrderedIndexSearchValidated(op, sql_val, indices);
91     return RangeOrBitVector(
92         Range(in.start + inner_res.start, in.start + inner_res.end));
93   }
94 
95   const auto& arrangement = *arrangement_;
96   PERFETTO_DCHECK(in.end <= arrangement.size());
97   const auto [min_i, max_i] =
98       std::minmax_element(arrangement.begin() + static_cast<int32_t>(in.start),
99                           arrangement.begin() + static_cast<int32_t>(in.end));
100 
101   auto storage_result =
102       inner_->SearchValidated(op, sql_val, Range(*min_i, *max_i + 1));
103   BitVector::Builder builder(in.end, in.start);
104   if (storage_result.IsRange()) {
105     Range storage_range = std::move(storage_result).TakeIfRange();
106     for (uint32_t i = in.start; i < in.end; ++i) {
107       builder.Append(storage_range.Contains(arrangement[i]));
108     }
109   } else {
110     BitVector storage_bitvector = std::move(storage_result).TakeIfBitVector();
111     PERFETTO_DCHECK(storage_bitvector.size() == *max_i + 1);
112 
113     // After benchmarking, it turns out this complexity *is* actually worthwhile
114     // and has a noticable impact on the performance of this function in real
115     // world tables.
116 
117     // Fast path: we compare as many groups of 64 elements as we can.
118     // This should be very easy for the compiler to auto-vectorize.
119     const uint32_t* arrangement_idx = arrangement.data() + in.start;
120     uint32_t fast_path_elements = builder.BitsInCompleteWordsUntilFull();
121     for (uint32_t i = 0; i < fast_path_elements; i += BitVector::kBitsInWord) {
122       uint64_t word = 0;
123       // This part should be optimised by SIMD and is expected to be fast.
124       for (uint32_t k = 0; k < BitVector::kBitsInWord; ++k, ++arrangement_idx) {
125         bool comp_result = storage_bitvector.IsSet(*arrangement_idx);
126         word |= static_cast<uint64_t>(comp_result) << k;
127       }
128       builder.AppendWord(word);
129     }
130 
131     // Slow path: we compare <64 elements and append to fill the Builder.
132     uint32_t back_elements = builder.BitsUntilFull();
133     for (uint32_t i = 0; i < back_elements; ++i, ++arrangement_idx) {
134       builder.Append(storage_bitvector.IsSet(*arrangement_idx));
135     }
136   }
137   return RangeOrBitVector(std::move(builder).Build());
138 }
139 
IndexSearchValidated(FilterOp op,SqlValue sql_val,Indices & indices) const140 void ArrangementOverlay::ChainImpl::IndexSearchValidated(
141     FilterOp op,
142     SqlValue sql_val,
143     Indices& indices) const {
144   PERFETTO_TP_TRACE(metatrace::Category::DB,
145                     "ArrangementOverlay::ChainImpl::IndexSearch");
146 
147   for (auto& i : indices.tokens) {
148     i.index = (*arrangement_)[i.index];
149   }
150   // If the indices state is monotonic, we can just pass the arrangement's
151   // state.
152   indices.state = indices.state == Indices::State::kMonotonic
153                       ? arrangement_state_
154                       : Indices::State::kNonmonotonic;
155   return inner_->IndexSearchValidated(op, sql_val, indices);
156 }
157 
StableSort(Token * start,Token * end,SortDirection direction) const158 void ArrangementOverlay::ChainImpl::StableSort(Token* start,
159                                                Token* end,
160                                                SortDirection direction) const {
161   for (Token* it = start; it != end; ++it) {
162     it->index = (*arrangement_)[it->index];
163   }
164   inner_->StableSort(start, end, direction);
165 }
166 
Distinct(Indices & indices) const167 void ArrangementOverlay::ChainImpl::Distinct(Indices& indices) const {
168   PERFETTO_TP_TRACE(metatrace::Category::DB,
169                     "ArrangementOverlay::ChainImpl::Distinct");
170   // TODO(mayzner): Utilize `does_arrangmeent_order_storage_`.
171   std::unordered_set<uint32_t> s;
172   indices.tokens.erase(
173       std::remove_if(indices.tokens.begin(), indices.tokens.end(),
174                      [this, &s](Token& idx) {
175                        if (s.insert(idx.index).second) {
176                          idx.index = (*arrangement_)[idx.index];
177                          return false;
178                        }
179                        return true;
180                      }),
181       indices.tokens.end());
182   inner_->Distinct(indices);
183 }
184 
MaxElement(Indices & indices) const185 std::optional<Token> ArrangementOverlay::ChainImpl::MaxElement(
186     Indices& indices) const {
187   PERFETTO_TP_TRACE(metatrace::Category::DB,
188                     "ArrangementOverlay::ChainImpl::MaxElement");
189   for (auto& i : indices.tokens) {
190     i.index = (*arrangement_)[i.index];
191   }
192   // If the indices state is monotonic, we can just pass the arrangement's
193   // state.
194   indices.state = indices.state == Indices::State::kMonotonic
195                       ? arrangement_state_
196                       : Indices::State::kNonmonotonic;
197   return inner_->MaxElement(indices);
198 }
199 
MinElement(Indices & indices) const200 std::optional<Token> ArrangementOverlay::ChainImpl::MinElement(
201     Indices& indices) const {
202   PERFETTO_TP_TRACE(metatrace::Category::DB,
203                     "ArrangementOverlay::ChainImpl::MinElement");
204   for (auto& i : indices.tokens) {
205     i.index = (*arrangement_)[i.index];
206   }
207   // If the indices state is monotonic, we can just pass the arrangement's
208   // state.
209   indices.state = indices.state == Indices::State::kMonotonic
210                       ? arrangement_state_
211                       : Indices::State::kNonmonotonic;
212   return inner_->MinElement(indices);
213 }
214 
Get_AvoidUsingBecauseSlow(uint32_t index) const215 SqlValue ArrangementOverlay::ChainImpl::Get_AvoidUsingBecauseSlow(
216     uint32_t index) const {
217   return inner_->Get_AvoidUsingBecauseSlow((*arrangement_)[index]);
218 }
219 
220 }  // namespace perfetto::trace_processor::column
221