1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_ 18 #define SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_ 19 20 #include <cstddef> 21 #include <cstdint> 22 #include <functional> 23 #include <limits> 24 #include <memory> 25 #include <optional> 26 #include <string> 27 #include <utility> 28 #include <vector> 29 30 #include "perfetto/base/logging.h" 31 #include "perfetto/ext/base/flat_hash_map.h" 32 #include "perfetto/ext/base/hash.h" 33 #include "perfetto/ext/base/paged_memory.h" 34 #include "perfetto/ext/base/string_view.h" 35 #include "perfetto/protozero/proto_utils.h" 36 #include "src/trace_processor/containers/null_term_string_view.h" 37 38 namespace perfetto::trace_processor { 39 40 // Interns strings in a string pool and hands out compact StringIds which can 41 // be used to retrieve the string in O(1). 42 class StringPool { 43 public: 44 struct Id { 45 Id() = default; 46 47 constexpr bool operator==(const Id& other) const { return other.id == id; } 48 constexpr bool operator!=(const Id& other) const { 49 return !(other == *this); 50 } 51 constexpr bool operator<(const Id& other) const { return id < other.id; } 52 is_nullId53 constexpr bool is_null() const { return id == 0u; } 54 is_large_stringId55 constexpr bool is_large_string() const { 56 return id & kLargeStringFlagBitMask; 57 } 58 block_offsetId59 constexpr uint32_t block_offset() const { return id & kBlockOffsetBitMask; } 60 block_indexId61 constexpr uint32_t block_index() const { 62 return (id & kBlockIndexBitMask) >> kNumBlockOffsetBits; 63 } 64 large_string_indexId65 constexpr uint32_t large_string_index() const { 66 PERFETTO_DCHECK(is_large_string()); 67 return id & ~kLargeStringFlagBitMask; 68 } 69 raw_idId70 constexpr uint32_t raw_id() const { return id; } 71 LargeStringId72 static constexpr Id LargeString(size_t index) { 73 PERFETTO_DCHECK(index <= static_cast<uint32_t>(index)); 74 PERFETTO_DCHECK(!(index & kLargeStringFlagBitMask)); 75 return Id(kLargeStringFlagBitMask | static_cast<uint32_t>(index)); 76 } 77 BlockStringId78 static constexpr Id BlockString(size_t index, uint32_t offset) { 79 PERFETTO_DCHECK(index < (1u << (kNumBlockIndexBits + 1))); 80 PERFETTO_DCHECK(offset < (1u << (kNumBlockOffsetBits + 1))); 81 return Id(~kLargeStringFlagBitMask & 82 (static_cast<uint32_t>(index << kNumBlockOffsetBits) | 83 (offset & kBlockOffsetBitMask))); 84 } 85 RawId86 static constexpr Id Raw(uint32_t raw) { return Id(raw); } 87 NullId88 static constexpr Id Null() { return Id(0u); } 89 90 private: IdId91 constexpr explicit Id(uint32_t i) : id(i) {} 92 93 uint32_t id; 94 }; 95 96 // Iterator over the strings in the pool. 97 class Iterator { 98 public: 99 explicit Iterator(const StringPool*); 100 101 explicit operator bool() const; 102 Iterator& operator++(); 103 104 NullTermStringView StringView(); 105 Id StringId(); 106 107 private: 108 const StringPool* pool_ = nullptr; 109 uint32_t block_index_ = 0; 110 uint32_t block_offset_ = 0; 111 uint32_t large_strings_index_ = 0; 112 }; 113 114 StringPool(); 115 ~StringPool(); 116 117 // Allow std::move(). 118 StringPool(StringPool&&) noexcept; 119 StringPool& operator=(StringPool&&) noexcept; 120 121 // Disable implicit copy. 122 StringPool(const StringPool&) = delete; 123 StringPool& operator=(const StringPool&) = delete; 124 InternString(base::StringView str)125 Id InternString(base::StringView str) { 126 if (str.data() == nullptr) 127 return Id::Null(); 128 129 auto hash = str.Hash(); 130 131 // Perform a hashtable insertion with a null ID just to check if the string 132 // is already inserted. If it's not, overwrite 0 with the actual Id. 133 auto it_and_inserted = string_index_.Insert(hash, Id()); 134 Id* id = it_and_inserted.first; 135 if (!it_and_inserted.second) { 136 PERFETTO_DCHECK(Get(*id) == str); 137 return *id; 138 } 139 *id = InsertString(str, hash); 140 return *id; 141 } 142 GetId(base::StringView str)143 std::optional<Id> GetId(base::StringView str) const { 144 if (str.data() == nullptr) 145 return Id::Null(); 146 147 auto hash = str.Hash(); 148 Id* id = string_index_.Find(hash); 149 if (id) { 150 PERFETTO_DCHECK(Get(*id) == str); 151 return *id; 152 } 153 return std::nullopt; 154 } 155 Get(Id id)156 NullTermStringView Get(Id id) const { 157 if (id.is_null()) 158 return NullTermStringView(); 159 if (id.is_large_string()) 160 return GetLargeString(id); 161 return GetFromBlockPtr(IdToPtr(id)); 162 } 163 CreateIterator()164 Iterator CreateIterator() const { return Iterator(this); } 165 size()166 size_t size() const { return string_index_.size(); } 167 168 // Maximum Id of a small (not large) string in the string pool. MaxSmallStringId()169 StringPool::Id MaxSmallStringId() const { 170 return Id::BlockString(blocks_.size() - 1, blocks_.back().pos()); 171 } 172 173 // Returns whether there is at least one large string in a string pool HasLargeString()174 bool HasLargeString() const { return !large_strings_.empty(); } 175 176 private: 177 using StringHash = uint64_t; 178 179 struct Block { BlockBlock180 explicit Block(size_t size) 181 : mem_(base::PagedMemory::Allocate(size, 182 base::PagedMemory::kDontCommit)), 183 size_(size) {} 184 ~Block() = default; 185 186 // Allow std::move(). 187 Block(Block&&) noexcept = default; 188 Block& operator=(Block&&) = default; 189 190 // Disable implicit copy. 191 Block(const Block&) = delete; 192 Block& operator=(const Block&) = delete; 193 GetBlock194 uint8_t* Get(uint32_t offset) const { 195 return static_cast<uint8_t*>(mem_.Get()) + offset; 196 } 197 198 std::pair<bool /*success*/, uint32_t /*offset*/> TryInsert( 199 base::StringView str); 200 OffsetOfBlock201 uint32_t OffsetOf(const uint8_t* ptr) const { 202 PERFETTO_DCHECK(Get(0) < ptr && 203 ptr <= Get(static_cast<uint32_t>(size_ - 1))); 204 return static_cast<uint32_t>(ptr - Get(0)); 205 } 206 posBlock207 uint32_t pos() const { return pos_; } 208 209 private: 210 base::PagedMemory mem_; 211 uint32_t pos_ = 0; 212 size_t size_ = 0; 213 }; 214 215 friend class Iterator; 216 friend class StringPoolTest; 217 218 // StringPool IDs are 32-bit. If the MSB is 1, the remaining bits of the ID 219 // are an index into the |large_strings_| vector. Otherwise, the next 6 bits 220 // are the index of the Block in the pool, and the remaining 25 bits the 221 // offset of the encoded string inside the pool. 222 // 223 // [31] [30:25] [24:0] 224 // | | | 225 // | | +---- offset in block (or LSB of large string index). 226 // | +------------ block index (or MSB of large string index). 227 // +------------------- 1: large string, 0: string in a Block. 228 static constexpr size_t kNumBlockIndexBits = 6; 229 static constexpr size_t kNumBlockOffsetBits = 25; 230 231 static constexpr size_t kLargeStringFlagBitMask = 1u << 31; 232 static constexpr size_t kBlockOffsetBitMask = (1u << kNumBlockOffsetBits) - 1; 233 static constexpr size_t kBlockIndexBitMask = 234 0xffffffff & ~kLargeStringFlagBitMask & ~kBlockOffsetBitMask; 235 236 static constexpr size_t kBlockSizeBytes = kBlockOffsetBitMask + 1; // 32 MB 237 238 // If a string doesn't fit into the current block, we can either start a new 239 // block or insert the string into the |large_strings_| vector. To maximize 240 // the used proportion of each block's memory, we only start a new block if 241 // the string isn't very large. 242 static constexpr size_t kMinLargeStringSizeBytes = kBlockSizeBytes / 8; 243 244 // Number of bytes to reserve for size and null terminator. 245 // This is the upper limit on metadata size: 5 bytes for max uint32, 246 // plus 1 byte for null terminator. The actual size may be lower. 247 static constexpr uint8_t kMaxMetadataSize = 6; 248 249 // Inserts the string with the given hash into the pool and return its Id. 250 Id InsertString(base::StringView, uint64_t hash); 251 252 // Insert a large string into the pool and return its Id. 253 Id InsertLargeString(base::StringView, uint64_t hash); 254 255 // The returned pointer points to the start of the string metadata (i.e. the 256 // first byte of the size). IdToPtr(Id id)257 const uint8_t* IdToPtr(Id id) const { 258 // If the MSB is set, the ID represents an index into |large_strings_|, so 259 // shouldn't be converted into a block pointer. 260 PERFETTO_DCHECK(!id.is_large_string()); 261 262 size_t block_index = id.block_index(); 263 uint32_t block_offset = id.block_offset(); 264 265 PERFETTO_DCHECK(block_index < blocks_.size()); 266 PERFETTO_DCHECK(block_offset < blocks_[block_index].pos()); 267 268 return blocks_[block_index].Get(block_offset); 269 } 270 271 // |ptr| should point to the start of the string metadata (i.e. the first byte 272 // of the size). 273 // Returns pointer to the start of the string. ReadSize(const uint8_t * ptr,uint32_t * size)274 static const uint8_t* ReadSize(const uint8_t* ptr, uint32_t* size) { 275 uint64_t value = 0; 276 const uint8_t* str_ptr = protozero::proto_utils::ParseVarInt( 277 ptr, ptr + kMaxMetadataSize, &value); 278 PERFETTO_DCHECK(str_ptr != ptr); 279 PERFETTO_DCHECK(value < std::numeric_limits<uint32_t>::max()); 280 *size = static_cast<uint32_t>(value); 281 return str_ptr; 282 } 283 284 // |ptr| should point to the start of the string metadata (i.e. the first byte 285 // of the size). GetFromBlockPtr(const uint8_t * ptr)286 static NullTermStringView GetFromBlockPtr(const uint8_t* ptr) { 287 uint32_t size = 0; 288 const uint8_t* str_ptr = ReadSize(ptr, &size); 289 return {reinterpret_cast<const char*>(str_ptr), size}; 290 } 291 292 // Lookup a string in the |large_strings_| vector. |id| should have the MSB 293 // set. GetLargeString(Id id)294 NullTermStringView GetLargeString(Id id) const { 295 PERFETTO_DCHECK(id.is_large_string()); 296 size_t index = id.large_string_index(); 297 PERFETTO_DCHECK(index < large_strings_.size()); 298 const std::string* str = large_strings_[index].get(); 299 return {str->c_str(), str->size()}; 300 } 301 302 // The actual memory storing the strings. 303 std::vector<Block> blocks_; 304 305 // Any string that is too large to fit into a Block is stored separately 306 // (inside a unique_ptr to ensure any references to it remain valid even if 307 // |large_strings_| is resized). 308 std::vector<std::unique_ptr<std::string>> large_strings_; 309 310 // Maps hashes of strings to the Id in the string pool. 311 base::FlatHashMap<StringHash, 312 Id, 313 base::AlreadyHashed<StringHash>, 314 base::LinearProbe, 315 /*AppendOnly=*/true> 316 string_index_{/*initial_capacity=*/4096u}; 317 }; 318 319 } // namespace perfetto::trace_processor 320 321 template <> 322 struct std::hash<::perfetto::trace_processor::StringPool::Id> { 323 using argument_type = ::perfetto::trace_processor::StringPool::Id; 324 using result_type = size_t; 325 326 result_type operator()(const argument_type& r) const { 327 return std::hash<uint32_t>{}(r.raw_id()); 328 } 329 }; 330 331 #endif // SRC_TRACE_PROCESSOR_CONTAINERS_STRING_POOL_H_ 332