1 //===- StratifiedSets.h - Abstract stratified sets implementation. --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_ADT_STRATIFIEDSETS_H 10 #define LLVM_ADT_STRATIFIEDSETS_H 11 12 #include "AliasAnalysisSummary.h" 13 #include "llvm/ADT/DenseMap.h" 14 #include "llvm/ADT/SmallSet.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include <bitset> 17 #include <cassert> 18 #include <cmath> 19 #include <type_traits> 20 #include <utility> 21 #include <vector> 22 23 namespace llvm { 24 namespace cflaa { 25 /// An index into Stratified Sets. 26 typedef unsigned StratifiedIndex; 27 /// NOTE: ^ This can't be a short -- bootstrapping clang has a case where 28 /// ~1M sets exist. 29 30 // Container of information related to a value in a StratifiedSet. 31 struct StratifiedInfo { 32 StratifiedIndex Index; 33 /// For field sensitivity, etc. we can tack fields on here. 34 }; 35 36 /// A "link" between two StratifiedSets. 37 struct StratifiedLink { 38 /// This is a value used to signify "does not exist" where the 39 /// StratifiedIndex type is used. 40 /// 41 /// This is used instead of std::optional<StratifiedIndex> because 42 /// std::optional<StratifiedIndex> would eat up a considerable amount of extra 43 /// memory, after struct padding/alignment is taken into account. 44 static const StratifiedIndex SetSentinel; 45 46 /// The index for the set "above" current 47 StratifiedIndex Above; 48 49 /// The link for the set "below" current 50 StratifiedIndex Below; 51 52 /// Attributes for these StratifiedSets. 53 AliasAttrs Attrs; 54 StratifiedLinkStratifiedLink55 StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {} 56 hasBelowStratifiedLink57 bool hasBelow() const { return Below != SetSentinel; } hasAboveStratifiedLink58 bool hasAbove() const { return Above != SetSentinel; } 59 clearBelowStratifiedLink60 void clearBelow() { Below = SetSentinel; } clearAboveStratifiedLink61 void clearAbove() { Above = SetSentinel; } 62 }; 63 64 /// These are stratified sets, as described in "Fast algorithms for 65 /// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M 66 /// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets 67 /// of Value*s. If two Value*s are in the same set, or if both sets have 68 /// overlapping attributes, then the Value*s are said to alias. 69 /// 70 /// Sets may be related by position, meaning that one set may be considered as 71 /// above or below another. In CFL Alias Analysis, this gives us an indication 72 /// of how two variables are related; if the set of variable A is below a set 73 /// containing variable B, then at some point, a variable that has interacted 74 /// with B (or B itself) was either used in order to extract the variable A, or 75 /// was used as storage of variable A. 76 /// 77 /// Sets may also have attributes (as noted above). These attributes are 78 /// generally used for noting whether a variable in the set has interacted with 79 /// a variable whose origins we don't quite know (i.e. globals/arguments), or if 80 /// the variable may have had operations performed on it (modified in a function 81 /// call). All attributes that exist in a set A must exist in all sets marked as 82 /// below set A. 83 template <typename T> class StratifiedSets { 84 public: 85 StratifiedSets() = default; 86 StratifiedSets(StratifiedSets &&) = default; 87 StratifiedSets &operator=(StratifiedSets &&) = default; 88 StratifiedSets(DenseMap<T,StratifiedInfo> Map,std::vector<StratifiedLink> Links)89 StratifiedSets(DenseMap<T, StratifiedInfo> Map, 90 std::vector<StratifiedLink> Links) 91 : Values(std::move(Map)), Links(std::move(Links)) {} 92 find(const T & Elem)93 std::optional<StratifiedInfo> find(const T &Elem) const { 94 auto Iter = Values.find(Elem); 95 if (Iter == Values.end()) 96 return std::nullopt; 97 return Iter->second; 98 } 99 getLink(StratifiedIndex Index)100 const StratifiedLink &getLink(StratifiedIndex Index) const { 101 assert(inbounds(Index)); 102 return Links[Index]; 103 } 104 105 private: 106 DenseMap<T, StratifiedInfo> Values; 107 std::vector<StratifiedLink> Links; 108 inbounds(StratifiedIndex Idx)109 bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); } 110 }; 111 112 /// Generic Builder class that produces StratifiedSets instances. 113 /// 114 /// The goal of this builder is to efficiently produce correct StratifiedSets 115 /// instances. To this end, we use a few tricks: 116 /// > Set chains (A method for linking sets together) 117 /// > Set remaps (A method for marking a set as an alias [irony?] of another) 118 /// 119 /// ==== Set chains ==== 120 /// This builder has a notion of some value A being above, below, or with some 121 /// other value B: 122 /// > The `A above B` relationship implies that there is a reference edge 123 /// going from A to B. Namely, it notes that A can store anything in B's set. 124 /// > The `A below B` relationship is the opposite of `A above B`. It implies 125 /// that there's a dereference edge going from A to B. 126 /// > The `A with B` relationship states that there's an assignment edge going 127 /// from A to B, and that A and B should be treated as equals. 128 /// 129 /// As an example, take the following code snippet: 130 /// 131 /// %a = alloca i32, align 4 132 /// %ap = alloca i32*, align 8 133 /// %app = alloca i32**, align 8 134 /// store %a, %ap 135 /// store %ap, %app 136 /// %aw = getelementptr %ap, i32 0 137 /// 138 /// Given this, the following relations exist: 139 /// - %a below %ap & %ap above %a 140 /// - %ap below %app & %app above %ap 141 /// - %aw with %ap & %ap with %aw 142 /// 143 /// These relations produce the following sets: 144 /// [{%a}, {%ap, %aw}, {%app}] 145 /// 146 /// ...Which state that the only MayAlias relationship in the above program is 147 /// between %ap and %aw. 148 /// 149 /// Because LLVM allows arbitrary casts, code like the following needs to be 150 /// supported: 151 /// %ip = alloca i64, align 8 152 /// %ipp = alloca i64*, align 8 153 /// %i = bitcast i64** ipp to i64 154 /// store i64* %ip, i64** %ipp 155 /// store i64 %i, i64* %ip 156 /// 157 /// Which, because %ipp ends up *both* above and below %ip, is fun. 158 /// 159 /// This is solved by merging %i and %ipp into a single set (...which is the 160 /// only way to solve this, since their bit patterns are equivalent). Any sets 161 /// that ended up in between %i and %ipp at the time of merging (in this case, 162 /// the set containing %ip) also get conservatively merged into the set of %i 163 /// and %ipp. In short, the resulting StratifiedSet from the above code would be 164 /// {%ip, %ipp, %i}. 165 /// 166 /// ==== Set remaps ==== 167 /// More of an implementation detail than anything -- when merging sets, we need 168 /// to update the numbers of all of the elements mapped to those sets. Rather 169 /// than doing this at each merge, we note in the BuilderLink structure that a 170 /// remap has occurred, and use this information so we can defer renumbering set 171 /// elements until build time. 172 template <typename T> class StratifiedSetsBuilder { 173 /// Represents a Stratified Set, with information about the Stratified 174 /// Set above it, the set below it, and whether the current set has been 175 /// remapped to another. 176 struct BuilderLink { 177 const StratifiedIndex Number; 178 BuilderLinkBuilderLink179 BuilderLink(StratifiedIndex N) : Number(N) { 180 Remap = StratifiedLink::SetSentinel; 181 } 182 hasAboveBuilderLink183 bool hasAbove() const { 184 assert(!isRemapped()); 185 return Link.hasAbove(); 186 } 187 hasBelowBuilderLink188 bool hasBelow() const { 189 assert(!isRemapped()); 190 return Link.hasBelow(); 191 } 192 setBelowBuilderLink193 void setBelow(StratifiedIndex I) { 194 assert(!isRemapped()); 195 Link.Below = I; 196 } 197 setAboveBuilderLink198 void setAbove(StratifiedIndex I) { 199 assert(!isRemapped()); 200 Link.Above = I; 201 } 202 clearBelowBuilderLink203 void clearBelow() { 204 assert(!isRemapped()); 205 Link.clearBelow(); 206 } 207 clearAboveBuilderLink208 void clearAbove() { 209 assert(!isRemapped()); 210 Link.clearAbove(); 211 } 212 getBelowBuilderLink213 StratifiedIndex getBelow() const { 214 assert(!isRemapped()); 215 assert(hasBelow()); 216 return Link.Below; 217 } 218 getAboveBuilderLink219 StratifiedIndex getAbove() const { 220 assert(!isRemapped()); 221 assert(hasAbove()); 222 return Link.Above; 223 } 224 getAttrsBuilderLink225 AliasAttrs getAttrs() { 226 assert(!isRemapped()); 227 return Link.Attrs; 228 } 229 setAttrsBuilderLink230 void setAttrs(AliasAttrs Other) { 231 assert(!isRemapped()); 232 Link.Attrs |= Other; 233 } 234 isRemappedBuilderLink235 bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; } 236 237 /// For initial remapping to another set remapToBuilderLink238 void remapTo(StratifiedIndex Other) { 239 assert(!isRemapped()); 240 Remap = Other; 241 } 242 getRemapIndexBuilderLink243 StratifiedIndex getRemapIndex() const { 244 assert(isRemapped()); 245 return Remap; 246 } 247 248 /// Should only be called when we're already remapped. updateRemapBuilderLink249 void updateRemap(StratifiedIndex Other) { 250 assert(isRemapped()); 251 Remap = Other; 252 } 253 254 /// Prefer the above functions to calling things directly on what's returned 255 /// from this -- they guard against unexpected calls when the current 256 /// BuilderLink is remapped. getLinkBuilderLink257 const StratifiedLink &getLink() const { return Link; } 258 259 private: 260 StratifiedLink Link; 261 StratifiedIndex Remap; 262 }; 263 264 /// This function performs all of the set unioning/value renumbering 265 /// that we've been putting off, and generates a vector<StratifiedLink> that 266 /// may be placed in a StratifiedSets instance. finalizeSets(std::vector<StratifiedLink> & StratLinks)267 void finalizeSets(std::vector<StratifiedLink> &StratLinks) { 268 DenseMap<StratifiedIndex, StratifiedIndex> Remaps; 269 for (auto &Link : Links) { 270 if (Link.isRemapped()) 271 continue; 272 273 StratifiedIndex Number = StratLinks.size(); 274 Remaps.insert(std::make_pair(Link.Number, Number)); 275 StratLinks.push_back(Link.getLink()); 276 } 277 278 for (auto &Link : StratLinks) { 279 if (Link.hasAbove()) { 280 auto &Above = linksAt(Link.Above); 281 auto Iter = Remaps.find(Above.Number); 282 assert(Iter != Remaps.end()); 283 Link.Above = Iter->second; 284 } 285 286 if (Link.hasBelow()) { 287 auto &Below = linksAt(Link.Below); 288 auto Iter = Remaps.find(Below.Number); 289 assert(Iter != Remaps.end()); 290 Link.Below = Iter->second; 291 } 292 } 293 294 for (auto &Pair : Values) { 295 auto &Info = Pair.second; 296 auto &Link = linksAt(Info.Index); 297 auto Iter = Remaps.find(Link.Number); 298 assert(Iter != Remaps.end()); 299 Info.Index = Iter->second; 300 } 301 } 302 303 /// There's a guarantee in StratifiedLink where all bits set in a 304 /// Link.externals will be set in all Link.externals "below" it. propagateAttrs(std::vector<StratifiedLink> & Links)305 static void propagateAttrs(std::vector<StratifiedLink> &Links) { 306 const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) { 307 const auto *Link = &Links[Idx]; 308 while (Link->hasAbove()) { 309 Idx = Link->Above; 310 Link = &Links[Idx]; 311 } 312 return Idx; 313 }; 314 315 SmallSet<StratifiedIndex, 16> Visited; 316 for (unsigned I = 0, E = Links.size(); I < E; ++I) { 317 auto CurrentIndex = getHighestParentAbove(I); 318 if (!Visited.insert(CurrentIndex).second) 319 continue; 320 321 while (Links[CurrentIndex].hasBelow()) { 322 auto &CurrentBits = Links[CurrentIndex].Attrs; 323 auto NextIndex = Links[CurrentIndex].Below; 324 auto &NextBits = Links[NextIndex].Attrs; 325 NextBits |= CurrentBits; 326 CurrentIndex = NextIndex; 327 } 328 } 329 } 330 331 public: 332 /// Builds a StratifiedSet from the information we've been given since either 333 /// construction or the prior build() call. build()334 StratifiedSets<T> build() { 335 std::vector<StratifiedLink> StratLinks; 336 finalizeSets(StratLinks); 337 propagateAttrs(StratLinks); 338 Links.clear(); 339 return StratifiedSets<T>(std::move(Values), std::move(StratLinks)); 340 } 341 has(const T & Elem)342 bool has(const T &Elem) const { return get(Elem).has_value(); } 343 add(const T & Main)344 bool add(const T &Main) { 345 if (get(Main)) 346 return false; 347 348 auto NewIndex = getNewUnlinkedIndex(); 349 return addAtMerging(Main, NewIndex); 350 } 351 352 /// Restructures the stratified sets as necessary to make "ToAdd" in a 353 /// set above "Main". There are some cases where this is not possible (see 354 /// above), so we merge them such that ToAdd and Main are in the same set. addAbove(const T & Main,const T & ToAdd)355 bool addAbove(const T &Main, const T &ToAdd) { 356 assert(has(Main)); 357 auto Index = *indexOf(Main); 358 if (!linksAt(Index).hasAbove()) 359 addLinkAbove(Index); 360 361 auto Above = linksAt(Index).getAbove(); 362 return addAtMerging(ToAdd, Above); 363 } 364 365 /// Restructures the stratified sets as necessary to make "ToAdd" in a 366 /// set below "Main". There are some cases where this is not possible (see 367 /// above), so we merge them such that ToAdd and Main are in the same set. addBelow(const T & Main,const T & ToAdd)368 bool addBelow(const T &Main, const T &ToAdd) { 369 assert(has(Main)); 370 auto Index = *indexOf(Main); 371 if (!linksAt(Index).hasBelow()) 372 addLinkBelow(Index); 373 374 auto Below = linksAt(Index).getBelow(); 375 return addAtMerging(ToAdd, Below); 376 } 377 addWith(const T & Main,const T & ToAdd)378 bool addWith(const T &Main, const T &ToAdd) { 379 assert(has(Main)); 380 auto MainIndex = *indexOf(Main); 381 return addAtMerging(ToAdd, MainIndex); 382 } 383 noteAttributes(const T & Main,AliasAttrs NewAttrs)384 void noteAttributes(const T &Main, AliasAttrs NewAttrs) { 385 assert(has(Main)); 386 auto *Info = *get(Main); 387 auto &Link = linksAt(Info->Index); 388 Link.setAttrs(NewAttrs); 389 } 390 391 private: 392 DenseMap<T, StratifiedInfo> Values; 393 std::vector<BuilderLink> Links; 394 395 /// Adds the given element at the given index, merging sets if necessary. addAtMerging(const T & ToAdd,StratifiedIndex Index)396 bool addAtMerging(const T &ToAdd, StratifiedIndex Index) { 397 StratifiedInfo Info = {Index}; 398 auto Pair = Values.insert(std::make_pair(ToAdd, Info)); 399 if (Pair.second) 400 return true; 401 402 auto &Iter = Pair.first; 403 auto &IterSet = linksAt(Iter->second.Index); 404 auto &ReqSet = linksAt(Index); 405 406 // Failed to add where we wanted to. Merge the sets. 407 if (&IterSet != &ReqSet) 408 merge(IterSet.Number, ReqSet.Number); 409 410 return false; 411 } 412 413 /// Gets the BuilderLink at the given index, taking set remapping into 414 /// account. linksAt(StratifiedIndex Index)415 BuilderLink &linksAt(StratifiedIndex Index) { 416 auto *Start = &Links[Index]; 417 if (!Start->isRemapped()) 418 return *Start; 419 420 auto *Current = Start; 421 while (Current->isRemapped()) 422 Current = &Links[Current->getRemapIndex()]; 423 424 auto NewRemap = Current->Number; 425 426 // Run through everything that has yet to be updated, and update them to 427 // remap to NewRemap 428 Current = Start; 429 while (Current->isRemapped()) { 430 auto *Next = &Links[Current->getRemapIndex()]; 431 Current->updateRemap(NewRemap); 432 Current = Next; 433 } 434 435 return *Current; 436 } 437 438 /// Merges two sets into one another. Assumes that these sets are not 439 /// already one in the same. merge(StratifiedIndex Idx1,StratifiedIndex Idx2)440 void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) { 441 assert(inbounds(Idx1) && inbounds(Idx2)); 442 assert(&linksAt(Idx1) != &linksAt(Idx2) && 443 "Merging a set into itself is not allowed"); 444 445 // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge 446 // both the 447 // given sets, and all sets between them, into one. 448 if (tryMergeUpwards(Idx1, Idx2)) 449 return; 450 451 if (tryMergeUpwards(Idx2, Idx1)) 452 return; 453 454 // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`. 455 // We therefore need to merge the two chains together. 456 mergeDirect(Idx1, Idx2); 457 } 458 459 /// Merges two sets assuming that the set at `Idx1` is unreachable from 460 /// traversing above or below the set at `Idx2`. mergeDirect(StratifiedIndex Idx1,StratifiedIndex Idx2)461 void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) { 462 assert(inbounds(Idx1) && inbounds(Idx2)); 463 464 auto *LinksInto = &linksAt(Idx1); 465 auto *LinksFrom = &linksAt(Idx2); 466 // Merging everything above LinksInto then proceeding to merge everything 467 // below LinksInto becomes problematic, so we go as far "up" as possible! 468 while (LinksInto->hasAbove() && LinksFrom->hasAbove()) { 469 LinksInto = &linksAt(LinksInto->getAbove()); 470 LinksFrom = &linksAt(LinksFrom->getAbove()); 471 } 472 473 if (LinksFrom->hasAbove()) { 474 LinksInto->setAbove(LinksFrom->getAbove()); 475 auto &NewAbove = linksAt(LinksInto->getAbove()); 476 NewAbove.setBelow(LinksInto->Number); 477 } 478 479 // Merging strategy: 480 // > If neither has links below, stop. 481 // > If only `LinksInto` has links below, stop. 482 // > If only `LinksFrom` has links below, reset `LinksInto.Below` to 483 // match `LinksFrom.Below` 484 // > If both have links above, deal with those next. 485 while (LinksInto->hasBelow() && LinksFrom->hasBelow()) { 486 auto FromAttrs = LinksFrom->getAttrs(); 487 LinksInto->setAttrs(FromAttrs); 488 489 // Remap needs to happen after getBelow(), but before 490 // assignment of LinksFrom 491 auto *NewLinksFrom = &linksAt(LinksFrom->getBelow()); 492 LinksFrom->remapTo(LinksInto->Number); 493 LinksFrom = NewLinksFrom; 494 LinksInto = &linksAt(LinksInto->getBelow()); 495 } 496 497 if (LinksFrom->hasBelow()) { 498 LinksInto->setBelow(LinksFrom->getBelow()); 499 auto &NewBelow = linksAt(LinksInto->getBelow()); 500 NewBelow.setAbove(LinksInto->Number); 501 } 502 503 LinksInto->setAttrs(LinksFrom->getAttrs()); 504 LinksFrom->remapTo(LinksInto->Number); 505 } 506 507 /// Checks to see if lowerIndex is at a level lower than upperIndex. If so, it 508 /// will merge lowerIndex with upperIndex (and all of the sets between) and 509 /// return true. Otherwise, it will return false. tryMergeUpwards(StratifiedIndex LowerIndex,StratifiedIndex UpperIndex)510 bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) { 511 assert(inbounds(LowerIndex) && inbounds(UpperIndex)); 512 auto *Lower = &linksAt(LowerIndex); 513 auto *Upper = &linksAt(UpperIndex); 514 if (Lower == Upper) 515 return true; 516 517 SmallVector<BuilderLink *, 8> Found; 518 auto *Current = Lower; 519 auto Attrs = Current->getAttrs(); 520 while (Current->hasAbove() && Current != Upper) { 521 Found.push_back(Current); 522 Attrs |= Current->getAttrs(); 523 Current = &linksAt(Current->getAbove()); 524 } 525 526 if (Current != Upper) 527 return false; 528 529 Upper->setAttrs(Attrs); 530 531 if (Lower->hasBelow()) { 532 auto NewBelowIndex = Lower->getBelow(); 533 Upper->setBelow(NewBelowIndex); 534 auto &NewBelow = linksAt(NewBelowIndex); 535 NewBelow.setAbove(UpperIndex); 536 } else { 537 Upper->clearBelow(); 538 } 539 540 for (const auto &Ptr : Found) 541 Ptr->remapTo(Upper->Number); 542 543 return true; 544 } 545 get(const T & Val)546 std::optional<const StratifiedInfo *> get(const T &Val) const { 547 auto Result = Values.find(Val); 548 if (Result == Values.end()) 549 return std::nullopt; 550 return &Result->second; 551 } 552 get(const T & Val)553 std::optional<StratifiedInfo *> get(const T &Val) { 554 auto Result = Values.find(Val); 555 if (Result == Values.end()) 556 return std::nullopt; 557 return &Result->second; 558 } 559 indexOf(const T & Val)560 std::optional<StratifiedIndex> indexOf(const T &Val) { 561 auto MaybeVal = get(Val); 562 if (!MaybeVal) 563 return std::nullopt; 564 auto *Info = *MaybeVal; 565 auto &Link = linksAt(Info->Index); 566 return Link.Number; 567 } 568 addLinkBelow(StratifiedIndex Set)569 StratifiedIndex addLinkBelow(StratifiedIndex Set) { 570 auto At = addLinks(); 571 Links[Set].setBelow(At); 572 Links[At].setAbove(Set); 573 return At; 574 } 575 addLinkAbove(StratifiedIndex Set)576 StratifiedIndex addLinkAbove(StratifiedIndex Set) { 577 auto At = addLinks(); 578 Links[At].setBelow(Set); 579 Links[Set].setAbove(At); 580 return At; 581 } 582 getNewUnlinkedIndex()583 StratifiedIndex getNewUnlinkedIndex() { return addLinks(); } 584 addLinks()585 StratifiedIndex addLinks() { 586 auto Link = Links.size(); 587 Links.push_back(BuilderLink(Link)); 588 return Link; 589 } 590 inbounds(StratifiedIndex N)591 bool inbounds(StratifiedIndex N) const { return N < Links.size(); } 592 }; 593 } 594 } 595 #endif // LLVM_ADT_STRATIFIEDSETS_H 596