xref: /aosp_15_r20/art/compiler/optimizing/gvn.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #include "gvn.h"
18 
19 #include "base/arena_bit_vector.h"
20 #include "base/bit_vector-inl.h"
21 #include "base/scoped_arena_allocator.h"
22 #include "base/scoped_arena_containers.h"
23 #include "base/utils.h"
24 #include "side_effects_analysis.h"
25 
26 namespace art HIDDEN {
27 
28 /**
29  * A ValueSet holds instructions that can replace other instructions. It is updated
30  * through the `Add` method, and the `Kill` method. The `Kill` method removes
31  * instructions that are affected by the given side effect.
32  *
33  * The `Lookup` method returns an equivalent instruction to the given instruction
34  * if there is one in the set. In GVN, we would say those instructions have the
35  * same "number".
36  */
37 class ValueSet : public ArenaObject<kArenaAllocGvn> {
38  public:
39   // Constructs an empty ValueSet which owns all its buckets.
ValueSet(ScopedArenaAllocator * allocator)40   explicit ValueSet(ScopedArenaAllocator* allocator)
41       : allocator_(allocator),
42         num_buckets_(kMinimumNumberOfBuckets),
43         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
44         buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
45         num_entries_(0u) {
46     DCHECK(IsPowerOfTwo(num_buckets_));
47     std::fill_n(buckets_, num_buckets_, nullptr);
48     buckets_owned_.SetInitialBits(num_buckets_);
49   }
50 
51   // Copy constructor. Depending on the load factor, it will either make a deep
52   // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
ValueSet(ScopedArenaAllocator * allocator,const ValueSet & other)53   ValueSet(ScopedArenaAllocator* allocator, const ValueSet& other)
54       : allocator_(allocator),
55         num_buckets_(other.IdealBucketCount()),
56         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
57         buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
58         num_entries_(0u) {
59     DCHECK(IsPowerOfTwo(num_buckets_));
60     PopulateFromInternal(other);
61   }
62 
63   // Erases all values in this set and populates it with values from `other`.
PopulateFrom(const ValueSet & other)64   void PopulateFrom(const ValueSet& other) {
65     if (this == &other) {
66       return;
67     }
68     PopulateFromInternal(other);
69   }
70 
71   // Returns true if `this` has enough buckets so that if `other` is copied into
72   // it, the load factor will not cross the upper threshold.
73   // If `exact_match` is set, true is returned only if `this` has the ideal
74   // number of buckets. Larger number of buckets is allowed otherwise.
CanHoldCopyOf(const ValueSet & other,bool exact_match)75   bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
76     if (exact_match) {
77       return other.IdealBucketCount() == num_buckets_;
78     } else {
79       return other.IdealBucketCount() <= num_buckets_;
80     }
81   }
82 
83   // Adds an instruction in the set.
Add(HInstruction * instruction)84   void Add(HInstruction* instruction) {
85     DCHECK(Lookup(instruction) == nullptr);
86     size_t hash_code = HashCode(instruction);
87     size_t index = BucketIndex(hash_code);
88 
89     if (!buckets_owned_.IsBitSet(index)) {
90       CloneBucket(index);
91     }
92     buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
93     ++num_entries_;
94   }
95 
96   // If in the set, returns an equivalent instruction to the given instruction.
97   // Returns null otherwise.
Lookup(HInstruction * instruction) const98   HInstruction* Lookup(HInstruction* instruction) const {
99     size_t hash_code = HashCode(instruction);
100     size_t index = BucketIndex(hash_code);
101 
102     for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
103       if (node->GetHashCode() == hash_code) {
104         HInstruction* existing = node->GetInstruction();
105         if (existing->Equals(instruction)) {
106           return existing;
107         }
108       }
109     }
110     return nullptr;
111   }
112 
113   // Returns whether instruction is in the set.
Contains(HInstruction * instruction) const114   bool Contains(HInstruction* instruction) const {
115     size_t hash_code = HashCode(instruction);
116     size_t index = BucketIndex(hash_code);
117 
118     for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
119       if (node->GetInstruction() == instruction) {
120         return true;
121       }
122     }
123     return false;
124   }
125 
126   // Removes all instructions in the set affected by the given side effects.
Kill(SideEffects side_effects)127   void Kill(SideEffects side_effects) {
128     DeleteAllImpureWhich([side_effects](Node* node) {
129       return node->GetSideEffects().MayDependOn(side_effects);
130     });
131   }
132 
Clear()133   void Clear() {
134     num_entries_ = 0;
135     for (size_t i = 0; i < num_buckets_; ++i) {
136       buckets_[i] = nullptr;
137     }
138     buckets_owned_.SetInitialBits(num_buckets_);
139   }
140 
141   // Updates this set by intersecting with instructions in a predecessor's set.
IntersectWith(ValueSet * predecessor)142   void IntersectWith(ValueSet* predecessor) {
143     if (IsEmpty()) {
144       return;
145     } else if (predecessor->IsEmpty()) {
146       Clear();
147     } else {
148       // Pure instructions do not need to be tested because only impure
149       // instructions can be killed.
150       DeleteAllImpureWhich([predecessor](Node* node) {
151         return !predecessor->Contains(node->GetInstruction());
152       });
153     }
154   }
155 
IsEmpty() const156   bool IsEmpty() const { return num_entries_ == 0; }
GetNumberOfEntries() const157   size_t GetNumberOfEntries() const { return num_entries_; }
158 
159  private:
160   // Copies all entries from `other` to `this`.
PopulateFromInternal(const ValueSet & other)161   void PopulateFromInternal(const ValueSet& other) {
162     DCHECK_NE(this, &other);
163     DCHECK_GE(num_buckets_, other.IdealBucketCount());
164 
165     if (num_buckets_ == other.num_buckets_) {
166       // Hash table remains the same size. We copy the bucket pointers and leave
167       // all buckets_owned_ bits false.
168       buckets_owned_.ClearAllBits();
169       memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
170     } else {
171       // Hash table size changes. We copy and rehash all entries, and set all
172       // buckets_owned_ bits to true.
173       std::fill_n(buckets_, num_buckets_, nullptr);
174       for (size_t i = 0; i < other.num_buckets_; ++i) {
175         for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
176           size_t new_index = BucketIndex(node->GetHashCode());
177           buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
178         }
179       }
180       buckets_owned_.SetInitialBits(num_buckets_);
181     }
182 
183     num_entries_ = other.num_entries_;
184   }
185 
186   class Node : public ArenaObject<kArenaAllocGvn> {
187    public:
Node(HInstruction * instruction,size_t hash_code,Node * next)188     Node(HInstruction* instruction, size_t hash_code, Node* next)
189         : instruction_(instruction), hash_code_(hash_code), next_(next) {}
190 
GetHashCode() const191     size_t GetHashCode() const { return hash_code_; }
GetInstruction() const192     HInstruction* GetInstruction() const { return instruction_; }
GetNext() const193     Node* GetNext() const { return next_; }
SetNext(Node * node)194     void SetNext(Node* node) { next_ = node; }
195 
Dup(ScopedArenaAllocator * allocator,Node * new_next=nullptr)196     Node* Dup(ScopedArenaAllocator* allocator, Node* new_next = nullptr) {
197       return new (allocator) Node(instruction_, hash_code_, new_next);
198     }
199 
GetSideEffects() const200     SideEffects GetSideEffects() const {
201       // Deoptimize is a weird instruction since it's predicated and
202       // never-return. Its side-effects are to prevent the splitting of dex
203       // instructions across it (which could cause inconsistencies once we begin
204       // interpreting again). In the context of GVN the 'perform-deopt' branch is not
205       // relevant and we only need to care about the no-op case, in which case there are
206       // no side-effects. By doing this we are able to eliminate redundant (i.e.
207       // dominated deopts with GVNd conditions) deoptimizations.
208       if (instruction_->IsDeoptimize()) {
209         return SideEffects::None();
210       } else {
211         return instruction_->GetSideEffects();
212       }
213     }
214 
215    private:
216     HInstruction* const instruction_;
217     const size_t hash_code_;
218     Node* next_;
219 
220     DISALLOW_COPY_AND_ASSIGN(Node);
221   };
222 
223   // Creates our own copy of a bucket that is currently pointing to a parent.
224   // This algorithm can be called while iterating over the bucket because it
225   // preserves the order of entries in the bucket and will return the clone of
226   // the given 'iterator'.
CloneBucket(size_t index,Node * iterator=nullptr)227   Node* CloneBucket(size_t index, Node* iterator = nullptr) {
228     DCHECK(!buckets_owned_.IsBitSet(index));
229     Node* clone_current = nullptr;
230     Node* clone_previous = nullptr;
231     Node* clone_iterator = nullptr;
232     for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
233       clone_current = node->Dup(allocator_, nullptr);
234       if (node == iterator) {
235         clone_iterator = clone_current;
236       }
237       if (clone_previous == nullptr) {
238         buckets_[index] = clone_current;
239       } else {
240         clone_previous->SetNext(clone_current);
241       }
242       clone_previous = clone_current;
243     }
244     buckets_owned_.SetBit(index);
245     return clone_iterator;
246   }
247 
248   // Iterates over buckets with impure instructions (even indices) and deletes
249   // the ones on which 'cond' returns true.
250   template<typename Functor>
DeleteAllImpureWhich(Functor && cond)251   void DeleteAllImpureWhich(Functor&& cond) {
252     for (size_t i = 0; i < num_buckets_; i += 2) {
253       Node* node = buckets_[i];
254       Node* previous = nullptr;
255 
256       if (node == nullptr) {
257         continue;
258       }
259 
260       if (!buckets_owned_.IsBitSet(i)) {
261         // Bucket is not owned but maybe we won't need to change it at all.
262         // Iterate as long as the entries don't satisfy 'cond'.
263         while (node != nullptr) {
264           if (cond(node)) {
265             // We do need to delete an entry but we do not own the bucket.
266             // Clone the bucket, make sure 'previous' and 'node' point to
267             // the cloned entries and break.
268             previous = CloneBucket(i, previous);
269             node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
270             break;
271           }
272           previous = node;
273           node = node->GetNext();
274         }
275       }
276 
277       // By this point we either own the bucket and can start deleting entries,
278       // or we do not own it but no entries matched 'cond'.
279       DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);
280 
281       // We iterate over the remainder of entries and delete those that match
282       // the given condition.
283       while (node != nullptr) {
284         Node* next = node->GetNext();
285         if (cond(node)) {
286           if (previous == nullptr) {
287             buckets_[i] = next;
288           } else {
289             previous->SetNext(next);
290           }
291         } else {
292           previous = node;
293         }
294         node = next;
295       }
296     }
297   }
298 
299   // Computes a bucket count such that the load factor is reasonable.
300   // This is estimated as (num_entries_ * 1.5) and rounded up to nearest pow2.
IdealBucketCount() const301   size_t IdealBucketCount() const {
302     size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
303     if (bucket_count > kMinimumNumberOfBuckets) {
304       return bucket_count;
305     } else {
306       return kMinimumNumberOfBuckets;
307     }
308   }
309 
310   // Generates a hash code for an instruction.
HashCode(HInstruction * instruction) const311   size_t HashCode(HInstruction* instruction) const {
312     size_t hash_code = instruction->ComputeHashCode();
313     // Pure instructions are put into odd buckets to speed up deletion. Note that in the
314     // case of irreducible loops, we don't put pure instructions in odd buckets, as we
315     // need to delete them when entering the loop.
316     // ClinitCheck is treated as a pure instruction since it's only executed
317     // once.
318     bool pure = !instruction->GetSideEffects().HasDependencies() ||
319                 instruction->IsClinitCheck();
320     if (!pure || instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
321       return (hash_code << 1) | 0;
322     } else {
323       return (hash_code << 1) | 1;
324     }
325   }
326 
327   // Converts a hash code to a bucket index.
BucketIndex(size_t hash_code) const328   size_t BucketIndex(size_t hash_code) const {
329     return hash_code & (num_buckets_ - 1);
330   }
331 
332   ScopedArenaAllocator* const allocator_;
333 
334   // The internal bucket implementation of the set.
335   size_t const num_buckets_;
336   Node** const buckets_;
337 
338   // Flags specifying which buckets were copied into the set from its parent.
339   // If a flag is not set, the corresponding bucket points to entries in the
340   // parent and must be cloned prior to making changes.
341   ArenaBitVector buckets_owned_;
342 
343   // The number of entries in the set.
344   size_t num_entries_;
345 
346   static constexpr size_t kMinimumNumberOfBuckets = 8;
347 
348   DISALLOW_COPY_AND_ASSIGN(ValueSet);
349 };
350 
351 /**
352  * Optimization phase that removes redundant instruction.
353  */
354 class GlobalValueNumberer : public ValueObject {
355  public:
GlobalValueNumberer(HGraph * graph,const SideEffectsAnalysis & side_effects)356   GlobalValueNumberer(HGraph* graph, const SideEffectsAnalysis& side_effects)
357       : graph_(graph),
358         allocator_(graph->GetArenaStack()),
359         side_effects_(side_effects),
360         sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)),
361         visited_blocks_(
362             &allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn),
363         did_optimization_(false) {}
364 
365   bool Run();
366 
367  private:
368   // Per-block GVN. Will also update the ValueSet of the dominated and
369   // successor blocks.
370   void VisitBasicBlock(HBasicBlock* block);
371 
372   HGraph* graph_;
373   ScopedArenaAllocator allocator_;
374   const SideEffectsAnalysis& side_effects_;
375 
FindSetFor(HBasicBlock * block) const376   ValueSet* FindSetFor(HBasicBlock* block) const {
377     ValueSet* result = sets_[block->GetBlockId()];
378     DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
379     return result;
380   }
381 
AbandonSetFor(HBasicBlock * block)382   void AbandonSetFor(HBasicBlock* block) {
383     DCHECK(sets_[block->GetBlockId()] != nullptr)
384         << "Block B" << block->GetBlockId() << " expected to have a set";
385     sets_[block->GetBlockId()] = nullptr;
386   }
387 
388   // Returns false if the GlobalValueNumberer has already visited all blocks
389   // which may reference `block`.
390   bool WillBeReferencedAgain(HBasicBlock* block) const;
391 
392   // Iterates over visited blocks and finds one which has a ValueSet such that:
393   // (a) it will not be referenced in the future, and
394   // (b) it can hold a copy of `reference_set` with a reasonable load factor.
395   HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
396                                                  const ValueSet& reference_set) const;
397 
398   // ValueSet for blocks. Initially null, but for an individual block they
399   // are allocated and populated by the dominator, and updated by all blocks
400   // in the path from the dominator to the block.
401   ScopedArenaVector<ValueSet*> sets_;
402 
403   // BitVector which serves as a fast-access map from block id to
404   // visited/unvisited Boolean.
405   ArenaBitVector visited_blocks_;
406 
407   // True if GVN did at least one removal.
408   bool did_optimization_;
409 
410   DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
411 };
412 
Run()413 bool GlobalValueNumberer::Run() {
414   DCHECK(side_effects_.HasRun());
415   sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_);
416 
417   // Use the reverse post order to ensure the non back-edge predecessors of a block are
418   // visited before the block itself.
419   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
420     VisitBasicBlock(block);
421   }
422   return did_optimization_;
423 }
424 
VisitBasicBlock(HBasicBlock * block)425 void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
426   ValueSet* set = nullptr;
427 
428   const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
429   if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
430     // The entry block should only accumulate constant instructions, and
431     // the builder puts constants only in the entry block.
432     // Therefore, there is no need to propagate the value set to the next block.
433     set = new (&allocator_) ValueSet(&allocator_);
434   } else {
435     HBasicBlock* dominator = block->GetDominator();
436     ValueSet* dominator_set = FindSetFor(dominator);
437 
438     if (dominator->GetSuccessors().size() == 1) {
439       // `block` is a direct successor of its dominator. No need to clone the
440       // dominator's set, `block` can take over its ownership including its buckets.
441       DCHECK_EQ(dominator->GetSingleSuccessor(), block);
442       AbandonSetFor(dominator);
443       set = dominator_set;
444     } else {
445       // Try to find a basic block which will never be referenced again and whose
446       // ValueSet can therefore be recycled. We will need to copy `dominator_set`
447       // into the recycled set, so we pass `dominator_set` as a reference for size.
448       HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
449       if (recyclable == nullptr) {
450         // No block with a suitable ValueSet found. Allocate a new one and
451         // copy `dominator_set` into it.
452         set = new (&allocator_) ValueSet(&allocator_, *dominator_set);
453       } else {
454         // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
455         set = FindSetFor(recyclable);
456         AbandonSetFor(recyclable);
457         set->PopulateFrom(*dominator_set);
458       }
459     }
460 
461     if (!set->IsEmpty()) {
462       if (block->IsLoopHeader()) {
463         if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
464           // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
465           // loop header. We clear the set at entry of irreducible loops and any loop containing
466           // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
467           // across the irreducible loop.
468           // Note that, if we're not compiling OSR, we could still do GVN and introduce
469           // phis at irreducible loop headers. We decided it was not worth the complexity.
470           set->Clear();
471         } else {
472           DCHECK(!block->GetLoopInformation()->IsIrreducible());
473           DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
474           set->Kill(side_effects_.GetLoopEffects(block));
475         }
476       } else if (predecessors.size() > 1) {
477         for (HBasicBlock* predecessor : predecessors) {
478           set->IntersectWith(FindSetFor(predecessor));
479           if (set->IsEmpty()) {
480             break;
481           }
482         }
483       }
484     }
485   }
486 
487   sets_[block->GetBlockId()] = set;
488 
489   HInstruction* current = block->GetFirstInstruction();
490   while (current != nullptr) {
491     // Save the next instruction in case `current` is removed from the graph.
492     HInstruction* next = current->GetNext();
493     // Do not kill the set with the side effects of the instruction just now: if
494     // the instruction is GVN'ed, we don't need to kill.
495     //
496     // BoundType is a special case example of an instruction which shouldn't be moved but can be
497     // GVN'ed.
498     //
499     // Deoptimize is a special case since even though we don't want to move it we can still remove
500     // it for GVN.
501     if (current->CanBeMoved() || current->IsBoundType() || current->IsDeoptimize()) {
502       if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
503         // For commutative ops, (x op y) will be treated the same as (y op x)
504         // after fixed ordering.
505         current->AsBinaryOperation()->OrderInputs();
506       }
507       HInstruction* existing = set->Lookup(current);
508       if (existing != nullptr) {
509         // This replacement doesn't make more OrderInputs() necessary since
510         // current is either used by an instruction that it dominates,
511         // which hasn't been visited yet due to the order we visit instructions.
512         // Or current is used by a phi, and we don't do OrderInputs() on a phi anyway.
513         current->ReplaceWith(existing);
514         current->GetBlock()->RemoveInstruction(current);
515         did_optimization_ = true;
516       } else {
517         set->Kill(current->GetSideEffects());
518         set->Add(current);
519       }
520     } else {
521       set->Kill(current->GetSideEffects());
522     }
523     current = next;
524   }
525 
526   visited_blocks_.SetBit(block->GetBlockId());
527 }
528 
WillBeReferencedAgain(HBasicBlock * block) const529 bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
530   DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
531 
532   for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
533     if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
534       return true;
535     }
536   }
537 
538   for (const HBasicBlock* successor : block->GetSuccessors()) {
539     if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
540       return true;
541     }
542   }
543 
544   return false;
545 }
546 
FindVisitedBlockWithRecyclableSet(HBasicBlock * block,const ValueSet & reference_set) const547 HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
548     HBasicBlock* block, const ValueSet& reference_set) const {
549   HBasicBlock* secondary_match = nullptr;
550 
551   for (size_t block_id : visited_blocks_.Indexes()) {
552     ValueSet* current_set = sets_[block_id];
553     if (current_set == nullptr) {
554       // Set was already recycled.
555       continue;
556     }
557 
558     HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];
559 
560     // We test if `current_set` has enough buckets to store a copy of
561     // `reference_set` with a reasonable load factor. If we find a set whose
562     // number of buckets matches perfectly, we return right away. If we find one
563     // that is larger, we return it if no perfectly-matching set is found.
564     // Note that we defer testing WillBeReferencedAgain until all other criteria
565     // have been satisfied because it might be expensive.
566     if (current_set->CanHoldCopyOf(reference_set, /* exact_match= */ true)) {
567       if (!WillBeReferencedAgain(current_block)) {
568         return current_block;
569       }
570     } else if (secondary_match == nullptr &&
571                current_set->CanHoldCopyOf(reference_set, /* exact_match= */ false)) {
572       if (!WillBeReferencedAgain(current_block)) {
573         secondary_match = current_block;
574       }
575     }
576   }
577 
578   return secondary_match;
579 }
580 
Run()581 bool GVNOptimization::Run() {
582   GlobalValueNumberer gvn(graph_, side_effects_);
583   return gvn.Run();
584 }
585 
586 }  // namespace art
587