/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "load_store_elimination.h"

#include <algorithm>
#include <optional>
#include <sstream>
#include <variant>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/bit_vector.h"
#include "base/globals.h"
#include "base/indenter.h"
#include "base/iteration_range.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/transform_iterator.h"
#include "escape.h"
#include "handle.h"
#include "load_store_analysis.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
#include "oat/stack_map.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"

/**
 * The general algorithm of load-store elimination (LSE).
 *
 * We use load-store analysis to collect a list of heap locations and perform
 * alias analysis of those heap locations. LSE then keeps track of a list of
 * heap values corresponding to the heap locations and stores that put those
 * values in these locations.
 *  - In phase 1, we visit basic blocks in reverse post order and for each basic
 *    block, visit instructions sequentially, recording heap values and looking
 *    for loads and stores to eliminate without relying on loop Phis.
 *  - In phase 2, we look for loads that can be replaced by creating loop Phis
 *    or using a loop-invariant value.
 *  - In phase 3, we determine which stores are dead and can be eliminated and
 *    based on that information we re-evaluate whether some kept stores are
 *    storing the same value as the value in the heap location; such stores are
 *    also marked for elimination.
 *  - In phase 4, we commit the changes, replacing loads marked for elimination
 *    in previous processing and removing stores not marked for keeping. We also
 *    remove allocations that are no longer needed.
 *  - In phase 5, we move allocations which only escape along some executions
 *    closer to their escape points and fix up non-escaping paths with their
 *    actual values, creating Phis when needed.
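 *
 * For illustration, a hypothetical Java-like snippet where LSE applies
 * (assuming `obj` does not escape and nothing aliases `obj.f` in between):
 *
 *     obj.f = a;      // Kept only if the stored value remains observable.
 *     int x = obj.f;  // Load eliminated; `x` is replaced by `a`.
 *     obj.f = a;      // Store eliminated; it writes the value already there.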
 *
 * 1. Walk over blocks and their instructions.
 *
 * The initial set of heap values for a basic block is
 *  - For a loop header of an irreducible loop, all heap values are unknown.
 *  - For a loop header of a normal loop, all values unknown at the end of the
 *    preheader are initialized to unknown, other heap values are set to Phi
 *    placeholders as we cannot determine yet whether these values are known on
 *    all back-edges. We also use Phi placeholders for array heap locations with
 *    an index defined inside the loop, but this helps only when the value
 *    remains zero from the array allocation throughout the loop.
 *  - For catch blocks, we clear all assumptions since we arrived due to an
 *    instruction throwing.
 *  - For other basic blocks, we merge incoming values from the end of all
 *    predecessors. If any incoming value is unknown, the start value for this
 *    block is also unknown. Otherwise, if all the incoming values are the same
 *    (including the case of a single predecessor), the incoming value is used.
 *    Otherwise, we use a Phi placeholder to indicate different incoming values.
 *    We record whether such a Phi placeholder depends on a loop Phi placeholder.
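 *
 * For example, in a hypothetical diamond-shaped CFG:
 *
 *     if (c) { obj.f = 1; } else { obj.f = 2; }
 *     int x = obj.f;  // The merge sees values {1, 2}; a Phi placeholder is
 *                     // recorded and the load may later become Phi(1, 2).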
 *
 * For each instruction in the block
 *  - If the instruction is a load from a heap location with a known value not
 *    dependent on a loop Phi placeholder, the load can be eliminated, either by
 *    using an existing instruction or by creating new Phi(s) instead. In order
 *    to maintain the validity of all heap locations during the optimization
 *    phase, we only record substitutes at this phase and the real elimination
 *    is delayed till the end of LSE. Loads that require a loop Phi placeholder
 *    replacement are recorded for processing later.
 *  - If the instruction is a store, it updates the heap value for the heap
 *    location with the stored value and records the store itself so that we can
 *    mark it for keeping if the value becomes observable. Heap values are
 *    invalidated for heap locations that may alias with the store instruction's
 *    heap location and their recorded stores are marked for keeping as they are
 *    now potentially observable. The store instruction can be eliminated unless
 *    the value stored is later needed, e.g. by a load from the same/aliased heap
 *    location, or the heap location persists at method return/deoptimization.
 *  - A store that stores the same value as the heap value is eliminated.
 *  - For newly instantiated instances, their heap values are initialized to
 *    language-defined default values.
 *  - Finalizable objects are considered as persisting at method
 *    return/deoptimization.
 *  - Some instructions such as invokes are treated as loading and invalidating
 *    all the heap values, depending on the instruction's side effects.
 *  - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
 *    partially overlapping access among ArrayGet/ArraySet/VecLoad/VecStore
 *    instructions is treated as aliasing and no load/store is eliminated in
 *    such cases.
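 *
 * A minimal sketch of the default-value rule above (hypothetical snippet):
 *
 *     int[] a = new int[8];  // All elements start at the default value 0.
 *     int x = a[0];          // Load eliminated; replaced by the constant 0.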
 *
 * The time complexity of the initial phase has several components. The total
 * time for the initialization of heap values for all blocks is
 *    O(heap_locations * edges)
 * and the time complexity for simple instruction processing is
 *    O(instructions).
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 2. Process loads that depend on loop Phi placeholders.
 *
 * We go over these loads to determine whether they can be eliminated. We look
 * for the set of all Phi placeholders that feed the load and depend on a loop
 * Phi placeholder and, if we find no unknown value, we construct the necessary
 * Phi(s) or, if all other inputs are identical, i.e. the location does not
 * change in the loop, just use that input. If we do find an unknown input, this
 * must be from a loop back-edge and we replace the loop Phi placeholder with an
 * unknown value and re-process loads and stores that previously depended on
 * loop Phi placeholders. This shall find at least one load of an unknown value
 * which is now known to be unreplaceable or a new unknown value on a back-edge
 * and we repeat this process until each load is either marked for replacement
 * or found to be unreplaceable. As we mark at least one additional loop Phi
 * placeholder as unreplaceable in each iteration, this process shall terminate.
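 *
 * A sketch of a load replaced via a loop Phi (hypothetical snippet):
 *
 *     obj.f = 0;
 *     while (c) {
 *       int x = obj.f;  // Eliminated; replaced by a loop Phi(0, x + 1).
 *       obj.f = x + 1;
 *     }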
 *
 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
 * is limited by the number of Phi placeholders and their dependencies we need
 * to search with worst-case time complexity
 *    O(phi_placeholder_dependencies) .
 * The dependencies are usually just the Phi placeholders' potential inputs,
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
 * replacement search, there are additional dependencies to consider, see below.
 *
 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
 * algorithm to determine transitive closures for each found Phi placeholder,
 * and then match or materialize Phis from the smallest transitive closure,
 * so that we can determine if such a subset has a single other input. This has
 * time complexity
 *    O(phi_placeholders_found^3) .
 * Note that a successful TryReplacingLoopPhiPlaceholderWithDefault() does not
 * contribute to this as such Phi placeholders are replaced immediately.
 * The total time of all such successful cases has time complexity
 *    O(phi_placeholders^3)
 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A similar
 * argument applies to the searches used to find all successful cases, so their
 * total contribution is also just an insignificant
 *    O(phi_placeholder_dependencies) .
 * The materialization of Phis has an insignificant total time complexity of
 *    O(phi_placeholders * edges) .
 *
 * If we find an unknown input, we re-process heap values and loads with a time
 * complexity that's the same as phase 1 in the worst case. Adding this to
 * the depth-first search time complexity yields
 *    O(phi_placeholder_dependencies + heap_locations * edges + instructions)
 * for a single iteration. We can ignore the middle term as it's proportional
 * to the number of Phi placeholder inputs included in the first term. Using
 * the upper limit on the number of such iterations, the total time complexity
 * is
 *    O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
 *
 * The upper bound on Phi placeholder inputs is
 *    heap_locations * edges
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
 * include other heap locations in predecessor blocks with the upper bound of
 *    heap_locations^2 * edges .
 * Using the estimates
 *    edges <= blocks^2
 * and
 *    phi_placeholders <= heap_locations * blocks ,
 * the worst-case time complexity of the
 *    O(phi_placeholder_dependencies * phi_placeholders)
 * term from unknown input cases is actually
 *    O(heap_locations^3 * blocks^3) ,
 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
 * Adding the other term from the unknown input cases (to account for the case
 * with significantly more instructions than blocks and heap locations), the
 * phase 2 time complexity is
 *    O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
 *
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 3. Determine which stores to keep and which to eliminate.
 *
 * During instruction processing in phase 1 and re-processing in phase 2, we
 * keep a record of the stores and Phi placeholders that become observable and
 * now propagate the observable Phi placeholders to all actual stores that feed
 * them. Having determined observable stores, we look for stores that just
 * overwrite the old value with the same value. Since ignoring non-observable
 * stores actually changes the old values in heap locations, we need to
 * recalculate Phi placeholder replacements, but we proceed similarly to the
 * previous phase. We look for the set of all Phis that feed the old value
 * replaced by the store (but ignore whether they depend on a loop Phi) and, if
 * we find no unknown value, we try to match existing Phis (we do not create new
 * Phis anymore) or, if all other inputs are identical, i.e. the location does
 * not change in the loop, just use that input. If this succeeds and the old
 * value is identical to the value we're storing, such a store shall be
 * eliminated.
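 *
 * A sketch of such same-value store elimination (hypothetical snippet):
 *
 *     obj.f = x;
 *     ...             // obj.f is not observably changed on any path.
 *     obj.f = x;      // The old value equals the stored value; eliminated.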
 *
 * The work is similar to phase 2, except that we're not re-processing loads
 * and stores anymore, so the time complexity of phase 3 is
 *    O(heap_locations^3 * blocks^3) .
 *
 * There is additional complexity in matching existing Phis shared between
 * phases 1, 2 and 3. We never try to match two or more Phis at the same time
 * (this could be difficult and slow), so each matching attempt just looks at
 * Phis in the block (both old Phis and newly created Phis) and their inputs.
 * As we create at most `heap_locations` Phis in each block, the upper bound on
 * the number of Phis we look at is
 *    heap_locations * (old_phis + heap_locations)
 * and the worst-case time complexity is
 *    O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
 * The first term is lower than one term in phase 2, so the relevant part is
 *    O(heap_locations * old_phis * edges) .
 *
 * 4. Replace loads and remove unnecessary stores and singleton allocations.
 *
 * A special type of objects called singletons are instantiated in the method,
 * have a single name, i.e. no aliases, and therefore have exclusive heap
 * locations. Singletons are helpful in narrowing down the life span of a heap
 * location such that they do not always need to participate in merging heap
 * values. The allocation of a singleton can be eliminated if that singleton is
 * not used and does not persist at method return/deoptimization.
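 *
 * A sketch of singleton allocation elimination (hypothetical snippet, assuming
 * a trivial constructor and that `p` never escapes):
 *
 *     Point p = new Point();
 *     p.x = 1;        // Dead store once the load below is eliminated.
 *     return p.x;     // Replaced by the constant 1; `p` is then removed.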
 *
 * The time complexity of this phase is
 *    O(instructions + instruction_uses) .
 *
 * FIXME: The time complexities described above assume that the
 * HeapLocationCollector finds a heap location for an instruction in O(1)
 * time but it is currently O(heap_locations); this can be fixed by adding
 * a hash map to the HeapLocationCollector.
 */

namespace art HIDDEN {

#define LSE_VLOG \
  if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)

class HeapRefHolder;

// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
 public:
  LSEVisitor(HGraph* graph,
             const HeapLocationCollector& heap_location_collector,
             OptimizingCompilerStats* stats);

  void Run();

 private:
  class PhiPlaceholder {
   public:
    constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
    constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
        : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}

    constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
    constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;

    constexpr uint32_t GetBlockId() const {
      return block_id_;
    }

    constexpr size_t GetHeapLocation() const {
      return heap_location_;
    }

    constexpr bool Equals(const PhiPlaceholder& p2) const {
      return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
    }

    void Dump(std::ostream& oss) const {
      oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
    }

   private:
    uint32_t block_id_;
    uint32_t heap_location_;
  };

  friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
  friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);

  class Value {
   public:
    enum class ValuelessType {
      kInvalid,
      kUnknown,
      kDefault,
    };
    struct NeedsNonLoopPhiMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsLoopPhiMarker {
      PhiPlaceholder phi_;
    };

    static constexpr Value Invalid() {
      return Value(ValuelessType::kInvalid);
    }

    // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
    // A heap location can be set to an unknown heap value when:
    // - it is coming from outside the method,
    // - it is killed due to aliasing, or side effects, or merging with an unknown value.
    static constexpr Value Unknown() {
      return Value(ValuelessType::kUnknown);
    }

    // Default heap value after an allocation.
    // A heap location can be set to that value right after an allocation.
    static constexpr Value Default() {
      return Value(ValuelessType::kDefault);
    }

    static constexpr Value ForInstruction(HInstruction* instruction) {
      return Value(instruction);
    }

    static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsNonLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
      return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
                            : ForNonLoopPhiPlaceholder(phi_placeholder);
    }

    constexpr bool IsValid() const {
      return !IsInvalid();
    }

    constexpr bool IsInvalid() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kInvalid;
    }

    bool IsUnknown() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kUnknown;
    }

    bool IsDefault() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kDefault;
    }

    bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }

    bool NeedsNonLoopPhi() const {
      return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
    }

    bool NeedsLoopPhi() const {
      return std::holds_alternative<NeedsLoopPhiMarker>(value_);
    }

    bool NeedsPhi() const {
      return NeedsNonLoopPhi() || NeedsLoopPhi();
    }

    HInstruction* GetInstruction() const {
      DCHECK(IsInstruction()) << *this;
      return std::get<HInstruction*>(value_);
    }

    PhiPlaceholder GetPhiPlaceholder() const {
      DCHECK(NeedsPhi());
      if (NeedsNonLoopPhi()) {
        return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
      } else {
        return std::get<NeedsLoopPhiMarker>(value_).phi_;
      }
    }

    size_t GetHeapLocation() const {
      DCHECK(NeedsPhi()) << *this;
      return GetPhiPlaceholder().GetHeapLocation();
    }

    constexpr bool ExactEquals(Value other) const;

    constexpr bool Equals(Value other) const;

    constexpr bool Equals(HInstruction* instruction) const {
      return Equals(ForInstruction(instruction));
    }

    std::ostream& Dump(std::ostream& os) const;

    // Public for use with lists.
    constexpr Value() : value_(ValuelessType::kInvalid) {}

   private:
    using ValueHolder = std::variant<ValuelessType,
                                     HInstruction*,
                                     NeedsNonLoopPhiMarker,
                                     NeedsLoopPhiMarker>;
    constexpr ValuelessType GetValuelessType() const {
      return std::get<ValuelessType>(value_);
    }

    constexpr explicit Value(ValueHolder v) : value_(v) {}

    friend std::ostream& operator<<(std::ostream& os, const Value& v);

    ValueHolder value_;

    static_assert(std::is_move_assignable<PhiPlaceholder>::value);
  };

  friend constexpr bool operator==(const Value::NeedsLoopPhiMarker& p1,
                                   const Value::NeedsLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
                                   const Value::NeedsNonLoopPhiMarker& p2);

  // Get Phi placeholder index for access to `phi_placeholder_replacements_`
  // and "visited" bit vectors during depth-first searches.
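  // For example, with 4 heap locations, the placeholder for (block 3, heap
  // location 2) maps to index 3 * 4 + 2 = 14.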
  size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
    size_t res =
        phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
        phi_placeholder.GetHeapLocation();
    DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
        << res << " blks: " << GetGraph()->GetBlocks().size()
        << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
    return res;
  }

  size_t PhiPlaceholderIndex(Value phi_placeholder) const {
    return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
  }

  bool IsEscapingObject(ReferenceInfo* info) { return !info->IsSingletonAndRemovable(); }

  PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
    DCHECK_LT(off, num_phi_placeholders_);
    size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
    // Technically this should be (off - id) / NumberOfHeapLocations
    // but due to truncation it's all the same.
    size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
    return GetPhiPlaceholder(blk_id, id);
  }

  PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
    DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
    return PhiPlaceholder(block_id, idx);
  }

  Value Replacement(Value value) const {
    DCHECK(value.NeedsPhi()) << value << " phase: " << current_phase_;
    Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
    DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
    DCHECK(replacement.IsUnknown() ||
           FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
    return replacement;
  }

  Value ReplacementOrValue(Value value) const {
    if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
      return Replacement(value);
    } else {
      DCHECK_IMPLIES(value.IsInstruction(),
                     FindSubstitute(value.GetInstruction()) == value.GetInstruction());
      return value;
    }
  }

  // The record of a heap value and instruction(s) that feed that value.
  struct ValueRecord {
    Value value;
    Value stored_by;
  };

  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
                                                      HInstruction* value,
                                                      DataType::Type expected_type) {
    // We should never add a type conversion to a boolean value.
    if (expected_type == DataType::Type::kBool ||
        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
        // TODO: This prevents type conversion of default values but we can still insert
        // type conversion of other constants and there is no constant folding pass after LSE.
        IsZeroBitPattern(value)) {
      return nullptr;
    }

    // Check if there is already a suitable TypeConversion we can reuse.
    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
      if (use.GetUser()->IsTypeConversion() &&
          use.GetUser()->GetType() == expected_type &&
          // TODO: We could move the TypeConversion to a common dominator
          // if it does not cross an irreducible loop header.
          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
          // Don't share across irreducible loop headers.
          // TODO: can be more fine-grained than this by testing each dominator.
          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
           !GetGraph()->HasIrreducibleLoops())) {
        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
          // Move the TypeConversion before the instruction.
          use.GetUser()->MoveBefore(instruction);
        }
        DCHECK(use.GetUser()->StrictlyDominates(instruction));
        return use.GetUser()->AsTypeConversion();
      }
    }

    // We must create a new TypeConversion instruction.
    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
          expected_type, value, instruction->GetDexPc());
    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
    return type_conversion;
  }

  // Find an instruction's substitute if it's a removed load.
  // Return the same instruction if it should not be removed.
  HInstruction* FindSubstitute(HInstruction* instruction) const {
    size_t id = static_cast<size_t>(instruction->GetId());
    if (id >= substitute_instructions_for_loads_.size()) {
      // New Phi (may not be in the graph yet), or default value.
      DCHECK(!IsLoad(instruction));
      return instruction;
    }
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    DCHECK(substitute == nullptr || IsLoad(instruction));
    return (substitute != nullptr) ? substitute : instruction;
  }

  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
    DCHECK(IsLoad(load));
    DCHECK_EQ(FindSubstitute(load), load);
    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
        "Unexpected heap_value that has a substitute " << heap_value->DebugName();

    // The load expects to load the heap value as type load->GetType().
    // However the tracked heap value may not be of that type. An explicit
    // type conversion may be needed.
    // There are actually three types involved here:
    // (1) tracked heap value's type (type A)
    // (2) heap location (field or element)'s type (type B)
    // (3) load's type (type C)
    // We guarantee that type A stored as type B and then fetched out as
    // type C is the same as casting from type A to type C directly, since
    // type B and type C will have the same size which is guaranteed in
    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
    // So we only need one type conversion from type A to type C.
    HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
        load, heap_value, load->GetType());

    substitute_instructions_for_loads_[load->GetId()] =
        type_conversion != nullptr ? type_conversion : heap_value;
  }

  static bool IsLoad(HInstruction* instruction) {
    // An unresolved load is not treated as a load.
    return instruction->IsInstanceFieldGet() ||
           instruction->IsStaticFieldGet() ||
           instruction->IsVecLoad() ||
           instruction->IsArrayGet();
  }

  static bool IsStore(HInstruction* instruction) {
    // An unresolved store is not treated as a store.
    return instruction->IsInstanceFieldSet() ||
           instruction->IsArraySet() ||
           instruction->IsVecStore() ||
           instruction->IsStaticFieldSet();
  }

  // Check if it is allowed to use default values or Phis for the specified load.
  static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
    DCHECK(IsLoad(instruction));
    // Using defaults for VecLoads requires creating additional vector operations.
    // As there are some issues with scheduling vector operations it is better
    // to avoid creating them.
    return !instruction->IsVecOperation();
  }

  // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
  // This is necessary if the stored heap value can be observed.
  void KeepStores(Value value) {
    if (value.IsUnknown()) {
      return;
    }
    if (value.NeedsPhi()) {
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
    } else {
      HInstruction* instruction = value.GetInstruction();
      DCHECK(IsStore(instruction));
      kept_stores_.SetBit(instruction->GetId());
    }
  }

  // If a heap location X may alias with the heap location at `loc_index`
  // and the heap value of that heap location X holds a store, keep that store.
  // It's needed for a dependent load that's not eliminated since any store
  // that may put a value into the load's heap location needs to be kept.
  void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
                                     size_t loc_index) {
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      if (i == loc_index) {
        // We use this function when reading a location with an unknown value and
        // therefore we cannot know what exact store wrote that unknown value.
        // But we can have a Phi placeholder here marking multiple stores to keep.
        DCHECK(!heap_values[i].stored_by.IsInstruction());
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      } else if (heap_location_collector_.MayAlias(i, loc_index)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  HInstruction* GetDefaultValue(DataType::Type type) {
    switch (type) {
      case DataType::Type::kReference:
        return GetGraph()->GetNullConstant();
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
      case DataType::Type::kInt32:
        return GetGraph()->GetIntConstant(0);
      case DataType::Type::kInt64:
        return GetGraph()->GetLongConstant(0);
      case DataType::Type::kFloat32:
        return GetGraph()->GetFloatConstant(0);
      case DataType::Type::kFloat64:
        return GetGraph()->GetDoubleConstant(0);
      default:
        UNREACHABLE();
    }
  }

  bool CanValueBeKeptIfSameAsNew(Value value,
                                 HInstruction* new_value,
                                 HInstruction* new_value_set_instr) {
    // For field/array set location operations, if the value is the same as the new_value
    // it can be kept even if aliasing happens. All aliased operations will access the same memory
    // range.
    // For vector values, this is not true. For example:
    //  packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
    //  VecStore array[i  ,i+1,i+2,i+3] = packed_data;
    //  VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
    //  VecLoad  vx = array[i,i+1,i+2,i+3];            <-- Cannot be eliminated because the value
    //                                                     here is not packed_data anymore.
    //
    // TODO: to allow such 'same value' optimization on vector data,
    // LSA needs to report more fine-grained MAY alias information:
    // (1) May alias due to two vector accesses partially overlapping,
    //     e.g. a[i..i+3] and a[i+1..i+4].
    // (2) May alias due to two vector accesses potentially overlapping completely,
    //     e.g. a[i..i+3] and b[i..i+3].
    // (3) May alias but the exact relationship between the two locations is unknown,
    //     e.g. a[i..i+3] and b[j..j+3], where the values of a, b, i, j are all unknown.
    // This 'same value' optimization can apply only in case (2).
    if (new_value_set_instr->IsVecOperation()) {
      return false;
    }

    return value.Equals(new_value);
  }

  Value PrepareLoopValue(HBasicBlock* block, size_t idx);
  Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
  void PrepareLoopRecords(HBasicBlock* block);
  Value MergePredecessorValues(HBasicBlock* block, size_t idx);
  void MergePredecessorRecords(HBasicBlock* block);

  void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);

  void VisitGetLocation(HInstruction* instruction, size_t idx);
  void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
  void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
    field_infos_[heap_loc] = info;
  }

  void VisitBasicBlock(HBasicBlock* block) override;

  enum class Phase {
    kLoadElimination,
    kStoreElimination,
  };

  bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;

  bool TryReplacingLoopPhiPlaceholderWithDefault(
      PhiPlaceholder phi_placeholder,
      DataType::Type type,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  bool TryReplacingLoopPhiPlaceholderWithSingleInput(
      PhiPlaceholder phi_placeholder,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
      PhiPlaceholder phi_placeholder,
      /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
      DataType::Type type,
      bool can_use_default_or_phi);
  bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                           DataType::Type type);
  bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
  bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                           DataType::Type type);
  bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
  std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
                                                         HInstruction* load);
  void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
  void ProcessLoadsRequiringLoopPhis();

  void SearchPhiPlaceholdersForKeptStores();
  void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
  void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
  void FindStoresWritingOldValues();
  void FinishFullLSE();

  void HandleAcquireLoad(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
           (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Acquire operations, e.g. MONITOR_ENTER, change the thread's view of the memory, so we must
    // invalidate all current values.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::Unknown();
      heap_values[i].value = Value::Unknown();
    }

    // Note that there's no need to record the load as subsequent acquire loads shouldn't be
    // eliminated either.
  }

  void HandleReleaseStore(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
           (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Release operations, e.g. MONITOR_EXIT, do not affect this thread's view of the memory, but
    // they will push the modifications for other threads to see. Therefore, we must keep the
    // stores but there's no need to clobber the value.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::Unknown();
    }

    // Note that there's no need to record the store as subsequent release stores shouldn't be
    // eliminated either.
  }

  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleAcquireLoad(instruction);
        return;
      }
      // Treat it as a normal load if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
  }

  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleReleaseStore(instruction);
        return;
      }
      // Treat it as a normal store if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleAcquireLoad(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
  }

  void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleReleaseStore(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
    HInstruction* object = monitor_op->InputAt(0);
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
        heap_location_collector_.HuntForOriginalReference(object));
    if (ref_info->IsSingletonAndRemovable()) {
      // If the object is a removable singleton, we know that no other threads will have
      // access to it, and we can remove the MonitorOperation instruction.
      // MONITOR_ENTER throws when encountering a null object. If `object` is a removable
      // singleton, it is guaranteed to be non-null so we don't have to worry about the NullCheck.
      DCHECK(!object->CanBeNull());
      monitor_op->GetBlock()->RemoveInstruction(monitor_op);
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedMonitorOp);
      return;
    }

    // We detected a monitor operation that we couldn't remove. See also LSEVisitor::Run().
    monitor_op->GetBlock()->GetGraph()->SetHasMonitorOperations(true);
    if (monitor_op->IsEnter()) {
      HandleAcquireLoad(monitor_op);
    } else {
      HandleReleaseStore(monitor_op);
    }
  }

  void VisitArrayGet(HArrayGet* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitArraySet(HArraySet* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitVecLoad(HVecLoad* instruction) override {
    DCHECK(!instruction->IsPredicated());
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitVecStore(HVecStore* instruction) override {
    DCHECK(!instruction->IsPredicated());
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitDeoptimize(HDeoptimize* instruction) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
    HBasicBlock* block = instruction->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      Value* stored_by = &heap_values[i].stored_by;
      if (stored_by->IsUnknown()) {
        continue;
      }
      // Stores are generally observable after deoptimization, except
      // for singletons that don't escape in the deoptimization environment.
      bool observable = true;
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (!inside_a_try && info->IsSingleton()) {
        HInstruction* reference = info->GetReference();
        // Finalizable objects always escape.
        const bool finalizable_object =
            reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
        if (!finalizable_object && !IsEscapingObject(info)) {
          // Check whether the reference for a store is used by an environment local of
          // the HDeoptimize. If not, the singleton is not observed after deoptimization.
          const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
          observable = std::any_of(
              env_uses.begin(),
              env_uses.end(),
              [instruction](const HUseListNode<HEnvironment*>& use) {
                return use.GetUser()->GetHolder() == instruction;
              });
        }
      }
      if (observable) {
        KeepStores(*stored_by);
        *stored_by = Value::Unknown();
      }
    }
  }

  // Keep necessary stores before exiting a method via return/throw.
  void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (must_keep_stores || IsEscapingObject(ref_info)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitReturn(HReturn* instruction) override {
    HandleExit(instruction->GetBlock());
  }

  void VisitReturnVoid(HReturnVoid* return_void) override {
    HandleExit(return_void->GetBlock());
  }

  void HandleThrowingInstruction(HInstruction* instruction) {
    DCHECK(instruction->CanThrow());
    // If we are inside of a try, singletons can become visible since we may not exit the method.
    HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
  }

  void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
    HandleThrowingInstruction(method_entry);
  }

  void VisitMethodExitHook(HMethodExitHook* method_exit) override {
    HandleThrowingInstruction(method_exit);
  }

  void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
    HandleThrowingInstruction(div_zero_check);
  }

  void VisitNullCheck(HNullCheck* null_check) override {
    HandleThrowingInstruction(null_check);
  }

  void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
    HandleThrowingInstruction(bounds_check);
  }

  void VisitLoadClass(HLoadClass* load_class) override {
    if (load_class->CanThrow()) {
      HandleThrowingInstruction(load_class);
    }
  }

  void VisitLoadString(HLoadString* load_string) override {
    if (load_string->CanThrow()) {
      HandleThrowingInstruction(load_string);
    }
  }

  void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
    HandleThrowingInstruction(load_method_handle);
  }

  void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
    HandleThrowingInstruction(load_method_type);
  }

  void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
    HandleThrowingInstruction(sb_append);
  }

  void VisitThrow(HThrow* throw_instruction) override {
    HandleThrowingInstruction(throw_instruction);
  }

  void VisitCheckCast(HCheckCast* check_cast) override {
    HandleThrowingInstruction(check_cast);
  }

  void HandleInvoke(HInstruction* instruction) {
    // If `instruction` can throw we have to presume all stores are visible.
    const bool can_throw = instruction->CanThrow();
    // If we are in a try, even singletons are observable.
    const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
    SideEffects side_effects = instruction->GetSideEffects();
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      // We don't need to do anything if the reference has not escaped at this point.
      // This is true if we never escape.
      if (!can_throw_inside_a_try && ref_info->IsSingleton()) {
        // Singleton references cannot be seen by the callee.
      } else {
        if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
          // Previous stores may become visible (read) and/or impossible for LSE to track (write).
          KeepStores(heap_values[i].stored_by);
          heap_values[i].stored_by = Value::Unknown();
        }
        if (side_effects.DoesAnyWrite()) {
          // The value may be clobbered.
          heap_values[i].value = Value::Unknown();
        }
      }
    }
  }

  void VisitInvoke(HInvoke* invoke) override {
    HandleInvoke(invoke);
  }

  void VisitClinitCheck(HClinitCheck* clinit) override {
    // A class initialization check can result in the class initializer calling arbitrary methods.
    HandleInvoke(clinit);
  }

  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }
1050 
VisitNewInstance(HNewInstance * new_instance)1051   void VisitNewInstance(HNewInstance* new_instance) override {
1052     // If we are in a try, even singletons are observable.
1053     const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
1054     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
1055     if (ref_info == nullptr) {
1056       // new_instance isn't used for field accesses. No need to process it.
1057       return;
1058     }
1059     if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
1060       DCHECK(!new_instance->IsFinalizable());
1061       // new_instance can potentially be eliminated.
1062       singleton_new_instances_.push_back(new_instance);
1063     }
1064     HBasicBlock* block = new_instance->GetBlock();
1065     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1066     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1067       ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1068       HInstruction* ref = info->GetReference();
1069       size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
1070       if (ref == new_instance) {
1071         if (offset >= mirror::kObjectHeaderSize ||
1072             MemberOffset(offset) == mirror::Object::MonitorOffset()) {
1073           // Instance fields except the header fields are set to default heap values.
1074           // The shadow$_monitor_ field is set to the default value however.
1075           heap_values[i].value = Value::Default();
1076           heap_values[i].stored_by = Value::Unknown();
1077         } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
1078           // The shadow$_klass_ field is special and has an actual value however.
1079           heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
1080           heap_values[i].stored_by = Value::Unknown();
1081         }
1082       } else if (inside_a_try || IsEscapingObject(info)) {
1083         // Since NewInstance can throw, we presume all previous stores could be visible.
1084         KeepStores(heap_values[i].stored_by);
1085         heap_values[i].stored_by = Value::Unknown();
1086       }
1087     }
1088   }
1089 
  void VisitNewArray(HNewArray* new_array) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
    if (ref_info == nullptr) {
      // new_array isn't used for array accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable()) {
      if (new_array->GetLength()->IsIntConstant() &&
          new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
        // new_array can potentially be eliminated.
        singleton_new_instances_.push_back(new_array);
      } else {
        // new_array may throw NegativeArraySizeException. Keep it.
      }
    }
    HBasicBlock* block = new_array->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
      ReferenceInfo* info = location->GetReferenceInfo();
      HInstruction* ref = info->GetReference();
      if (ref == new_array && location->GetIndex() != nullptr) {
        // Array elements are set to default heap values.
        heap_values[i].value = Value::Default();
        heap_values[i].stored_by = Value::Unknown();
      } else if (inside_a_try || IsEscapingObject(info)) {
        // Since NewArray can throw, we presume all previous stores could be visible.
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }
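
  // Similarly for VisitNewArray() above (illustrative, hypothetical Java):
  //
  //   int[] a = new int[10];  // singleton with a constant, non-negative length
  //   int x = a[i];           // load eliminated; elements start at the default 0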
  void VisitInstruction(HInstruction* instruction) override {
    // Throwing instructions must be handled specially.
    DCHECK(!instruction->CanThrow());
  }

  const HeapLocationCollector& heap_location_collector_;

  // Use local allocator for allocating memory.
  ScopedArenaAllocator allocator_;

  // The maximum number of unique Phi placeholders there can possibly be.
  size_t num_phi_placeholders_;

  // One array of heap value records for each block.
  ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;

  // We record loads and stores for re-processing when we find a loop Phi placeholder
  // with unknown value from a predecessor, and also for removing stores that are
  // found to be dead, i.e. not marked in `kept_stores_` at the end.
  struct LoadStoreRecord {
    HInstruction* load_or_store;
    size_t heap_location_index;
  };
  ScopedArenaVector<LoadStoreRecord> loads_and_stores_;

  // We record the substitute instructions for loads that should be
  // eliminated but may be used by heap locations. They'll be removed
  // in the end. These are indexed by the load's id.
  ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;

  // Record stores to keep in a bit vector indexed by instruction ID.
  ArenaBitVector kept_stores_;
  // When we need to keep all stores that feed a Phi placeholder, we just record the
  // index of that placeholder for processing after graph traversal.
  ArenaBitVector phi_placeholders_to_search_for_kept_stores_;

  // Loads that would require a loop Phi to replace are recorded for processing
  // later as we do not have enough information from back-edges to determine if
  // a suitable Phi can be found or created when we visit these loads.
  // This is a flat "map" indexed by the load instruction id.
  ScopedArenaVector<ValueRecord*> loads_requiring_loop_phi_;

  // For stores, record the old value records that were replaced and the stored values.
  struct StoreRecord {
    StoreRecord(ValueRecord old_value_record_in, HInstruction* stored_value_in)
        : old_value_record(old_value_record_in), stored_value(stored_value_in) {}
    ValueRecord old_value_record;
    HInstruction* stored_value;
  };
  // This is a flat "map" indexed by the store instruction id.
  ScopedArenaVector<StoreRecord*> store_records_;

  // Replacements for Phi placeholders.
  // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
  ScopedArenaVector<Value> phi_placeholder_replacements_;

  ScopedArenaVector<HInstruction*> singleton_new_instances_;

  // The field infos for each heap location (if relevant).
  ScopedArenaVector<const FieldInfo*> field_infos_;

  Phase current_phase_;

  friend std::ostream& operator<<(std::ostream& os, const Value& v);
  friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);

  DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};

std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
  switch (phase) {
    case LSEVisitor::Phase::kLoadElimination:
      return oss << "kLoadElimination";
    case LSEVisitor::Phase::kStoreElimination:
      return oss << "kStoreElimination";
  }
}

constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
                          const LSEVisitor::PhiPlaceholder& p2) {
  return p1.Equals(p2);
}

constexpr bool operator==(const LSEVisitor::Value::NeedsLoopPhiMarker& p1,
                          const LSEVisitor::Value::NeedsLoopPhiMarker& p2) {
  return p1.phi_ == p2.phi_;
}

constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
                          const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
  return p1.phi_ == p2.phi_;
}

std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
  p.Dump(oss);
  return oss;
}

constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
  return value_ == other.value_;
}

constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
  // Only valid values can be compared.
  DCHECK(IsValid());
  DCHECK(other.IsValid());
  if (value_ == other.value_) {
    // Note: Two unknown values are considered different.
    return !IsUnknown();
  } else {
    // Default is considered equal to zero-bit-pattern instructions.
    return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
           (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
  }
}
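
// For example (illustrative use of Value::Equals() above; `graph` stands for
// the enclosing HGraph):
//
//   Value::Default().Equals(Value::ForInstruction(graph->GetIntConstant(0)))
//
// yields true, as an int constant 0 is a zero bit pattern, while
// Value::Unknown().Equals(Value::Unknown()) yields false: two unknown values
// are never considered equal.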
std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
  if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
    switch (GetValuelessType()) {
      case ValuelessType::kDefault:
        return os << "Default";
      case ValuelessType::kUnknown:
        return os << "Unknown";
      case ValuelessType::kInvalid:
        return os << "Invalid";
    }
  } else if (IsInstruction()) {
    return os << "Instruction[id: " << GetInstruction()->GetId()
              << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
  } else if (NeedsLoopPhi()) {
    return os << "NeedsLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
              << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
  } else {
    return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
              << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
  }
}

std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
  return v.Dump(os);
}

LSEVisitor::LSEVisitor(HGraph* graph,
                       const HeapLocationCollector& heap_location_collector,
                       OptimizingCompilerStats* stats)
    : HGraphDelegateVisitor(graph, stats),
      heap_location_collector_(heap_location_collector),
      allocator_(graph->GetArenaStack()),
      num_phi_placeholders_(GetGraph()->GetBlocks().size() *
                            heap_location_collector_.GetNumberOfHeapLocations()),
      heap_values_for_(graph->GetBlocks().size(),
                       ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
                       allocator_.Adapter(kArenaAllocLSE)),
      loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
      // We may add new instructions (default values, Phis) but we're not adding loads
      // or stores, so we shall not need to resize the following vector and BitVector.
      substitute_instructions_for_loads_(
          graph->GetCurrentInstructionId(), nullptr, allocator_.Adapter(kArenaAllocLSE)),
      kept_stores_(&allocator_,
                   /*start_bits=*/graph->GetCurrentInstructionId(),
                   /*expandable=*/false,
                   kArenaAllocLSE),
      phi_placeholders_to_search_for_kept_stores_(&allocator_,
                                                  num_phi_placeholders_,
                                                  /*expandable=*/false,
                                                  kArenaAllocLSE),
      loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
      store_records_(allocator_.Adapter(kArenaAllocLSE)),
      phi_placeholder_replacements_(
          num_phi_placeholders_, Value::Invalid(), allocator_.Adapter(kArenaAllocLSE)),
      singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
      field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
                   allocator_.Adapter(kArenaAllocLSE)),
      current_phase_(Phase::kLoadElimination) {}

LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
  // If the pre-header value is known (which implies that the reference dominates this
  // block), use a Phi placeholder for the value in the loop header. If all predecessors
  // are later found to have a known value, we can replace loads from this location,
  // either with the pre-header value or with a new Phi. For array locations, the index
  // may be defined inside the loop but the only known value in that case should be the
  // default value or a Phi placeholder that can be replaced only with the default value.
  HLoopInformation* loop_info = block->GetLoopInformation();
  uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
  Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
  if (pre_header_value.IsUnknown()) {
    return pre_header_value;
  }
  if (kIsDebugBuild) {
    // Check that the reference indeed dominates this loop.
    HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
    HInstruction* ref = location->GetReferenceInfo()->GetReference();
    CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
        << GetGraph()->PrettyMethod();
    // Check that the index, if defined inside the loop, tracks a default value
    // or a Phi placeholder requiring a loop Phi.
    HInstruction* index = location->GetIndex();
    if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
      CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
          << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
          << pre_header_value;
    }
  }
  PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
}
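
// Illustrative note for PrepareLoopValue() above (hypothetical Java input):
//
//   obj.f = 1;
//   while (cond) { use(obj.f); obj.f = 2; }
//
// The loop header's value for `obj.f` becomes a loop Phi placeholder; once
// all back-edge values are known, loads of `obj.f` inside the loop can be
// replaced with a materialized Phi(1, 2) instead of reading memory.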
LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
  // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
  // if the value in the location escapes. This is not applicable to singletons that are
  // defined inside the loop as they shall be dead in the loop header.
  const ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
  const HInstruction* reference = ref_info->GetReference();
  // Finalizable objects always escape.
  const bool is_finalizable =
      reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
  if (ref_info->IsSingleton() &&
      block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
      !is_finalizable) {
    return Value::Unknown();
  }
  PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return Value::ForLoopPhiPlaceholder(phi_placeholder);
}

void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
  DCHECK(block->IsLoopHeader());
  int block_id = block->GetBlockId();
  HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
  ScopedArenaVector<ValueRecord>& pre_header_heap_values =
      heap_values_for_[pre_header->GetBlockId()];
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
  DCHECK(heap_values.empty());

  // Don't eliminate loads in irreducible loops.
  if (block->GetLoopInformation()->IsIrreducible()) {
    heap_values.resize(num_heap_locations,
                       {/*value=*/Value::Unknown(), /*stored_by=*/Value::Unknown()});
    // Also keep the stores before the loop header, including in blocks that were not visited yet.
    for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
      KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
    }
    return;
  }

  // Fill `heap_values` based on values from pre-header.
  heap_values.reserve(num_heap_locations);
  for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
    heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
  }
}

LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
  ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
  DCHECK(!predecessors.empty());
  Value merged_value =
      ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
  for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
    Value pred_value =
        ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
    if (pred_value.Equals(merged_value)) {
      // Value is the same. No need to update our merged value.
      continue;
    } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
      // If one is unknown and the other is not, the merged value is unknown.
      merged_value = Value::Unknown();
      break;
    } else {
      // There are conflicting known values. We may still be able to replace loads with a Phi.
      PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
      // Propagate the need for a new loop Phi from all predecessors.
      bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
      merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
    }
  }
  return merged_value;
}
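
// Illustrative diamond merge for MergePredecessorValues() above (hypothetical
// Java input):
//
//   if (cond) { obj.f = 1; } else { obj.f = 2; }
//   int x = obj.f;  // merged value is a Phi placeholder; load becomes Phi(1, 2)
//
// If the value in either predecessor were unknown, the merge would be unknown
// and the subsequent load could not be eliminated.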
void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
  if (block->IsExitBlock()) {
    // Exit block doesn't really merge values since the control flow ends in
    // its predecessors. Each predecessor needs to make sure stores are kept
    // if necessary.
    return;
  }

  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
  DCHECK(heap_values.empty());
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
    DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
    heap_values.resize(num_heap_locations,
                       {/*value=*/Value::Unknown(), /*stored_by=*/Value::Unknown()});
    return;
  }

  heap_values.reserve(num_heap_locations);
  for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
    Value merged_value = MergePredecessorValues(block, idx);
    if (kIsDebugBuild) {
      if (merged_value.NeedsPhi()) {
        uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
        CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
      } else if (merged_value.IsInstruction()) {
        CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
      }
    }
    ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
    Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
    for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
      uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
      Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
      if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
          !merged_stored_by.Equals(stored_by)) {
        // Use the Phi placeholder to track that we need to keep stores from all predecessors.
        PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
        merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
        break;
      }
    }
    heap_values.push_back({ merged_value, merged_stored_by });
  }
}

static HInstruction* FindOrConstructNonLoopPhi(
    HBasicBlock* block,
    const ScopedArenaVector<HInstruction*>& phi_inputs,
    DataType::Type type) {
  for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
    HInstruction* phi = phi_it.Current();
    DCHECK_EQ(phi->InputCount(), phi_inputs.size());
    auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
      return lhs == rhs.GetInstruction();
    };
    if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
      return phi;
    }
  }
  ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
  HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
  for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
    DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
    phi->SetRawInputAt(i, phi_inputs[i]);
  }
  block->AddPhi(phi);
  if (type == DataType::Type::kReference) {
    // Update reference type information. Pass invalid handles, these are not used for Phis.
    ReferenceTypePropagation rtp_fixup(block->GetGraph(),
                                       Handle<mirror::DexCache>(),
                                       /* is_first_run= */ false);
    rtp_fixup.Visit(phi);
  }
  return phi;
}
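
// Usage note for FindOrConstructNonLoopPhi() above: reusing an existing Phi
// with identical inputs keeps the graph canonical. For example, if block B
// already has Phi(x, y) from an earlier elimination, a later request for
// inputs {x, y} in B returns that same Phi rather than creating a duplicate.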
void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  size_t idx = phi_placeholder.GetHeapLocation();

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Reuse the same vector for collecting phi inputs.
  ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));

  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
      // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
      // that directly or indirectly depends on it, so it was already processed as part of the
      // other Phi placeholder's dependencies before this one got back to the top of the stack.
      work_queue.pop_back();
      continue;
    }
    uint32_t current_block_id = current_phi_placeholder.GetBlockId();
    HBasicBlock* current_block = blocks[current_block_id];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);

    // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
    // And the only way for such merged value to reach a different heap location is through
    // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
    // seen here are tied to one heap location.
    DCHECK(!current_block->IsLoopHeader())
        << current_phi_placeholder << " phase: " << current_phase_;
    DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);

    phi_inputs.clear();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      DCHECK(!pred_value.IsUnknown()) << pred_value << " block " << current_block->GetBlockId()
                                      << " pred: " << predecessor->GetBlockId();
      if (pred_value.NeedsNonLoopPhi()) {
        // We need to process the Phi placeholder first.
        work_queue.push_back(pred_value.GetPhiPlaceholder());
      } else if (pred_value.IsDefault()) {
        phi_inputs.push_back(GetDefaultValue(type));
      } else {
        DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
                                           << " pred: " << predecessor->GetBlockId();
        phi_inputs.push_back(pred_value.GetInstruction());
      }
    }
    if (phi_inputs.size() == current_block->GetPredecessors().size()) {
      // All inputs are available. Find or construct the Phi replacement.
      phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
          Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
      // Remove the block from the queue.
      DCHECK_EQ(current_phi_placeholder, work_queue.back());
      work_queue.pop_back();
    }
  }
}

void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
  DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
  uint32_t block_id = instruction->GetBlock()->GetBlockId();
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
  ValueRecord& record = heap_values[idx];
  if (instruction->IsFieldAccess()) {
    RecordFieldInfo(&instruction->GetFieldInfo(), idx);
  }
  DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
  loads_and_stores_.push_back({ instruction, idx });
  if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
      !IsDefaultOrPhiAllowedForLoad(instruction)) {
    record.value = Value::Unknown();
  }
  if (record.value.IsDefault()) {
    KeepStores(record.stored_by);
    HInstruction* constant = GetDefaultValue(instruction->GetType());
    AddRemovedLoad(instruction, constant);
    record.value = Value::ForInstruction(constant);
  } else if (record.value.IsUnknown()) {
    // Load isn't eliminated. Put the load as the value into the HeapLocation.
    // This acts like GVN but with better aliasing analysis.
    Value old_value = record.value;
    record.value = Value::ForInstruction(instruction);
    KeepStoresIfAliasedToLocation(heap_values, idx);
    KeepStores(old_value);
  } else if (record.value.NeedsLoopPhi()) {
    // We do not know yet if the value is known for all back edges. Record for future processing.
    if (loads_requiring_loop_phi_.empty()) {
      loads_requiring_loop_phi_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
    }
    DCHECK_EQ(loads_requiring_loop_phi_[instruction->GetId()], nullptr);
    loads_requiring_loop_phi_[instruction->GetId()] =
        new (allocator_.Alloc<ValueRecord>(kArenaAllocLSE)) ValueRecord(record);
  } else {
    // This load can be eliminated but we may need to construct non-loop Phis.
    if (record.value.NeedsNonLoopPhi()) {
      MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
      record.value = Replacement(record.value);
    }
    HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
    AddRemovedLoad(instruction, heap_value);
  }
}
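
// Illustrative note for VisitGetLocation() above (hypothetical Java input):
// even when a load cannot be eliminated, recording it as the heap value lets
// later loads be replaced:
//
//   int x = obj.f;  // kept; the heap value for obj.f becomes this load
//   int y = obj.f;  // eliminated; replaced by x
//
// This acts like GVN, but benefits from LSA's aliasing information.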
void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
  DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
  DCHECK(!IsStore(value)) << value->DebugName();
  if (instruction->IsFieldAccess()) {
    RecordFieldInfo(&instruction->GetFieldInfo(), idx);
  }
  // value may already have a substitute.
  value = FindSubstitute(value);
  HBasicBlock* block = instruction->GetBlock();
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
  ValueRecord& record = heap_values[idx];
  DCHECK_IMPLIES(record.value.IsInstruction(),
                 FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());

  if (record.value.Equals(value)) {
    // Store into the heap location with the same value.
    // This store can be eliminated right away.
    block->RemoveInstruction(instruction);
    return;
  }

  if (store_records_.empty()) {
    store_records_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
  }
  DCHECK_EQ(store_records_[instruction->GetId()], nullptr);
  store_records_[instruction->GetId()] =
      new (allocator_.Alloc<StoreRecord>(kArenaAllocLSE)) StoreRecord(record, value);
  loads_and_stores_.push_back({ instruction, idx });

  // If the `record.stored_by` specified a store from this block, it shall be removed
  // at the end, except for throwing ArraySet; it cannot be marked for keeping in
  // `kept_stores_` anymore after we update the `record.stored_by` below.
  DCHECK(!record.stored_by.IsInstruction() ||
         record.stored_by.GetInstruction()->GetBlock() != block ||
         record.stored_by.GetInstruction()->CanThrow() ||
         !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));

  if (instruction->CanThrow()) {
    // Previous stores can become visible.
    HandleThrowingInstruction(instruction);
    // We cannot remove a possibly throwing store.
    // After marking it as kept, it does not matter if we track it in `stored_by` or not.
    kept_stores_.SetBit(instruction->GetId());
  }

  // Update the record.
  // Note that the `value` can be a newly created `Phi` with an id that falls outside
  // the allocated `loads_requiring_loop_phi_` range.
  DCHECK_IMPLIES(IsLoad(value) && !loads_requiring_loop_phi_.empty(),
                 static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size());
  if (static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size() &&
      loads_requiring_loop_phi_[value->GetId()] != nullptr) {
    // Propagate the Phi placeholder to the record.
    record.value = loads_requiring_loop_phi_[value->GetId()]->value;
    DCHECK(record.value.NeedsLoopPhi());
  } else {
    record.value = Value::ForInstruction(value);
  }
  // Track the store in the value record. If the value is loaded or needed after
  // return/deoptimization later, this store isn't really redundant.
  record.stored_by = Value::ForInstruction(instruction);

  // This store may kill values in other heap locations due to aliasing.
  for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
    if (i == idx ||
        heap_values[i].value.IsUnknown() ||
        CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
        !heap_location_collector_.MayAlias(i, idx)) {
      continue;
    }
    // Kill heap locations that may alias and keep previous stores to these locations.
    KeepStores(heap_values[i].stored_by);
    heap_values[i].stored_by = Value::Unknown();
    heap_values[i].value = Value::Unknown();
  }
}
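
// Illustrative note for VisitSetLocation() above (hypothetical Java input):
// a store of the value already in the location is removed immediately by the
// Equals() check:
//
//   obj.f = x;
//   obj.f = x;  // removed right away
//
// Other stores are provisionally tracked in `stored_by` and removed at the
// end of the pass if still dead, i.e. not marked in `kept_stores_`.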
void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
  // Populate the heap_values array for this block.
  // TODO: try to reuse the heap_values array from one predecessor if possible.
  if (block->IsLoopHeader()) {
    PrepareLoopRecords(block);
  } else {
    MergePredecessorRecords(block);
  }
  // Visit non-Phi instructions.
  VisitNonPhiInstructions(block);
}

bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
  DCHECK_NE(idx1, idx2);
  DCHECK(loop_header->IsLoopHeader());
  if (heap_location_collector_.MayAlias(idx1, idx2)) {
    return true;
  }
  // For array locations with index defined inside the loop, include
  // all other locations in the array, even those that LSA declares
  // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
  // refer to the same locations for different iterations. (LSA's
  // `ComputeMayAlias()` does not consider different loop iterations.)
  HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
  HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
  if (loc1->IsArray() &&
      loc2->IsArray() &&
      HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
                                                loc2->GetReferenceInfo())) {
    HLoopInformation* loop_info = loop_header->GetLoopInformation();
    if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
        loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
      // Consider the locations aliasing. Do not optimize the case where both indexes
      // are loop invariants defined inside the loop, rely on LICM to pull them out.
      return true;
    }
  }
  return false;
}
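
// Illustrative note for MayAliasOnBackEdge() above (hypothetical Java input):
//
//   for (int i = 0; i + 1 < n; ++i) { a[i + 1] = a[i]; }
//
// LSA treats `a[i]` and `a[i + 1]` as non-aliasing within one iteration, but
// the value stored to `a[i + 1]` is loaded as `a[i]` on the next iteration,
// so across the back-edge the two locations must be treated as aliasing.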
bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
    PhiPlaceholder phi_placeholder,
    DataType::Type type,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth first search to check if any non-Phi input is unknown.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
          visited.SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
        }
      } else if (!value.Equals(Value::Default())) {
        return false;  // Report failure.
      }
    }
    if (block->IsLoopHeader()) {
      // For back-edges we need to check all locations that write to the same array,
      // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`,
      // as they may actually refer to the same locations for different iterations.
      for (size_t i = 0; i != num_heap_locations; ++i) {
        if (i == idx ||
            heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
                heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
          continue;
        }
        for (HBasicBlock* predecessor : block->GetPredecessors()) {
          // Check if there were any writes to this location.
          // Note: We could simply process the values but due to the vector operation
          // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
          // the value to change and not be equal to default. To work around this and
          // allow replacing the non-vector load of loop-invariant default values
          // anyway, skip over paths that do not have any writes.
          ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
          while (record.stored_by.NeedsLoopPhi() &&
                 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
            HLoopInformation* loop_info =
                blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
            record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
          }
          Value value = ReplacementOrValue(record.value);
          if (value.NeedsPhi()) {
            // Visit the predecessor Phi placeholder if it's not visited yet.
            if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
              visited.SetBit(PhiPlaceholderIndex(value));
              work_queue.push_back(value.GetPhiPlaceholder());
            }
          } else if (!value.Equals(Value::Default())) {
            return false;  // Report failure.
          }
        }
      }
    }
  }

  // Record replacement and report success.
  HInstruction* replacement = GetDefaultValue(type);
  for (uint32_t phi_placeholder_index : visited.Indexes()) {
    DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
    PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
    HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
    // We use both vector and non-vector operations to analyze the information. However, we
    // replace only non-vector operations in this code path.
    if (!hl->IsVecOp()) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
      phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
    }
  }
  return true;
}
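
// Illustrative note for TryReplacingLoopPhiPlaceholderWithDefault() above
// (hypothetical Java input):
//
//   int[] a = new int[n];
//   int sum = 0;
//   for (int i = 0; i < n; ++i) { sum += a[i]; }
//
// No element of `a` is ever written, so every load `a[i]` can be replaced by
// the default value 0 and all the loop Phi placeholders visited above
// collapse to that single constant.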
bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
    PhiPlaceholder phi_placeholder,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth first search to check if any non-Phi input is unknown.
  HInstruction* replacement = nullptr;
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
          visited.SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
        }
      } else {
        if (!value.IsInstruction() ||
            (replacement != nullptr && replacement != value.GetInstruction())) {
          return false;  // Report failure.
        }
        replacement = value.GetInstruction();
      }
    }
    // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
    // for back-edges, it is not needed here. When looking for a single input
    // instruction coming from before the loop, the array index must also be
    // defined before the loop and the aliasing analysis done by LSA is sufficient.
    // Any writes of a different value with an index that is not loop invariant
    // would invalidate the heap location in `VisitSetLocation()`.
  }

  // Record replacement and report success.
  DCHECK(replacement != nullptr);
  for (uint32_t phi_placeholder_index : visited.Indexes()) {
    DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
    PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
    HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
    // We use both vector and non-vector operations to analyze the information. However, we
    // replace only vector operations in this code path.
    if (hl->IsVecOp()) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
      phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
    }
  }
  return true;
}

std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
    PhiPlaceholder phi_placeholder,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
    DataType::Type type,
    bool can_use_default_or_phi) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth first search to check if any non-Phi input is unknown.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  phi_placeholders_to_materialize->ClearAllBits();
  phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
      // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
      DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
                 Value::Default()));
      continue;
    }
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    if (current_block->IsLoopHeader()) {
      // If the index is defined inside the loop, it may reference different elements of the
      // array on each iteration. Since we do not track if all elements of an array are set
      // to the same value explicitly, the only known value in pre-header can be the default
      // value from NewArray or a Phi placeholder depending on a default value from some outer
      // loop pre-header. This Phi placeholder can be replaced only by the default value.
      HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
      if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
        if (can_use_default_or_phi &&
            TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
                                                      type,
                                                      phi_placeholders_to_materialize)) {
          continue;
        } else {
          return current_phi_placeholder;  // Report the loop Phi placeholder.
        }
      }
      // A similar situation arises with the index defined outside the loop if we cannot use
      // default values or Phis, i.e. for vector loads, as we can only replace the Phi
      // placeholder with a single instruction defined before the loop.
      if (!can_use_default_or_phi) {
        DCHECK(index != nullptr);  // Vector operations are array operations.
        if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
                                                          phi_placeholders_to_materialize)) {
          continue;
        } else {
          return current_phi_placeholder;  // Report the loop Phi placeholder.
        }
      }
    }
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
      Value value = ReplacementOrValue(heap_values[idx].value);
      if (value.IsUnknown()) {
        // We cannot create a Phi for this loop Phi placeholder.
        return current_phi_placeholder;  // Report the loop Phi placeholder.
      }
      // For arrays, the location may have been clobbered by writes to other locations
      // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
      if (current_block->IsLoopHeader() &&
          predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
          heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
        for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
          if (i != idx &&
              !heap_values[i].stored_by.IsUnknown() &&
              MayAliasOnBackEdge(current_block, idx, i)) {
            // We cannot create a Phi for this loop Phi placeholder.
            return current_phi_placeholder;
          }
        }
      }
      if (value.NeedsLoopPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
          phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
          LSE_VLOG << "For materialization of " << phi_placeholder
                   << " we need to materialize " << value;
        }
      }
    }
  }

  // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
  return std::nullopt;
}
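
// Note on FindLoopPhisToMaterialize() above: the traversal is a depth-first
// search over the Phi placeholder dependency graph. A single unknown input
// anywhere in that graph makes the whole set unmaterializable, and the caller
// then keeps the corresponding loads and stores instead.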
MaterializeLoopPhis(const ScopedArenaVector<size_t> & phi_placeholder_indexes,DataType::Type type)1951 bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
1952                                      DataType::Type type) {
1953   return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
1954 }
1955 
MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,DataType::Type type)1956 bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
1957                                      DataType::Type type) {
1958   // Materialize all predecessors that do not need a loop Phi and determine if all inputs
1959   // other than loop Phis are the same.
1960   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1961   std::optional<Value> other_value = std::nullopt;
1962   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1963     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
1964     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
1965     DCHECK_GE(block->GetPredecessors().size(), 2u);
1966     size_t idx = phi_placeholder.GetHeapLocation();
1967     for (HBasicBlock* predecessor : block->GetPredecessors()) {
1968       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1969       if (value.NeedsNonLoopPhi()) {
1970         DCHECK(current_phase_ == Phase::kLoadElimination) << current_phase_;
1971         MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
1972         value = Replacement(value);
1973       }
1974       if (!value.NeedsLoopPhi()) {
1975         if (!other_value) {
1976           // The first other value we found.
1977           other_value = value;
1978         } else if (!other_value->IsInvalid()) {
1979           // Check if the current `value` differs from the previous `other_value`.
1980           if (!value.Equals(*other_value)) {
1981             other_value = Value::Invalid();
1982           }
1983         }
1984       }
1985     }
1986   }
1987 
1988   DCHECK(other_value.has_value());
1989   if (!other_value->IsInvalid()) {
1990     HInstruction* replacement =
1991         (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
1992     for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1993       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1994     }
1995     return true;
1996   }
1997 
1998   // If we're materializing only a single Phi, try to match it with an existing Phi.
1999   // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
2000   // This also covers the case when after replacing a previous set of Phi placeholders,
2001   // we continue with a Phi placeholder that does not really need a loop Phi anymore.
2002   if (phi_placeholder_indexes.size() == 1u) {
2003     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
2004     size_t idx = phi_placeholder.GetHeapLocation();
2005     HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
2006     ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
2007     for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
2008       HInstruction* phi = phi_it.Current();
2009       DCHECK_EQ(phi->InputCount(), predecessors.size());
2010       ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
2011       auto cmp = [=, this](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
2012         Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
2013         if (value.NeedsPhi()) {
2014           DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
2015           return lhs.GetInstruction() == phi;
2016         } else {
2017           DCHECK(value.IsDefault() || value.IsInstruction());
2018           return value.Equals(lhs.GetInstruction());
2019         }
2020       };
2021       if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
2022         phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
2023         return true;
2024       }
2025     }
2026   }
2027 
2028   if (current_phase_ == Phase::kStoreElimination) {
2029     // We're not creating Phis during the final store elimination phase.
2030     return false;
2031   }
2032 
2033   // There are different inputs to the Phi chain. Create the Phis.
2034   ArenaAllocator* allocator = GetGraph()->GetAllocator();
2035   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2036     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2037     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2038     CHECK_GE(block->GetPredecessors().size(), 2u);
2039     phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
2040         new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
2041   }
2042   // Fill the Phi inputs.
2043   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2044     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2045     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2046     size_t idx = phi_placeholder.GetHeapLocation();
2047     HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
2048     DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
2049         << "type=" << type << " vs phi-type=" << phi->GetType();
2050     for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
2051       HBasicBlock* predecessor = block->GetPredecessors()[i];
2052       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2053       HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
2054       DCHECK_NE(input->GetType(), DataType::Type::kVoid);
2055       phi->SetRawInputAt(i, input);
2056       DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
2057           << " input: " << input->GetType() << value << " phi: " << phi->GetType()
2058           << " request: " << type;
2059     }
2060   }
2061   // Add the Phis to their blocks.
2062   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2063     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2064     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2065     block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
2066   }
2067   if (type == DataType::Type::kReference) {
2068     ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
2069     ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
2070     for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2071       phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
2072     }
2073     // Update reference type information. Pass invalid handles, these are not used for Phis.
2074     ReferenceTypePropagation rtp_fixup(GetGraph(),
2075                                        Handle<mirror::DexCache>(),
2076                                        /* is_first_run= */ false);
2077     rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
2078   }
2079 
2080   return true;
2081 }
2082 
MaterializeLoopPhis(const ArenaBitVector & phi_placeholders_to_materialize,DataType::Type type)2083 bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
2084                                      DataType::Type type) {
2085   // Use local allocator to reduce peak memory usage.
2086   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2087 
2088   // We want to recognize when a subset of these loop Phis that do not need other
2089   // loop Phis, i.e. a transitive closure, has only one other instruction as an input,
2090   // i.e. that instruction can be used instead of each Phi in the set. See for example
2091   // Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
2092   // materialize these loop Phis from the smallest transitive closure.
2093 
2094   // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
2095   // assign new indexes to the Phi placeholders, making the matrix dense.
2096   ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
2097                                            static_cast<size_t>(-1),  // Invalid.
2098                                            allocator.Adapter(kArenaAllocLSE));
2099   ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
2100   size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
2101   phi_placeholder_indexes.reserve(num_phi_placeholders);
2102   for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
2103     matrix_indexes[marker_index] = phi_placeholder_indexes.size();
2104     phi_placeholder_indexes.push_back(marker_index);
2105   }
2106   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2107   ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
2108   dependencies.reserve(num_phi_placeholders);
2109   for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2110     static constexpr bool kExpandable = false;
2111     dependencies.push_back(
2112         ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
2113     ArenaBitVector* current_dependencies = dependencies.back();
2114     current_dependencies->SetBit(matrix_index);  // Count the Phi placeholder as its own dependency.
2115     PhiPlaceholder current_phi_placeholder =
2116         GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
2117     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2118     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2119     size_t idx = current_phi_placeholder.GetHeapLocation();
2120     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2121       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2122       if (pred_value.NeedsLoopPhi()) {
2123         size_t pred_value_index = PhiPlaceholderIndex(pred_value);
2124         DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
2125         DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
2126         current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
2127       }
2128     }
2129   }
2130 
2131   // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
2132   for (size_t k = 0; k != num_phi_placeholders; ++k) {
2133     for (size_t i = 0; i != num_phi_placeholders; ++i) {
2134       for (size_t j = 0; j != num_phi_placeholders; ++j) {
2135         if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
2136           dependencies[i]->SetBit(j);
2137         }
2138       }
2139     }
2140   }
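
  // Illustrative sketch (not part of the pass): the triple loop above is the
  // boolean, transitive-closure variant of Floyd-Warshall over bit vectors. A
  // minimal standalone analogue, assuming a small dense matrix of plain bools:
  //
  //   // closure[i][j] ends up true iff j is reachable from i; each row was
  //   // seeded with closure[i][i] = true, matching the self-dependency bit.
  //   void TransitiveClosure(std::vector<std::vector<bool>>& closure) {
  //     const size_t n = closure.size();
  //     for (size_t k = 0; k != n; ++k) {
  //       for (size_t i = 0; i != n; ++i) {
  //         if (!closure[i][k]) continue;  // Row i cannot route through k.
  //         for (size_t j = 0; j != n; ++j) {
  //           if (closure[k][j]) closure[i][j] = true;
  //         }
  //       }
  //     }
  //   }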
2141 
2142   // Count the number of transitive dependencies for each replaceable Phi placeholder.
2143   ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
2144   num_dependencies.reserve(num_phi_placeholders);
2145   for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2146     num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
2147   }
2148 
2149   // Pick a Phi placeholder with the smallest number of transitive dependencies and
2150   // materialize it and its dependencies. Repeat until we have materialized all.
2151   ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
2152   current_subset.reserve(num_phi_placeholders);
2153   size_t remaining_phi_placeholders = num_phi_placeholders;
2154   while (remaining_phi_placeholders != 0u) {
2155     auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
2156     DCHECK_LE(*it, remaining_phi_placeholders);
2157     size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
2158     ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
2159     size_t current_num_dependencies = num_dependencies[current_matrix_index];
2160     current_subset.clear();
2161     for (uint32_t matrix_index : current_dependencies->Indexes()) {
2162       current_subset.push_back(phi_placeholder_indexes[matrix_index]);
2163     }
2164     if (!MaterializeLoopPhis(current_subset, type)) {
2165       DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2166       // This is the final store elimination phase and we shall not be able to eliminate any
2167       // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
2168       for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2169         if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2170           DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
2171           phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] = Value::Unknown();
2172         }
2173       }
2174       return false;
2175     }
2176     for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2177       if (current_dependencies->IsBitSet(matrix_index)) {
2178         // Mark all dependencies as done by raising their `num_dependencies[.]` to
2179         // the maximum, so that they shall never be the minimum again.
2180         num_dependencies[matrix_index] = num_phi_placeholders;
2181       } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2182         // Remove dependencies from other Phi placeholders.
2183         dependencies[matrix_index]->Subtract(current_dependencies);
2184         num_dependencies[matrix_index] -= current_num_dependencies;
2185       }
2186     }
2187     remaining_phi_placeholders -= current_num_dependencies;
2188   }
2189   return true;
2190 }
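
// Illustrative sketch (not part of the pass): the loop above is a greedy schedule.
// It repeatedly takes the placeholder whose transitive closure is smallest and
// retires that whole closure. A simplified analogue that recomputes dependency
// counts instead of updating them incrementally, assuming `closure` is already
// transitively closed:
//
//   std::vector<std::vector<size_t>> MaterializationOrder(
//       const std::vector<std::vector<bool>>& closure) {
//     const size_t n = closure.size();
//     std::vector<bool> retired(n, false);
//     std::vector<std::vector<size_t>> order;
//     for (size_t done = 0u; done != n; ) {
//       size_t best = n;
//       size_t best_count = n + 1u;
//       for (size_t i = 0; i != n; ++i) {  // Find the smallest live closure.
//         if (retired[i]) continue;
//         size_t count = 0u;
//         for (size_t j = 0; j != n; ++j) {
//           count += (closure[i][j] && !retired[j]) ? 1u : 0u;
//         }
//         if (count < best_count) { best = i; best_count = count; }
//       }
//       std::vector<size_t> subset;  // `best` and everything it depends on.
//       for (size_t j = 0; j != n; ++j) {
//         if (closure[best][j] && !retired[j]) subset.push_back(j);
//       }
//       for (size_t j : subset) retired[j] = true;  // Retire the whole closure.
//       done += subset.size();
//       order.push_back(std::move(subset));
//     }
//     return order;
//   }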
2191 
2192 bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
2193   ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
2194   ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
2195   auto res =
2196       FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /* can_use_default_or_phi=*/true);
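  // Note: the operands streamed into the CHECK below are evaluated only when the
  // check fails, so `*res` is dereferenced only when `res` actually holds an
  // offending Phi placeholder to report.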
2197   CHECK(!res.has_value()) << *res;
2198   return MaterializeLoopPhis(abv, type);
2199 }
2200 
2201 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
2202     PhiPlaceholder phi_placeholder, HInstruction* load) {
2203   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2204 
2205   // Use local allocator to reduce peak memory usage.
2206   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2207 
2208   // Find Phi placeholders to materialize.
2209   ArenaBitVector phi_placeholders_to_materialize(
2210       &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2211   DataType::Type type = load->GetType();
2212   bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
2213   std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2214       phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
2215   if (loop_phi_with_unknown_input) {
2216     DCHECK_GE(GetGraph()
2217                   ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2218                   ->GetPredecessors()
2219                   .size(),
2220               2u);
2221     return loop_phi_with_unknown_input;  // Return failure.
2222   }
2223 
2224   DCHECK_EQ(current_phase_, Phase::kLoadElimination);
2225   bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2226   DCHECK(success);
2227 
2228   // Report success.
2229   return std::nullopt;
2230 }
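
// Illustrative note (not part of the pass): the std::optional return value serves
// as a failure channel -- std::nullopt reports success, while an engaged value
// names the loop Phi placeholder whose input turned out to be unknown. The
// caller-side shape, mirroring ProcessLoadsRequiringLoopPhis() below:
//
//   std::optional<PhiPlaceholder> blocker =
//       TryToMaterializeLoopPhis(record->value.GetPhiPlaceholder(), load);
//   if (blocker.has_value()) {
//     ProcessLoopPhiWithUnknownInput(*blocker);  // Re-process, then retry.
//   }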
2231 
2232 // Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
2233 // find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
2234 // propagate the load(s) as the new value(s) to successors; this may uncover new elimination
2235 // opportunities. If we find no such load, we shall at least propagate an unknown value to some
2236 // heap location that is needed by another loop Phi placeholder.
2237 void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
2238   DCHECK(!loads_requiring_loop_phi_.empty());
2239   size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
2240   DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
2241   phi_placeholder_replacements_[loop_phi_with_unknown_input_index] = Value::Unknown();
2242 
2243   uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
2244   const ArenaVector<HBasicBlock*> reverse_post_order = GetGraph()->GetReversePostOrder();
2245   size_t reverse_post_order_index = 0;
2246   size_t reverse_post_order_size = reverse_post_order.size();
2247   size_t loads_and_stores_index = 0u;
2248   size_t loads_and_stores_size = loads_and_stores_.size();
2249 
2250   // Skip blocks and instructions before the block containing the loop phi with unknown input.
2251   DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2252   while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
2253     HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2254     while (loads_and_stores_index != loads_and_stores_size &&
2255            loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
2256       ++loads_and_stores_index;
2257     }
2258     ++reverse_post_order_index;
2259     DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2260   }
2261 
2262   // Use local allocator to reduce peak memory usage.
2263   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2264   // Reuse one temporary vector for all remaining blocks.
2265   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2266   ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
2267 
2268   auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
2269     Value value;
2270     if (block->IsLoopHeader()) {
2271       if (block->GetLoopInformation()->IsIrreducible()) {
2272         value = Value::Unknown();
2273       } else {
2274         value = PrepareLoopValue(block, idx);
2275       }
2276     } else {
2277       value = MergePredecessorValues(block, idx);
2278     }
2279     DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
2280     return value;
2281   };
2282 
2283   // Process remaining blocks and instructions.
2284   bool found_unreplaceable_load = false;
2285   bool replaced_heap_value_with_unknown = false;
2286   for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
2287     HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2288     if (block->IsExitBlock()) {
2289       continue;
2290     }
2291 
2292     // We shall reconstruct only the heap values that we need for processing loads and stores.
2293     local_heap_values.clear();
2294     local_heap_values.resize(num_heap_locations, Value::Invalid());
2295 
2296     for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
2297       HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
2298       size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
2299       if (load_or_store->GetBlock() != block) {
2300         break;  // End of instructions from the current block.
2301       }
2302       if (IsStore(load_or_store)) {
2303         StoreRecord* store_record = store_records_[load_or_store->GetId()];
2304         DCHECK(store_record != nullptr);
2305         HInstruction* stored_value = store_record->stored_value;
2306         DCHECK(stored_value != nullptr);
2307         // Note that the `stored_value` can be a newly created `Phi` with an id that falls
2308         // outside the allocated `loads_requiring_loop_phi_` range.
2309         DCHECK_IMPLIES(
2310             IsLoad(stored_value),
2311             static_cast<size_t>(stored_value->GetId()) < loads_requiring_loop_phi_.size());
2312         if (static_cast<size_t>(stored_value->GetId()) >= loads_requiring_loop_phi_.size() ||
2313             loads_requiring_loop_phi_[stored_value->GetId()] == nullptr) {
2314           continue;  // This store never needed a loop Phi.
2315         }
2316         ValueRecord* record = loads_requiring_loop_phi_[stored_value->GetId()];
2317         // Process the store by updating `local_heap_values[idx]`. The last update shall
2318         // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
2319         // at the end of the block.
2320         Value replacement = ReplacementOrValue(record->value);
2321         if (replacement.NeedsLoopPhi()) {
2322           // No replacement yet, use the Phi placeholder from the load.
2323           DCHECK(record->value.NeedsLoopPhi());
2324           local_heap_values[idx] = record->value;
2325         } else {
2326           // If the load fetched a known value, use it, otherwise use the load.
2327           local_heap_values[idx] = Value::ForInstruction(
2328               replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
2329         }
2330       } else {
2331         // Process the load unless it has previously been marked unreplaceable.
2332         DCHECK(IsLoad(load_or_store));
2333         ValueRecord* record = loads_requiring_loop_phi_[load_or_store->GetId()];
2334         if (record == nullptr) {
2335           continue;  // This load never needed a loop Phi.
2336         }
2337         if (record->value.NeedsLoopPhi()) {
2338           if (local_heap_values[idx].IsInvalid()) {
2339             local_heap_values[idx] = get_initial_value(block, idx);
2340           }
2341           if (local_heap_values[idx].IsUnknown()) {
2342             // This load cannot be replaced. Keep stores that feed the Phi placeholder
2343             // (there was no aliasing store since then, otherwise the Phi placeholder would
2344             // not have been propagated to this load); store the load as the new heap value.
2345             found_unreplaceable_load = true;
2346             KeepStores(record->value);
2347             record->value = Value::Unknown();
2348             local_heap_values[idx] = Value::ForInstruction(load_or_store);
2349           } else if (local_heap_values[idx].NeedsLoopPhi()) {
2350             // The load may still be replaced with a Phi later.
2351             DCHECK(local_heap_values[idx].Equals(record->value));
2352           } else {
2353             // This load can be eliminated but we may need to construct non-loop Phis.
2354             if (local_heap_values[idx].NeedsNonLoopPhi()) {
2355               MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
2356                                      load_or_store->GetType());
2357               local_heap_values[idx] = Replacement(local_heap_values[idx]);
2358             }
2359             record->value = local_heap_values[idx];
2360             DCHECK(local_heap_values[idx].IsDefault() || local_heap_values[idx].IsInstruction())
2361                 << "The replacement heap value can be an HIR instruction or the default value.";
2362             HInstruction* heap_value = local_heap_values[idx].IsDefault() ?
2363                                            GetDefaultValue(load_or_store->GetType()) :
2364                                            local_heap_values[idx].GetInstruction();
2365             AddRemovedLoad(load_or_store, heap_value);
2366           }
2367         }
2368       }
2369     }
2370 
2371     // All heap values that previously needed a loop Phi at the end of the block
2372     // need to be updated for processing successors.
2373     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
2374     for (size_t idx = 0; idx != num_heap_locations; ++idx) {
2375       if (heap_values[idx].value.NeedsLoopPhi()) {
2376         if (local_heap_values[idx].IsValid()) {
2377           heap_values[idx].value = local_heap_values[idx];
2378         } else {
2379           heap_values[idx].value = get_initial_value(block, idx);
2380         }
2381         if (heap_values[idx].value.IsUnknown()) {
2382           replaced_heap_value_with_unknown = true;
2383         }
2384       }
2385     }
2386   }
2387   DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
2388 }
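
// Illustrative sketch (not part of the pass): `loads_and_stores_` is one flat
// list recorded in reverse-post-order visit order, so the function above walks
// blocks and their memory operations with two synchronized cursors instead of
// per-block containers. The traversal skeleton looks like:
//
//   size_t cursor = 0u;
//   for (HBasicBlock* block : reverse_post_order) {
//     while (cursor != loads_and_stores.size() &&
//            loads_and_stores[cursor].load_or_store->GetBlock() == block) {
//       Process(loads_and_stores[cursor]);  // All records of `block`, in order.
//       ++cursor;
//     }
//   }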
2389 
2390 void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
2391   // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
2392   // make the result of the processing depend on the order in which we process these loads.
2393   // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
2394   // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
2395   if (loads_requiring_loop_phi_.empty()) {
2396     return;  // No loads to process.
2397   }
2398   for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
2399     ValueRecord* record = loads_requiring_loop_phi_[load_store_record.load_or_store->GetId()];
2400     if (record == nullptr) {
2401       continue;
2402     }
2403     HInstruction* load = load_store_record.load_or_store;
2404     while (record->value.NeedsLoopPhi() &&
2405            phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid()) {
2406       std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
2407           TryToMaterializeLoopPhis(record->value.GetPhiPlaceholder(), load);
2408       DCHECK_EQ(loop_phi_with_unknown_input.has_value(),
2409                 phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid());
2410       if (loop_phi_with_unknown_input) {
2411         DCHECK_GE(GetGraph()
2412                       ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2413                       ->GetPredecessors()
2414                       .size(),
2415                   2u);
2416         ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
2417       }
2418     }
2419     // The load could have been marked as unreplaceable (and stores marked for keeping)
2420     // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
2421     DCHECK(record->value.IsUnknown() ||
2422            record->value.IsInstruction() ||
2423            record->value.NeedsLoopPhi());
2424     if (record->value.NeedsLoopPhi()) {
2425       record->value = Replacement(record->value);
2426       HInstruction* heap_value = record->value.GetInstruction();
2427       AddRemovedLoad(load, heap_value);
2428     }
2429   }
2430 }
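
// Illustrative note (not part of the pass): the inner while loop above acts as a
// retry driver. Each failed materialization reports exactly one offending loop
// Phi placeholder, ProcessLoopPhiWithUnknownInput() marks it unknown and
// re-processes the affected region, and the attempt is repeated. Every iteration
// permanently resolves at least one placeholder (to a replacement or to unknown),
// so the loop makes forward progress and terminates.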
2431 
2432 void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
2433   ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
2434   size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
2435   work_queue.reserve(((start_size * 3u) + 1u) / 2u);  // Reserve 1.5x start size, rounded up.
2436   for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
2437     work_queue.push_back(index);
2438   }
2439   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2440   while (!work_queue.empty()) {
2441     uint32_t cur_phi_idx = work_queue.back();
2442     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
2443     work_queue.pop_back();
2444     size_t idx = phi_placeholder.GetHeapLocation();
2445     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2446     DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
2447                              << " (blocks: " << blocks.size() << ")";
2448     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2449       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2450       // For loop back-edges we must also preserve all stores to locations that
2451       // may alias with the location `idx`.
2452       // TODO: Add test cases around this.
2453       bool is_back_edge =
2454           block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
2455       size_t start = is_back_edge ? 0u : idx;
2456       size_t end = is_back_edge ? heap_values.size() : idx + 1u;
2457       for (size_t i = start; i != end; ++i) {
2458         Value stored_by = heap_values[i].stored_by;
2459         if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
2460           if (stored_by.NeedsPhi()) {
2461             size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
2462             if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
2463               phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
2464               work_queue.push_back(phi_placeholder_index);
2465             }
2466           } else {
2467             DCHECK(IsStore(stored_by.GetInstruction()));
2468             ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
2469             DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
2470                                   << " id: " << stored_by.GetInstruction()->GetId() << " block: "
2471                                   << stored_by.GetInstruction()->GetBlock()->GetBlockId();
2472             kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
2473           }
2474         }
2475       }
2476     }
2477   }
2478 }
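
// Illustrative sketch (not part of the pass): the search above is a standard
// worklist fixed point. Membership lives in a bit vector, pending items in a
// vector used as a stack, and each item is pushed at most once because the bit
// is set before pushing. A generic analogue:
//
//   void FixedPoint(std::vector<bool>& in_set,
//                   std::vector<uint32_t>& work_queue,
//                   const std::vector<std::vector<uint32_t>>& successors) {
//     while (!work_queue.empty()) {
//       uint32_t item = work_queue.back();
//       work_queue.pop_back();
//       for (uint32_t succ : successors[item]) {
//         if (!in_set[succ]) {
//           in_set[succ] = true;  // Mark before pushing: one visit per item.
//           work_queue.push_back(succ);
//         }
//       }
//     }
//   }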
2479 
2480 void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
2481   while (value_record->stored_by.IsInstruction() &&
2482          !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
2483     StoreRecord* store_record = store_records_[value_record->stored_by.GetInstruction()->GetId()];
2484     DCHECK(store_record != nullptr);
2485     *value_record = store_record->old_value_record;
2486   }
2487   if (value_record->stored_by.NeedsPhi() &&
2488       !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
2489            PhiPlaceholderIndex(value_record->stored_by))) {
2490     // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
2491     // Phi placeholder to recalculate the actual value.
2492     value_record->value = value_record->stored_by;
2493   }
2494   value_record->value = ReplacementOrValue(value_record->value);
2495   if (value_record->value.NeedsNonLoopPhi()) {
2496     // Treat all Phi placeholders as requiring loop Phis at this point.
2497     // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
2498     value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
2499   }
2500 }
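
// Illustrative sketch (not part of the pass): the first while loop above chases a
// chain of overwritten values -- while the last writer is a store that was
// eliminated, fall back to the value that store overwrote. A reduced shape, with
// hypothetical types:
//
//   struct Rec { int store_id; Rec* overwritten; };  // Value before that store.
//
//   Rec* SkipEliminatedStores(Rec* rec, const std::vector<bool>& kept) {
//     while (rec->overwritten != nullptr && !kept[rec->store_id]) {
//       rec = rec->overwritten;  // The store is gone; its old value survives.
//     }
//     return rec;
//   }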
2501 
2502 void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
2503                                                DataType::Type type) {
2504   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2505 
2506   // Use local allocator to reduce peak memory usage.
2507   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2508   ArenaBitVector visited(&allocator,
2509                          /*start_bits=*/ num_phi_placeholders_,
2510                          /*expandable=*/ false,
2511                          kArenaAllocLSE);
2512 
2513   // Find Phi placeholders to try and match against existing Phis or other replacement values.
2514   ArenaBitVector phi_placeholders_to_materialize(
2515       &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2516   std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2517       phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
2518   if (loop_phi_with_unknown_input) {
2519     DCHECK_GE(GetGraph()
2520                   ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2521                   ->GetPredecessors()
2522                   .size(),
2523               2u);
2524     // Mark both the requested Phi placeholder and the input Phi placeholder as unreplaceable.
2525     phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::Unknown();
2526     phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
2527         Value::Unknown();
2528     return;
2529   }
2530 
2531   DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2532   bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2533   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
2534   DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
2535             !success);
2536 }
2537 
2538 void LSEVisitor::FindStoresWritingOldValues() {
2539   // The Phi placeholder replacements have so far been used for eliminating loads,
2540   // tracking values that would be stored if all stores were kept. As we want to
2541   // compare actual old values after removing unmarked stores, prune the Phi
2542   // placeholder replacements that can be fed by values we may not actually store.
2543   // Replacements marked as unknown can be kept as they are fed by some unknown
2544   // value and would end up as unknown again if we recalculated them.
2545   for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
2546     if (!phi_placeholder_replacements_[i].IsUnknown() &&
2547         !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
2548       phi_placeholder_replacements_[i] = Value::Invalid();
2549     }
2550   }
2551 
2552   // Update heap values at end of blocks.
2553   for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
2554     for (ValueRecord& value_record : heap_values_for_[block->GetBlockId()]) {
2555       UpdateValueRecordForStoreElimination(&value_record);
2556     }
2557   }
2558 
2559   // Use local allocator to reduce peak memory usage.
2560   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2561   // Mark the stores we want to eliminate in a separate bit vector.
2562   ArenaBitVector eliminated_stores(&allocator,
2563                                    /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
2564                                    /*expandable=*/ false,
2565                                    kArenaAllocLSE);
2566 
2567   for (uint32_t store_id : kept_stores_.Indexes()) {
2568     DCHECK(kept_stores_.IsBitSet(store_id));
2569     StoreRecord* store_record = store_records_[store_id];
2570     DCHECK(store_record != nullptr);
2571     UpdateValueRecordForStoreElimination(&store_record->old_value_record);
2572     if (store_record->old_value_record.value.NeedsPhi()) {
2573       DataType::Type type = store_record->stored_value->GetType();
2574       FindOldValueForPhiPlaceholder(store_record->old_value_record.value.GetPhiPlaceholder(), type);
2575       store_record->old_value_record.value =
2576           ReplacementOrValue(store_record->old_value_record.value);
2577     }
2578     DCHECK(!store_record->old_value_record.value.NeedsPhi());
2579     HInstruction* stored_value = FindSubstitute(store_record->stored_value);
2580     if (store_record->old_value_record.value.Equals(stored_value)) {
2581       eliminated_stores.SetBit(store_id);
2582     }
2583   }
2584 
2585   // Commit the stores to eliminate by removing them from `kept_stores_`.
2586   kept_stores_.Subtract(&eliminated_stores);
2587 }
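
// Illustrative sketch (not part of the pass): the comparison above boils down to
// "a store is dead if it writes the value the location already holds". The
// essence over a toy flat heap, with hypothetical types:
//
//   struct Store { size_t location; int value; };
//
//   // Returns indexes of stores that are safe to drop; `heap` tracks the last
//   // known value of each location.
//   std::vector<size_t> StoresWritingOldValues(const std::vector<Store>& stores,
//                                              std::vector<int>& heap) {
//     std::vector<size_t> dead;
//     for (size_t i = 0; i != stores.size(); ++i) {
//       if (heap[stores[i].location] == stores[i].value) {
//         dead.push_back(i);  // Writes back what is already there.
//       } else {
//         heap[stores[i].location] = stores[i].value;
//       }
//     }
//     return dead;
//   }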
2588 
2589 void LSEVisitor::Run() {
2590   // 0. Set HasMonitorOperations to false. If we encounter some MonitorOperations that we can't
2591   // remove, we will set it to true in VisitMonitorOperation.
2592   GetGraph()->SetHasMonitorOperations(false);
2593 
2594   // 1. Process blocks and instructions in reverse post order.
2595   for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
2596     VisitBasicBlock(block);
2597   }
2598 
2599   // 2. Process loads that require loop Phis, trying to find/create replacements.
2600   current_phase_ = Phase::kLoadElimination;
2601   ProcessLoadsRequiringLoopPhis();
2602 
2603   // 3. Determine which stores to keep and which to eliminate.
2604   current_phase_ = Phase::kStoreElimination;
2605   // Finish marking stores for keeping.
2606   SearchPhiPlaceholdersForKeptStores();
2607 
2608   // Find stores that write the same value as is already present in the location.
2609   FindStoresWritingOldValues();
2610 
2611   // 4. Replace loads and remove unnecessary stores and singleton allocations.
2612   FinishFullLSE();
2613 }
2614 
2615 
2616 void LSEVisitor::FinishFullLSE() {
2617   // Remove recorded load instructions that should be eliminated.
2618   for (const LoadStoreRecord& record : loads_and_stores_) {
2619     size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
2620     HInstruction* substitute = substitute_instructions_for_loads_[id];
2621     if (substitute == nullptr) {
2622       continue;
2623     }
2624     HInstruction* load = record.load_or_store;
2625     DCHECK(load != nullptr);
2626     DCHECK(IsLoad(load));
2627     DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
2628     // We proactively retrieve the substitute for a removed load, so
2629     // a load that has a substitute should not be observed as a heap
2630     // location value.
2631     DCHECK_EQ(FindSubstitute(substitute), substitute);
2632 
2633     load->ReplaceWith(substitute);
2634     load->GetBlock()->RemoveInstruction(load);
2635     if ((load->IsInstanceFieldGet() && load->AsInstanceFieldGet()->IsVolatile()) ||
2636         (load->IsStaticFieldGet() && load->AsStaticFieldGet()->IsVolatile())) {
2637       MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileLoad);
2638     }
2639   }
2640 
2641   // Remove all the stores we can.
2642   for (const LoadStoreRecord& record : loads_and_stores_) {
2643     if (IsStore(record.load_or_store) && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
2644       record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
2645       if ((record.load_or_store->IsInstanceFieldSet() &&
2646            record.load_or_store->AsInstanceFieldSet()->IsVolatile()) ||
2647           (record.load_or_store->IsStaticFieldSet() &&
2648            record.load_or_store->AsStaticFieldSet()->IsVolatile())) {
2649         MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileStore);
2650       }
2651     }
2652   }
2653 
2654   // Eliminate singleton-classified instructions:
2655   //   - Constructor fences (they never escape this thread).
2656   //   - Allocations (if they are unused).
2657   for (HInstruction* new_instance : singleton_new_instances_) {
2658     size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
2659     MaybeRecordStat(stats_,
2660                     MethodCompilationStat::kConstructorFenceRemovedLSE,
2661                     removed);
2662 
2663     if (!new_instance->HasNonEnvironmentUses()) {
2664       new_instance->RemoveEnvironmentUsers();
2665       new_instance->GetBlock()->RemoveInstruction(new_instance);
2666       MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
2667     }
2668   }
2669 }
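
// Illustrative note (not part of the pass): load removal above follows the usual
// two-step HIR rewrite -- redirect all uses first, then unlink the instruction:
//
//   load->ReplaceWith(substitute);              // Users now consume `substitute`.
//   load->GetBlock()->RemoveInstruction(load);  // Safe: the load has no users left.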
2670 
2671 // The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
2672 // cannot be directly allocated with an arena allocator, so we need to wrap it.
2673 class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
2674  public:
2675   LSEVisitorWrapper(HGraph* graph,
2676                     const HeapLocationCollector& heap_location_collector,
2677                     OptimizingCompilerStats* stats)
2678       : lse_visitor_(graph, heap_location_collector, stats) {}
2679 
2680   void Run() {
2681     lse_visitor_.Run();
2682   }
2683 
2684  private:
2685   LSEVisitor lse_visitor_;
2686 };
2687 
2688 bool LoadStoreElimination::Run() {
2689   if (graph_->IsDebuggable()) {
2690     // Debugger may set heap values or trigger deoptimization of callers.
2691     // Skip this optimization.
2692     return false;
2693   }
2694   ScopedArenaAllocator allocator(graph_->GetArenaStack());
2695   LoadStoreAnalysis lsa(graph_, stats_, &allocator);
2696   lsa.Run();
2697   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
2698   if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
2699     // No HeapLocation information from LSA, skip this optimization.
2700     return false;
2701   }
2702 
2703   // Currently load-store analysis can't handle predicated loads/stores, specifically
2704   // pairs of memory operations with different predicates.
2705   // TODO: support predicated SIMD.
2706   if (graph_->HasPredicatedSIMD()) {
2707     return false;
2708   }
2709 
2710   std::unique_ptr<LSEVisitorWrapper> lse_visitor(
2711       new (&allocator) LSEVisitorWrapper(graph_, heap_location_collector, stats_));
2712   lse_visitor->Run();
2713   return true;
2714 }
2715 
2716 #undef LSE_VLOG
2717 
2718 }  // namespace art
2719