/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "load_store_elimination.h"

#include <algorithm>
#include <optional>
#include <sstream>
#include <variant>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/bit_vector.h"
#include "base/globals.h"
#include "base/indenter.h"
#include "base/iteration_range.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/transform_iterator.h"
#include "escape.h"
#include "handle.h"
#include "load_store_analysis.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
#include "oat/stack_map.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"
/**
 * The general algorithm of load-store elimination (LSE).
 *
 * We use load-store analysis to collect a list of heap locations and perform
 * alias analysis of those heap locations. LSE then keeps track of a list of
 * heap values corresponding to the heap locations and stores that put those
 * values in these locations.
 * - In phase 1, we visit basic blocks in reverse post order and, for each basic
 *   block, visit instructions sequentially, recording heap values and looking
 *   for loads and stores to eliminate without relying on loop Phis.
 * - In phase 2, we look for loads that can be replaced by creating loop Phis
 *   or using a loop-invariant value.
 * - In phase 3, we determine which stores are dead and can be eliminated and,
 *   based on that information, we re-evaluate whether some kept stores are
 *   storing the same value as the value in the heap location; such stores are
 *   also marked for elimination.
 * - In phase 4, we commit the changes, replacing loads marked for elimination
 *   in previous processing and removing stores not marked for keeping. We also
 *   remove allocations that are no longer needed.
 * - In phase 5, we move allocations which only escape along some executions
 *   closer to their escape points and fix up non-escaping paths with their
 *   actual values, creating PHIs when needed.
 *
 * 1. Walk over blocks and their instructions.
 *
 * The initial set of heap values for a basic block is
 * - For a loop header of an irreducible loop, all heap values are unknown.
 * - For a loop header of a normal loop, all values unknown at the end of the
 *   preheader are initialized to unknown, other heap values are set to Phi
 *   placeholders as we cannot determine yet whether these values are known on
 *   all back-edges. We use Phi placeholders also for array heap locations with
 *   index defined inside the loop, but this helps only when the value remains
 *   zero from the array allocation throughout the loop.
 * - For catch blocks, we clear all assumptions since we arrived due to an
 *   instruction throwing.
 * - For other basic blocks, we merge incoming values from the end of all
 *   predecessors (see the example after this list). If any incoming value is
 *   unknown, the start value for this block is also unknown. Otherwise, if all
 *   the incoming values are the same (including the case of a single
 *   predecessor), the incoming value is used. Otherwise, we use a Phi
 *   placeholder to indicate different incoming values. We record whether such
 *   a Phi placeholder depends on a loop Phi placeholder.
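 *
 * As an illustrative Java-like sketch of the merge rule (hypothetical code,
 * not compiler output):
 *
 *   if (cond) { obj.x = 1; } else { obj.x = 2; }
 *   int v = obj.x;  // The merge block starts with a Phi placeholder for
 *                   // obj.x standing for Phi(1, 2); the load can later be
 *                   // replaced by a materialized Phi.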
 *
 * For each instruction in the block (see the sketch after this list)
 * - If the instruction is a load from a heap location with a known value not
 *   dependent on a loop Phi placeholder, the load can be eliminated, either by
 *   using an existing instruction or by creating new Phi(s) instead. In order
 *   to maintain the validity of all heap locations during the optimization
 *   phase, we only record substitutes at this phase and the real elimination
 *   is delayed till the end of LSE. Loads that require a loop Phi placeholder
 *   replacement are recorded for processing later.
 * - If the instruction is a store, it updates the heap value for the heap
 *   location with the stored value and records the store itself so that we can
 *   mark it for keeping if the value becomes observable. Heap values are
 *   invalidated for heap locations that may alias with the store instruction's
 *   heap location and their recorded stores are marked for keeping as they are
 *   now potentially observable. The store instruction can be eliminated unless
 *   the value stored is later needed, e.g. by a load from the same/aliased heap
 *   location, or the heap location persists at method return/deoptimization.
 * - A store that stores the same value as the heap value is eliminated.
 * - For newly instantiated instances, their heap values are initialized to
 *   language defined default values.
 * - Finalizable objects are considered as persisting at method
 *   return/deoptimization.
 * - Some instructions such as invokes are treated as loading and invalidating
 *   all the heap values, depending on the instruction's side effects.
 * - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
 *   partially overlapping access among ArrayGet/ArraySet/VecLoad/VecStore is
 *   treated as aliasing and no load/store is eliminated in such a case.
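 *
 * A minimal sketch of what this phase achieves (illustrative Java-like code,
 * not compiler output):
 *
 *   obj.x = 42;     // The store records heap value 42 for obj.x.
 *   int a = obj.x;  // The load sees the known value and is replaced by 42.
 *   obj.x = 42;     // This store writes the same value as the recorded heap
 *                   // value and is eliminated.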
 *
 * The time complexity of the initial phase has several components. The total
 * time for the initialization of heap values for all blocks is
 *    O(heap_locations * edges)
 * and the time complexity for simple instruction processing is
 *    O(instructions).
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 2. Process loads that depend on loop Phi placeholders.
 *
 * We go over these loads to determine whether they can be eliminated. We look
 * for the set of all Phi placeholders that feed the load and depend on a loop
 * Phi placeholder and, if we find no unknown value, we construct the necessary
 * Phi(s) or, if all other inputs are identical, i.e. the location does not
 * change in the loop, just use that input. If we do find an unknown input, this
 * must be from a loop back-edge and we replace the loop Phi placeholder with
 * unknown value and re-process loads and stores that previously depended on
 * loop Phi placeholders. This shall find at least one load of an unknown value
 * which is now known to be unreplaceable or a new unknown value on a back-edge
 * and we repeat this process until each load is either marked for replacement
 * or found to be unreplaceable. As we mark at least one additional loop Phi
 * placeholder as unreplaceable in each iteration, this process shall terminate.
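 *
 * To illustrate the successful loop case (a hedged Java-like sketch assuming
 * obj does not alias or escape; not compiler output):
 *
 *   obj.x = 0;
 *   for (int i = 0; i < n; ++i) {
 *     int v = obj.x;  // Sees the loop Phi placeholder for obj.x.
 *     obj.x = v + 1;  // The back-edge value feeds that placeholder.
 *   }
 *   // No input is unknown, so the placeholder is materialized as
 *   // Phi(0, v + 1) and the load inside the loop is replaced by that Phi.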
 *
 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
 * is limited by the number of Phi placeholders and their dependencies we need
 * to search with worst-case time complexity
 *    O(phi_placeholder_dependencies) .
 * The dependencies are usually just the Phi placeholders' potential inputs,
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
 * replacement search, there are additional dependencies to consider, see below.
 *
 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
 * algorithm to determine transitive closures for each found Phi placeholder,
 * and then match or materialize Phis from the smallest transitive closure,
 * so that we can determine if such a subset has a single other input. This has
 * time complexity
 *    O(phi_placeholders_found^3) .
 * Note that a successful TryReplacingLoopPhiPlaceholderWithDefault() does not
 * contribute to this as such Phi placeholders are replaced immediately.
 * The total time of all such successful cases has time complexity
 *    O(phi_placeholders^3)
 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A
 * similar argument applies to the searches used to find all successful cases,
 * so their total contribution is also just an insignificant
 *    O(phi_placeholder_dependencies) .
 * The materialization of Phis has an insignificant total time complexity
 *    O(phi_placeholders * edges) .
 *
 * If we find an unknown input, we re-process heap values and loads with a time
 * complexity that's the same as phase 1 in the worst case. Adding this to
 * the depth-first search time complexity yields
 *    O(phi_placeholder_dependencies + heap_locations * edges + instructions)
 * for a single iteration. We can ignore the middle term as it's proportional
 * to the number of Phi placeholder inputs included in the first term. Using
 * the upper limit of the number of such iterations, the total time complexity
 * is
 *    O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
 *
 * The upper bound of Phi placeholder inputs is
 *    heap_locations * edges
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
 * include other heap locations in predecessor blocks with the upper bound of
 *    heap_locations^2 * edges .
 * Using the estimate
 *    edges <= blocks^2
 * and
 *    phi_placeholders <= heap_locations * blocks ,
 * the worst-case time complexity of the
 *    O(phi_placeholder_dependencies * phi_placeholders)
 * term from unknown input cases is actually
 *    O(heap_locations^3 * blocks^3) ,
 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
 * Adding the other term from the unknown input cases (to account for the case
 * with significantly more instructions than blocks and heap locations), the
 * phase 2 time complexity is
 *    O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
 *
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 3. Determine which stores to keep and which to eliminate.
 *
 * During instruction processing in phase 1 and re-processing in phase 2, we
 * keep a record of the stores and Phi placeholders that become observable and
 * now propagate the observable Phi placeholders to all actual stores that
 * feed them. Having determined observable stores, we look for stores that just
 * overwrite the old value with the same value. Since ignoring non-observable
 * stores actually changes the old values in heap locations, we need to
 * recalculate Phi placeholder replacements but we proceed similarly to the
 * previous phase. We look for the set of all Phis that feed the old value
 * replaced by the store (but ignoring whether they depend on a loop Phi) and,
 * if we find no unknown value, we try to match existing Phis (we do not create
 * new Phis anymore) or, if all other inputs are identical, i.e. the location
 * does not change in the loop, just use that input. If this succeeds and the
 * old value is identical to the value we're storing, such a store shall be
 * eliminated.
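 *
 * For illustration (a hedged Java-like sketch, not compiler output; assume
 * the load could not be eliminated, e.g. because its value came through a
 * merge of aliased locations):
 *
 *   obj.x = 1;      // Kept: observable through the load below.
 *   int v = obj.x;  // Not eliminated.
 *   obj.x = v;      // Writes the old value back; eliminated in this phase.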
 *
 * The work is similar to phase 2, except that we're not re-processing loads
 * and stores anymore, so the time complexity of phase 3 is
 *    O(heap_locations^3 * blocks^3) .
 *
 * There is additional complexity in matching existing Phis shared between
 * phases 1, 2 and 3. We never try to match two or more Phis at the same
 * time (this could be difficult and slow), so each matching attempt just
 * looks at Phis in the block (both old Phis and newly created Phis) and their
 * inputs. As we create at most `heap_locations` Phis in each block, the upper
 * bound on the number of Phis we look at is
 *    heap_locations * (old_phis + heap_locations)
 * and the worst-case time complexity is
 *    O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
 * The first term is lower than one term in phase 2, so the relevant part is
 *    O(heap_locations * old_phis * edges) .
 *
 * 4. Replace loads and remove unnecessary stores and singleton allocations.
 *
 * A special type of objects called singletons are instantiated in the method,
 * have a single name, i.e. no aliases, and therefore occupy exclusive heap
 * locations. Singletons are helpful in narrowing down the life span of a heap
 * location such that they do not always need to participate in merging heap
 * values. The allocation of a singleton can be eliminated if that singleton is
 * not used and does not persist at method return/deoptimization.
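 *
 * A minimal sketch of a removable singleton (illustrative Java-like code, not
 * compiler output):
 *
 *   Point p = new Point();  // Never escapes, so `p` is a singleton.
 *   p.x = 4;                // Records heap value 4; the store becomes dead.
 *   return p.x;             // The load is replaced by 4; with no remaining
 *                           // uses, the allocation itself is removed.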
 *
 * The time complexity of this phase is
 *    O(instructions + instruction_uses) .
 *
 * FIXME: The time complexities described above assume that the
 * HeapLocationCollector finds a heap location for an instruction in O(1)
 * time but it is currently O(heap_locations); this can be fixed by adding
 * a hash map to the HeapLocationCollector.
 */

namespace art HIDDEN {

#define LSE_VLOG \
  if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)

class HeapRefHolder;

// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
 public:
  LSEVisitor(HGraph* graph,
             const HeapLocationCollector& heap_location_collector,
             OptimizingCompilerStats* stats);

  void Run();

 private:
  class PhiPlaceholder {
   public:
    constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
    constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
        : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}

    constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
    constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;

    constexpr uint32_t GetBlockId() const {
      return block_id_;
    }

    constexpr size_t GetHeapLocation() const {
      return heap_location_;
    }

    constexpr bool Equals(const PhiPlaceholder& p2) const {
      return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
    }

    void Dump(std::ostream& oss) const {
      oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
    }

   private:
    uint32_t block_id_;
    uint32_t heap_location_;
  };

  friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
  friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);

  class Value {
   public:
    enum class ValuelessType {
      kInvalid,
      kUnknown,
      kDefault,
    };
    struct NeedsNonLoopPhiMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsLoopPhiMarker {
      PhiPlaceholder phi_;
    };

    static constexpr Value Invalid() {
      return Value(ValuelessType::kInvalid);
    }

    // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
    // A heap location can be set to an unknown heap value when:
    // - it is coming from outside the method,
    // - it is killed due to aliasing, or side effects, or merging with an unknown value.
    static constexpr Value Unknown() {
      return Value(ValuelessType::kUnknown);
    }

    // Default heap value after an allocation.
    // A heap location can be set to that value right after an allocation.
    static constexpr Value Default() {
      return Value(ValuelessType::kDefault);
    }

    static constexpr Value ForInstruction(HInstruction* instruction) {
      return Value(instruction);
    }

    static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsNonLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
      return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
                            : ForNonLoopPhiPlaceholder(phi_placeholder);
    }

    constexpr bool IsValid() const {
      return !IsInvalid();
    }

    constexpr bool IsInvalid() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kInvalid;
    }

    bool IsUnknown() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kUnknown;
    }

    bool IsDefault() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kDefault;
    }

    bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }

    bool NeedsNonLoopPhi() const {
      return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
    }

    bool NeedsLoopPhi() const {
      return std::holds_alternative<NeedsLoopPhiMarker>(value_);
    }

    bool NeedsPhi() const {
      return NeedsNonLoopPhi() || NeedsLoopPhi();
    }

    HInstruction* GetInstruction() const {
      DCHECK(IsInstruction()) << *this;
      return std::get<HInstruction*>(value_);
    }

    PhiPlaceholder GetPhiPlaceholder() const {
      DCHECK(NeedsPhi());
      if (NeedsNonLoopPhi()) {
        return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
      } else {
        return std::get<NeedsLoopPhiMarker>(value_).phi_;
      }
    }

    size_t GetHeapLocation() const {
      DCHECK(NeedsPhi()) << *this;
      return GetPhiPlaceholder().GetHeapLocation();
    }

    constexpr bool ExactEquals(Value other) const;

    constexpr bool Equals(Value other) const;

    constexpr bool Equals(HInstruction* instruction) const {
      return Equals(ForInstruction(instruction));
    }

    std::ostream& Dump(std::ostream& os) const;

    // Public for use with lists.
    constexpr Value() : value_(ValuelessType::kInvalid) {}

   private:
    using ValueHolder = std::variant<ValuelessType,
                                     HInstruction*,
                                     NeedsNonLoopPhiMarker,
                                     NeedsLoopPhiMarker>;
    constexpr ValuelessType GetValuelessType() const {
      return std::get<ValuelessType>(value_);
    }

    constexpr explicit Value(ValueHolder v) : value_(v) {}

    friend std::ostream& operator<<(std::ostream& os, const Value& v);

    ValueHolder value_;

    static_assert(std::is_move_assignable<PhiPlaceholder>::value);
  };

  friend constexpr bool operator==(const Value::NeedsLoopPhiMarker& p1,
                                   const Value::NeedsLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
                                   const Value::NeedsNonLoopPhiMarker& p2);

  // Get the Phi placeholder index for access to `phi_placeholder_replacements_`
  // and "visited" bit vectors during depth-first searches.
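  // For example (a worked instance of the index mapping with hypothetical
  // numbers): with 3 heap locations, the placeholder for block 2 and heap
  // location 1 maps to index 2 * 3 + 1 = 7, and GetPhiPlaceholderAt(7)
  // recovers it via division and modulo.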
  size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
    size_t res =
        phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
        phi_placeholder.GetHeapLocation();
    DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
        << res << " blks: " << GetGraph()->GetBlocks().size()
        << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
    return res;
  }

  size_t PhiPlaceholderIndex(Value phi_placeholder) const {
    return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
  }

  bool IsEscapingObject(ReferenceInfo* info) { return !info->IsSingletonAndRemovable(); }

  PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
    DCHECK_LT(off, num_phi_placeholders_);
    size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
    // Technically this should be (off - id) / NumberOfHeapLocations
    // but due to truncation it's all the same.
    size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
    return GetPhiPlaceholder(blk_id, id);
  }

  PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
    DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
    return PhiPlaceholder(block_id, idx);
  }

  Value Replacement(Value value) const {
    DCHECK(value.NeedsPhi()) << value << " phase: " << current_phase_;
    Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
    DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
    DCHECK(replacement.IsUnknown() ||
           FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
    return replacement;
  }

  Value ReplacementOrValue(Value value) const {
    if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
      return Replacement(value);
    } else {
      DCHECK_IMPLIES(value.IsInstruction(),
                     FindSubstitute(value.GetInstruction()) == value.GetInstruction());
      return value;
    }
  }

  // The record of a heap value and instruction(s) that feed that value.
  struct ValueRecord {
    Value value;
    Value stored_by;
  };

  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
                                                      HInstruction* value,
                                                      DataType::Type expected_type) {
    // We should never add a type conversion into a boolean value.
    if (expected_type == DataType::Type::kBool ||
        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
        // TODO: This prevents type conversion of default values but we can still insert
        // type conversion of other constants and there is no constant folding pass after LSE.
        IsZeroBitPattern(value)) {
      return nullptr;
    }

    // Check if there is already a suitable TypeConversion we can reuse.
    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
      if (use.GetUser()->IsTypeConversion() &&
          use.GetUser()->GetType() == expected_type &&
          // TODO: We could move the TypeConversion to a common dominator
          // if it does not cross irreducible loop header.
          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
          // Don't share across irreducible loop headers.
          // TODO: can be more fine-grained than this by testing each dominator.
          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
           !GetGraph()->HasIrreducibleLoops())) {
        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
          // Move the TypeConversion before the instruction.
          use.GetUser()->MoveBefore(instruction);
        }
        DCHECK(use.GetUser()->StrictlyDominates(instruction));
        return use.GetUser()->AsTypeConversion();
      }
    }

    // We must create a new TypeConversion instruction.
    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
        expected_type, value, instruction->GetDexPc());
    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
    return type_conversion;
  }

  // Find an instruction's substitute if it's a removed load.
  // Return the same instruction if it should not be removed.
  HInstruction* FindSubstitute(HInstruction* instruction) const {
    size_t id = static_cast<size_t>(instruction->GetId());
    if (id >= substitute_instructions_for_loads_.size()) {
      // New Phi (may not be in the graph yet), or default value.
      DCHECK(!IsLoad(instruction));
      return instruction;
    }
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    DCHECK(substitute == nullptr || IsLoad(instruction));
    return (substitute != nullptr) ? substitute : instruction;
  }

  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
    DCHECK(IsLoad(load));
    DCHECK_EQ(FindSubstitute(load), load);
    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
        "Unexpected heap_value that has a substitute " << heap_value->DebugName();

    // The load expects to load the heap value as type load->GetType().
    // However, the tracked heap value may not be of that type. An explicit
    // type conversion may be needed.
    // There are actually three types involved here:
    // (1) tracked heap value's type (type A)
    // (2) heap location (field or element)'s type (type B)
    // (3) load's type (type C)
    // We guarantee that type A stored as type B and then fetched out as
    // type C is the same as casting from type A to type C directly, since
    // type B and type C will have the same size which is guaranteed in
    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
    // So we only need one type conversion from type A to type C.
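    // For example (hypothetical types): an Int32 value stored to an Int8
    // field and loaded back as Int8 needs just a single Int32 -> Int8
    // conversion on the replacement value.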
    HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
        load, heap_value, load->GetType());

    substitute_instructions_for_loads_[load->GetId()] =
        type_conversion != nullptr ? type_conversion : heap_value;
  }

  static bool IsLoad(HInstruction* instruction) {
    // Unresolved load is not treated as a load.
    return instruction->IsInstanceFieldGet() ||
           instruction->IsStaticFieldGet() ||
           instruction->IsVecLoad() ||
           instruction->IsArrayGet();
  }

  static bool IsStore(HInstruction* instruction) {
    // Unresolved store is not treated as a store.
    return instruction->IsInstanceFieldSet() ||
           instruction->IsArraySet() ||
           instruction->IsVecStore() ||
           instruction->IsStaticFieldSet();
  }

  // Check if it is allowed to use default values or Phis for the specified load.
  static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
    DCHECK(IsLoad(instruction));
    // Using defaults for VecLoads requires creating additional vector operations.
    // As there are some issues with scheduling vector operations, it is better
    // to avoid creating them.
    return !instruction->IsVecOperation();
  }

  // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
  // This is necessary if the stored heap value can be observed.
  void KeepStores(Value value) {
    if (value.IsUnknown()) {
      return;
    }
    if (value.NeedsPhi()) {
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
    } else {
      HInstruction* instruction = value.GetInstruction();
      DCHECK(IsStore(instruction));
      kept_stores_.SetBit(instruction->GetId());
    }
  }

  // If a heap location X may alias with the heap location at `loc_index`
  // and the heap value record of that heap location X holds a store, keep that
  // store. It's needed for a dependent load that's not eliminated since any
  // store that may put a value into the load's heap location needs to be kept.
  void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
                                     size_t loc_index) {
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      if (i == loc_index) {
        // We use this function when reading a location with unknown value and
        // therefore we cannot know what exact store wrote that unknown value.
        // But we can have a phi placeholder here marking multiple stores to keep.
        DCHECK(!heap_values[i].stored_by.IsInstruction());
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      } else if (heap_location_collector_.MayAlias(i, loc_index)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  HInstruction* GetDefaultValue(DataType::Type type) {
    switch (type) {
      case DataType::Type::kReference:
        return GetGraph()->GetNullConstant();
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
      case DataType::Type::kInt32:
        return GetGraph()->GetIntConstant(0);
      case DataType::Type::kInt64:
        return GetGraph()->GetLongConstant(0);
      case DataType::Type::kFloat32:
        return GetGraph()->GetFloatConstant(0);
      case DataType::Type::kFloat64:
        return GetGraph()->GetDoubleConstant(0);
      default:
        UNREACHABLE();
    }
  }

  bool CanValueBeKeptIfSameAsNew(Value value,
                                 HInstruction* new_value,
                                 HInstruction* new_value_set_instr) {
    // For field/array set location operations, if the value is the same as the new_value
    // it can be kept even if aliasing happens. All aliased operations will access the same memory
    // range.
    // For vector values, this is not true. For example:
    //  packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
    //  VecStore array[i ,i+1,i+2,i+3] = packed_data;
    //  VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
    //  VecLoad vx = array[i,i+1,i+2,i+3];             <-- Cannot be eliminated because the value
    //                                                     here is not packed_data anymore.
    //
    // TODO: to allow such a 'same value' optimization on vector data,
    // LSA needs to report more fine-grained MAY alias information:
    // (1) May alias due to partial overlap of two vector accesses,
    //     e.g. a[i..i+3] and a[i+1,..,i+4].
    // (2) May alias because two vector accesses may completely overlap each other,
    //     e.g. a[i..i+3] and b[i..i+3].
    // (3) May alias but the exact relationship between the two locations is unknown,
    //     e.g. a[i..i+3] and b[j..j+3], where the values of a, b, i, j are all unknown.
    // This 'same value' optimization can apply only in case (2).
    if (new_value_set_instr->IsVecOperation()) {
      return false;
    }

    return value.Equals(new_value);
  }

  Value PrepareLoopValue(HBasicBlock* block, size_t idx);
  Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
  void PrepareLoopRecords(HBasicBlock* block);
  Value MergePredecessorValues(HBasicBlock* block, size_t idx);
  void MergePredecessorRecords(HBasicBlock* block);

  void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);

  void VisitGetLocation(HInstruction* instruction, size_t idx);
  void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
  void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
    field_infos_[heap_loc] = info;
  }

  void VisitBasicBlock(HBasicBlock* block) override;

  enum class Phase {
    kLoadElimination,
    kStoreElimination,
  };

  bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;

  bool TryReplacingLoopPhiPlaceholderWithDefault(
      PhiPlaceholder phi_placeholder,
      DataType::Type type,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  bool TryReplacingLoopPhiPlaceholderWithSingleInput(
      PhiPlaceholder phi_placeholder,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
      PhiPlaceholder phi_placeholder,
      /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
      DataType::Type type,
      bool can_use_default_or_phi);
  bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                           DataType::Type type);
  bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
  bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                           DataType::Type type);
  bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
  std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
                                                         HInstruction* load);
  void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
  void ProcessLoadsRequiringLoopPhis();

  void SearchPhiPlaceholdersForKeptStores();
  void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
  void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
  void FindStoresWritingOldValues();
  void FinishFullLSE();

  void HandleAcquireLoad(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
           (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Acquire operations e.g. MONITOR_ENTER change the thread's view of the memory, so we must
    // invalidate all current values.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::Unknown();
      heap_values[i].value = Value::Unknown();
    }

    // Note that there's no need to record the load as subsequent acquire loads shouldn't be
    // eliminated either.
  }

  void HandleReleaseStore(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
           (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Release operations e.g. MONITOR_EXIT do not affect this thread's view of the memory, but
    // they will push the modifications for other threads to see. Therefore, we must keep the
    // stores but there's no need to clobber the value.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::Unknown();
    }

    // Note that there's no need to record the store as subsequent release stores shouldn't be
    // eliminated either.
  }

  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleAcquireLoad(instruction);
        return;
      }
      // Treat it as a normal load if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
  }

  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleReleaseStore(instruction);
        return;
      }
      // Treat it as a normal store if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleAcquireLoad(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
  }

  void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleReleaseStore(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
    HInstruction* object = monitor_op->InputAt(0);
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
        heap_location_collector_.HuntForOriginalReference(object));
    if (ref_info->IsSingletonAndRemovable()) {
      // If the object is a removable singleton, we know that no other threads will have
      // access to it, and we can remove the MonitorOperation instruction.
      // MONITOR_ENTER throws when encountering a null object. If `object` is a removable
      // singleton, it is guaranteed to be non-null so we don't have to worry about the NullCheck.
      DCHECK(!object->CanBeNull());
      monitor_op->GetBlock()->RemoveInstruction(monitor_op);
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedMonitorOp);
      return;
    }

    // We detected a monitor operation that we couldn't remove. See also LSEVisitor::Run().
    monitor_op->GetBlock()->GetGraph()->SetHasMonitorOperations(true);
    if (monitor_op->IsEnter()) {
      HandleAcquireLoad(monitor_op);
    } else {
      HandleReleaseStore(monitor_op);
    }
  }

  void VisitArrayGet(HArrayGet* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitArraySet(HArraySet* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitVecLoad(HVecLoad* instruction) override {
    DCHECK(!instruction->IsPredicated());
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitVecStore(HVecStore* instruction) override {
    DCHECK(!instruction->IsPredicated());
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitDeoptimize(HDeoptimize* instruction) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
    HBasicBlock* block = instruction->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      Value* stored_by = &heap_values[i].stored_by;
      if (stored_by->IsUnknown()) {
        continue;
      }
      // Stores are generally observable after deoptimization, except
      // for singletons that don't escape in the deoptimization environment.
      bool observable = true;
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (!inside_a_try && info->IsSingleton()) {
        HInstruction* reference = info->GetReference();
        // Finalizable objects always escape.
        const bool finalizable_object =
            reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
        if (!finalizable_object && !IsEscapingObject(info)) {
          // Check whether the reference for a store is used by an environment local of
          // the HDeoptimize. If not, the singleton is not observed after deoptimization.
          const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
          observable = std::any_of(
              env_uses.begin(),
              env_uses.end(),
              [instruction](const HUseListNode<HEnvironment*>& use) {
                return use.GetUser()->GetHolder() == instruction;
              });
        }
      }
      if (observable) {
        KeepStores(*stored_by);
        *stored_by = Value::Unknown();
      }
    }
  }

  // Keep necessary stores before exiting a method via return/throw.
  void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (must_keep_stores || IsEscapingObject(ref_info)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitReturn(HReturn* instruction) override {
    HandleExit(instruction->GetBlock());
  }

  void VisitReturnVoid(HReturnVoid* return_void) override {
    HandleExit(return_void->GetBlock());
  }

  void HandleThrowingInstruction(HInstruction* instruction) {
    DCHECK(instruction->CanThrow());
    // If we are inside of a try, singletons can become visible since we may not exit the method.
    HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
  }

  void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
    HandleThrowingInstruction(method_entry);
  }

  void VisitMethodExitHook(HMethodExitHook* method_exit) override {
    HandleThrowingInstruction(method_exit);
  }

  void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
    HandleThrowingInstruction(div_zero_check);
  }

  void VisitNullCheck(HNullCheck* null_check) override {
    HandleThrowingInstruction(null_check);
  }

  void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
    HandleThrowingInstruction(bounds_check);
  }

  void VisitLoadClass(HLoadClass* load_class) override {
    if (load_class->CanThrow()) {
      HandleThrowingInstruction(load_class);
    }
  }

  void VisitLoadString(HLoadString* load_string) override {
    if (load_string->CanThrow()) {
      HandleThrowingInstruction(load_string);
    }
  }

  void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
    HandleThrowingInstruction(load_method_handle);
  }

  void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
    HandleThrowingInstruction(load_method_type);
  }

  void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
    HandleThrowingInstruction(sb_append);
  }

  void VisitThrow(HThrow* throw_instruction) override {
    HandleThrowingInstruction(throw_instruction);
  }

  void VisitCheckCast(HCheckCast* check_cast) override {
    HandleThrowingInstruction(check_cast);
  }

  void HandleInvoke(HInstruction* instruction) {
    // If `instruction` can throw we have to presume all stores are visible.
    const bool can_throw = instruction->CanThrow();
    // If we are in a try, even singletons are observable.
    const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
    SideEffects side_effects = instruction->GetSideEffects();
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      // We don't need to do anything if the reference has not escaped at this point.
      // This is true if we never escape.
      if (!can_throw_inside_a_try && ref_info->IsSingleton()) {
        // Singleton references cannot be seen by the callee.
      } else {
        if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
          // Previous stores may become visible (read) and/or impossible for LSE to track (write).
          KeepStores(heap_values[i].stored_by);
          heap_values[i].stored_by = Value::Unknown();
        }
        if (side_effects.DoesAnyWrite()) {
          // The value may be clobbered.
          heap_values[i].value = Value::Unknown();
        }
      }
    }
  }

  void VisitInvoke(HInvoke* invoke) override {
    HandleInvoke(invoke);
  }

  void VisitClinitCheck(HClinitCheck* clinit) override {
    // Class initialization check can result in class initializer calling arbitrary methods.
    HandleInvoke(clinit);
  }

  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitNewInstance(HNewInstance* new_instance) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
    if (ref_info == nullptr) {
      // new_instance isn't used for field accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
      DCHECK(!new_instance->IsFinalizable());
      // new_instance can potentially be eliminated.
      singleton_new_instances_.push_back(new_instance);
    }
    HBasicBlock* block = new_instance->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      HInstruction* ref = info->GetReference();
      size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
      if (ref == new_instance) {
        if (offset >= mirror::kObjectHeaderSize ||
            MemberOffset(offset) == mirror::Object::MonitorOffset()) {
          // Instance fields except the header fields are set to default heap values.
          // The shadow$_monitor_ field is set to the default value however.
          heap_values[i].value = Value::Default();
          heap_values[i].stored_by = Value::Unknown();
        } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
          // The shadow$_klass_ field is special and has an actual value however.
          heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
          heap_values[i].stored_by = Value::Unknown();
        }
      } else if (inside_a_try || IsEscapingObject(info)) {
        // Since NewInstance can throw, we presume all previous stores could be visible.
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitNewArray(HNewArray* new_array) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
    if (ref_info == nullptr) {
      // new_array isn't used for array accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable()) {
      if (new_array->GetLength()->IsIntConstant() &&
          new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
        // new_array can potentially be eliminated.
        singleton_new_instances_.push_back(new_array);
      } else {
        // new_array may throw NegativeArraySizeException. Keep it.
      }
    }
    HBasicBlock* block = new_array->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
      ReferenceInfo* info = location->GetReferenceInfo();
      HInstruction* ref = info->GetReference();
      if (ref == new_array && location->GetIndex() != nullptr) {
        // Array elements are set to default heap values.
        heap_values[i].value = Value::Default();
        heap_values[i].stored_by = Value::Unknown();
      } else if (inside_a_try || IsEscapingObject(info)) {
        // Since NewArray can throw, we presume all previous stores could be visible.
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitInstruction(HInstruction* instruction) override {
    // Throwing instructions must be handled specially.
    DCHECK(!instruction->CanThrow());
  }

  const HeapLocationCollector& heap_location_collector_;

  // Use the local allocator for allocating memory.
  ScopedArenaAllocator allocator_;

  // The number of unique Phi placeholders there can possibly be.
  size_t num_phi_placeholders_;
1137
1138 // One array of heap value records for each block.
1139 ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;
1140
1141 // We record loads and stores for re-processing when we find a loop Phi placeholder
1142 // with unknown value from a predecessor, and also for removing stores that are
1143 // found to be dead, i.e. not marked in `kept_stores_` at the end.
1144 struct LoadStoreRecord {
1145 HInstruction* load_or_store;
1146 size_t heap_location_index;
1147 };
1148 ScopedArenaVector<LoadStoreRecord> loads_and_stores_;
1149
1150 // We record the substitute instructions for loads that should be
1151 // eliminated but may be used by heap locations. They'll be removed
1152 // in the end. These are indexed by the load's id.
1153 ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;
1154
1155 // Record stores to keep in a bit vector indexed by instruction ID.
1156 ArenaBitVector kept_stores_;
1157 // When we need to keep all stores that feed a Phi placeholder, we just record the
1158 // index of that placeholder for processing after graph traversal.
1159 ArenaBitVector phi_placeholders_to_search_for_kept_stores_;
1160
1161 // Loads that would require a loop Phi to replace are recorded for processing
1162 // later as we do not have enough information from back-edges to determine if
1163 // a suitable Phi can be found or created when we visit these loads.
1164 // This is a flat "map" indexed by the load instruction id.
1165 ScopedArenaVector<ValueRecord*> loads_requiring_loop_phi_;
1166
1167 // For stores, record the old value records that were replaced and the stored values.
1168 struct StoreRecord {
StoreRecordart::LSEVisitor::StoreRecord1169 StoreRecord(ValueRecord old_value_record_in, HInstruction* stored_value_in)
1170 : old_value_record(old_value_record_in), stored_value(stored_value_in) {}
1171 ValueRecord old_value_record;
1172 HInstruction* stored_value;
1173 };
1174 // This is a flat "map" indexed by the store instruction id.
1175 ScopedArenaVector<StoreRecord*> store_records_;
1176
1177 // Replacements for Phi placeholders.
1178 // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
1179 ScopedArenaVector<Value> phi_placeholder_replacements_;
1180
1181 ScopedArenaVector<HInstruction*> singleton_new_instances_;
1182
1183 // The field infos for each heap location (if relevant).
1184 ScopedArenaVector<const FieldInfo*> field_infos_;
1185
1186 Phase current_phase_;
1187
1188 friend std::ostream& operator<<(std::ostream& os, const Value& v);
1189 friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);
1190
1191 DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
1192 };
1193
std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
  switch (phase) {
    case LSEVisitor::Phase::kLoadElimination:
      return oss << "kLoadElimination";
    case LSEVisitor::Phase::kStoreElimination:
      return oss << "kStoreElimination";
  }
}

constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
                          const LSEVisitor::PhiPlaceholder& p2) {
  return p1.Equals(p2);
}

constexpr bool operator==(const LSEVisitor::Value::NeedsLoopPhiMarker& p1,
                          const LSEVisitor::Value::NeedsLoopPhiMarker& p2) {
  return p1.phi_ == p2.phi_;
}

constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
                          const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
  return p1.phi_ == p2.phi_;
}

std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
  p.Dump(oss);
  return oss;
}

constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
  return value_ == other.value_;
}

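// An illustrative note for `Equals()` below (hypothetical example): a store of
// the constant 0 into an `int` field is a zero-bit-pattern instruction, so a
// tracked value from `obj.f = 0;` compares equal to the default value of `f`.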
constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
  // Only valid values can be compared.
  DCHECK(IsValid());
  DCHECK(other.IsValid());
  if (value_ == other.value_) {
    // Note: Two unknown values are considered different.
    return !IsUnknown();
  } else {
    // Default is considered equal to zero-bit-pattern instructions.
    return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
           (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
  }
}

std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
  if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
    switch (GetValuelessType()) {
      case ValuelessType::kDefault:
        return os << "Default";
      case ValuelessType::kUnknown:
        return os << "Unknown";
      case ValuelessType::kInvalid:
        return os << "Invalid";
    }
  } else if (IsInstruction()) {
    return os << "Instruction[id: " << GetInstruction()->GetId()
              << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
  } else if (NeedsLoopPhi()) {
    return os << "NeedsLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
              << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
  } else {
    return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
              << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
  }
}

std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
  return v.Dump(os);
}

LSEVisitor::LSEVisitor(HGraph* graph,
                       const HeapLocationCollector& heap_location_collector,
                       OptimizingCompilerStats* stats)
    : HGraphDelegateVisitor(graph, stats),
      heap_location_collector_(heap_location_collector),
      allocator_(graph->GetArenaStack()),
      num_phi_placeholders_(GetGraph()->GetBlocks().size() *
                            heap_location_collector_.GetNumberOfHeapLocations()),
      heap_values_for_(graph->GetBlocks().size(),
                       ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
                       allocator_.Adapter(kArenaAllocLSE)),
      loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
      // We may add new instructions (default values, Phis) but we're not adding loads
      // or stores, so we shall not need to resize the following vector and BitVector.
      substitute_instructions_for_loads_(
          graph->GetCurrentInstructionId(), nullptr, allocator_.Adapter(kArenaAllocLSE)),
      kept_stores_(&allocator_,
                   /*start_bits=*/graph->GetCurrentInstructionId(),
                   /*expandable=*/false,
                   kArenaAllocLSE),
      phi_placeholders_to_search_for_kept_stores_(&allocator_,
                                                  num_phi_placeholders_,
                                                  /*expandable=*/false,
                                                  kArenaAllocLSE),
      loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
      store_records_(allocator_.Adapter(kArenaAllocLSE)),
      phi_placeholder_replacements_(
          num_phi_placeholders_, Value::Invalid(), allocator_.Adapter(kArenaAllocLSE)),
      singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
      field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
                   allocator_.Adapter(kArenaAllocLSE)),
      current_phase_(Phase::kLoadElimination) {}

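// An illustrative sketch of the pre-header logic below (hypothetical Java-level
// input, not taken from the test suite):
//
//   obj.f = 1;                   // Known value in the pre-header.
//   for (int i = 0; i < n; ++i) {
//     use(obj.f);                // Loop header: Phi placeholder for `obj.f`.
//   }
//
// If no back-edge clobbers `obj.f`, the placeholder can later be resolved to
// the pre-header value 1 and the load inside the loop is eliminated.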
LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
  // If the pre-header value is known (which implies that the reference dominates this
  // block), use a Phi placeholder for the value in the loop header. If all predecessors
  // are later found to have a known value, we can replace loads from this location,
  // either with the pre-header value or with a new Phi. For array locations, the index
  // may be defined inside the loop but the only known value in that case should be the
  // default value or a Phi placeholder that can be replaced only with the default value.
  HLoopInformation* loop_info = block->GetLoopInformation();
  uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
  Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
  if (pre_header_value.IsUnknown()) {
    return pre_header_value;
  }
  if (kIsDebugBuild) {
    // Check that the reference indeed dominates this loop.
    HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
    HInstruction* ref = location->GetReferenceInfo()->GetReference();
    CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
        << GetGraph()->PrettyMethod();
    // Check that the index, if defined inside the loop, tracks a default value
    // or a Phi placeholder requiring a loop Phi.
    HInstruction* index = location->GetIndex();
    if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
      CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
          << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
          << pre_header_value;
    }
  }
  PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
}

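// An illustrative sketch for the singleton carve-out below (hypothetical):
// in `while (...) { Point p = new Point(); p.x = 1; }`, the object `p` does
// not survive the back-edge, so the store `p.x = 1` is dead at the loop
// header and `stored_by` is reset to unknown instead of a Phi placeholder.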
LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
  // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
  // if the value in the location escapes. This is not applicable to singletons that are
  // defined inside the loop as they shall be dead in the loop header.
  const ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
  const HInstruction* reference = ref_info->GetReference();
  // Finalizable objects always escape.
  const bool is_finalizable =
      reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
  if (ref_info->IsSingleton() &&
      block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
      !is_finalizable) {
    return Value::Unknown();
  }
  PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return Value::ForLoopPhiPlaceholder(phi_placeholder);
}

void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
  DCHECK(block->IsLoopHeader());
  int block_id = block->GetBlockId();
  HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
  ScopedArenaVector<ValueRecord>& pre_header_heap_values =
      heap_values_for_[pre_header->GetBlockId()];
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
  DCHECK(heap_values.empty());

  // Don't eliminate loads in irreducible loops.
  if (block->GetLoopInformation()->IsIrreducible()) {
    heap_values.resize(num_heap_locations,
                       {/*value=*/Value::Unknown(), /*stored_by=*/Value::Unknown()});
    // Also keep the stores before the loop header, including in blocks that were not visited yet.
    for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
      KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
    }
    return;
  }

  // Fill `heap_values` based on values from pre-header.
  heap_values.reserve(num_heap_locations);
  for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
    heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
  }
}

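// An illustrative sketch of the merge below (hypothetical Java-level input):
//
//   if (cond) { obj.f = 1; } else { obj.f = 2; }
//   use(obj.f);  // Conflicting known values: a Phi placeholder is recorded
//                // and the load may later become Phi(1, 2).
//
// If either predecessor had an unknown value, the merged value would be
// unknown as well.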
LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
  ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
  DCHECK(!predecessors.empty());
  Value merged_value =
      ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
  for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
    Value pred_value =
        ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
    if (pred_value.Equals(merged_value)) {
      // Value is the same. No need to update our merged value.
      continue;
    } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
      // If one is unknown and the other is not, the merged value is unknown.
      merged_value = Value::Unknown();
      break;
    } else {
      // There are conflicting known values. We may still be able to replace loads with a Phi.
      PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
      // Propagate the need for a new loop Phi from all predecessors.
      bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
      merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
    }
  }
  return merged_value;
}

void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
  if (block->IsExitBlock()) {
    // Exit block doesn't really merge values since the control flow ends in
    // its predecessors. Each predecessor needs to make sure stores are kept
    // if necessary.
    return;
  }

  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
  DCHECK(heap_values.empty());
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
    DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
    heap_values.resize(num_heap_locations,
                       {/*value=*/Value::Unknown(), /*stored_by=*/Value::Unknown()});
    return;
  }

  heap_values.reserve(num_heap_locations);
  for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
    Value merged_value = MergePredecessorValues(block, idx);
    if (kIsDebugBuild) {
      if (merged_value.NeedsPhi()) {
        uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
        CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
      } else if (merged_value.IsInstruction()) {
        CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
      }
    }
    ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
    Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
    for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
      uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
      Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
      if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
          !merged_stored_by.Equals(stored_by)) {
        // Use the Phi placeholder to track that we need to keep stores from all predecessors.
        PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
        merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
        break;
      }
    }
    heap_values.push_back({ merged_value, merged_stored_by });
  }
}

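// A usage sketch for the helper below (hypothetical inputs): for a block with
// predecessors {P1, P2} and `phi_inputs` {v1, v2}, an existing `Phi(v1, v2)`
// in the block is reused; otherwise a new `HPhi` is created, added to the
// block and, for reference types, fixed up by reference type propagation.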
static HInstruction* FindOrConstructNonLoopPhi(
    HBasicBlock* block,
    const ScopedArenaVector<HInstruction*>& phi_inputs,
    DataType::Type type) {
  for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
    HInstruction* phi = phi_it.Current();
    DCHECK_EQ(phi->InputCount(), phi_inputs.size());
    auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
      return lhs == rhs.GetInstruction();
    };
    if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
      return phi;
    }
  }
  ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
  HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
  for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
    DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
    phi->SetRawInputAt(i, phi_inputs[i]);
  }
  block->AddPhi(phi);
  if (type == DataType::Type::kReference) {
    // Update reference type information. Pass invalid handles, these are not used for Phis.
    ReferenceTypePropagation rtp_fixup(block->GetGraph(),
                                       Handle<mirror::DexCache>(),
                                       /* is_first_run= */ false);
    rtp_fixup.Visit(phi);
  }
  return phi;
}

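// The materialization below is a depth-first traversal over Phi placeholder
// dependencies using an explicit stack: a placeholder is left on the stack
// while its predecessor placeholders are processed, so replacements are
// created bottom-up and each placeholder is finalized only once all of its
// inputs are available.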
void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  size_t idx = phi_placeholder.GetHeapLocation();

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Reuse the same vector for collecting phi inputs.
  ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));

  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
      // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
      // that directly or indirectly depends on it, so it was already processed as part of the
      // other Phi placeholder's dependencies before this one got back to the top of the stack.
      work_queue.pop_back();
      continue;
    }
    uint32_t current_block_id = current_phi_placeholder.GetBlockId();
    HBasicBlock* current_block = blocks[current_block_id];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);

    // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
    // And the only way for such merged value to reach a different heap location is through
    // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
    // seen here are tied to one heap location.
    DCHECK(!current_block->IsLoopHeader())
        << current_phi_placeholder << " phase: " << current_phase_;
    DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);

    phi_inputs.clear();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      DCHECK(!pred_value.IsUnknown()) << pred_value << " block " << current_block->GetBlockId()
                                      << " pred: " << predecessor->GetBlockId();
      if (pred_value.NeedsNonLoopPhi()) {
        // We need to process the Phi placeholder first.
        work_queue.push_back(pred_value.GetPhiPlaceholder());
      } else if (pred_value.IsDefault()) {
        phi_inputs.push_back(GetDefaultValue(type));
      } else {
        DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
                                           << " pred: " << predecessor->GetBlockId();
        phi_inputs.push_back(pred_value.GetInstruction());
      }
    }
    if (phi_inputs.size() == current_block->GetPredecessors().size()) {
      // All inputs are available. Find or construct the Phi replacement.
      phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
          Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
      // Remove the block from the queue.
      DCHECK_EQ(current_phi_placeholder, work_queue.back());
      work_queue.pop_back();
    }
  }
}

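// An illustrative sketch of the load handling below (hypothetical input):
//
//   obj.f = 1;
//   int a = obj.f;  // Tracked value known: load eliminated, `a` becomes 1.
//   int b = arr[i]; // Tracked value unknown: the load is kept and recorded
//                   // as the new heap value, acting like GVN for later loads.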
void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
  DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
  uint32_t block_id = instruction->GetBlock()->GetBlockId();
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
  ValueRecord& record = heap_values[idx];
  if (instruction->IsFieldAccess()) {
    RecordFieldInfo(&instruction->GetFieldInfo(), idx);
  }
  DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
  loads_and_stores_.push_back({ instruction, idx });
  if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
      !IsDefaultOrPhiAllowedForLoad(instruction)) {
    record.value = Value::Unknown();
  }
  if (record.value.IsDefault()) {
    KeepStores(record.stored_by);
    HInstruction* constant = GetDefaultValue(instruction->GetType());
    AddRemovedLoad(instruction, constant);
    record.value = Value::ForInstruction(constant);
  } else if (record.value.IsUnknown()) {
    // Load isn't eliminated. Put the load as the value into the HeapLocation.
    // This acts like GVN but with better aliasing analysis.
    Value old_value = record.value;
    record.value = Value::ForInstruction(instruction);
    KeepStoresIfAliasedToLocation(heap_values, idx);
    KeepStores(old_value);
  } else if (record.value.NeedsLoopPhi()) {
    // We do not know yet if the value is known for all back edges. Record for future processing.
    if (loads_requiring_loop_phi_.empty()) {
      loads_requiring_loop_phi_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
    }
    DCHECK_EQ(loads_requiring_loop_phi_[instruction->GetId()], nullptr);
    loads_requiring_loop_phi_[instruction->GetId()] =
        new (allocator_.Alloc<ValueRecord>(kArenaAllocLSE)) ValueRecord(record);
  } else {
    // This load can be eliminated but we may need to construct non-loop Phis.
    if (record.value.NeedsNonLoopPhi()) {
      MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
      record.value = Replacement(record.value);
    }
    HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
    AddRemovedLoad(instruction, heap_value);
  }
}

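// An illustrative sketch of the store handling below (hypothetical input):
//
//   obj.f = v;
//   obj.f = v;    // Same tracked value: this store is removed right away.
//   other.f = w;  // May alias `obj.f`: the old value is invalidated and
//                 // previous stores to aliased locations are kept.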
void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
  DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
  DCHECK(!IsStore(value)) << value->DebugName();
  if (instruction->IsFieldAccess()) {
    RecordFieldInfo(&instruction->GetFieldInfo(), idx);
  }
  // `value` may already have a substitute.
  value = FindSubstitute(value);
  HBasicBlock* block = instruction->GetBlock();
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
  ValueRecord& record = heap_values[idx];
  DCHECK_IMPLIES(record.value.IsInstruction(),
                 FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());

  if (record.value.Equals(value)) {
    // Store into the heap location with the same value.
    // This store can be eliminated right away.
    block->RemoveInstruction(instruction);
    return;
  }

  if (store_records_.empty()) {
    store_records_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
  }
  DCHECK_EQ(store_records_[instruction->GetId()], nullptr);
  store_records_[instruction->GetId()] =
      new (allocator_.Alloc<StoreRecord>(kArenaAllocLSE)) StoreRecord(record, value);
  loads_and_stores_.push_back({ instruction, idx });

  // If the `record.stored_by` specified a store from this block, it shall be removed
  // at the end, except for throwing ArraySet; it cannot be marked for keeping in
  // `kept_stores_` anymore after we update the `record.stored_by` below.
  DCHECK(!record.stored_by.IsInstruction() ||
         record.stored_by.GetInstruction()->GetBlock() != block ||
         record.stored_by.GetInstruction()->CanThrow() ||
         !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));

  if (instruction->CanThrow()) {
    // Previous stores can become visible.
    HandleThrowingInstruction(instruction);
    // We cannot remove a possibly throwing store.
    // After marking it as kept, it does not matter if we track it in `stored_by` or not.
    kept_stores_.SetBit(instruction->GetId());
  }

  // Update the record.
  // Note that the `value` can be a newly created `Phi` with an id that falls outside
  // the allocated `loads_requiring_loop_phi_` range.
  DCHECK_IMPLIES(IsLoad(value) && !loads_requiring_loop_phi_.empty(),
                 static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size());
  if (static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size() &&
      loads_requiring_loop_phi_[value->GetId()] != nullptr) {
    // Propagate the Phi placeholder to the record.
    record.value = loads_requiring_loop_phi_[value->GetId()]->value;
    DCHECK(record.value.NeedsLoopPhi());
  } else {
    record.value = Value::ForInstruction(value);
  }
  // Track the store in the value record. If the value is loaded or needed after
  // return/deoptimization later, this store isn't really redundant.
  record.stored_by = Value::ForInstruction(instruction);

  // This store may kill values in other heap locations due to aliasing.
  for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
    if (i == idx ||
        heap_values[i].value.IsUnknown() ||
        CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
        !heap_location_collector_.MayAlias(i, idx)) {
      continue;
    }
    // Kill heap locations that may alias and keep previous stores to these locations.
    KeepStores(heap_values[i].stored_by);
    heap_values[i].stored_by = Value::Unknown();
    heap_values[i].value = Value::Unknown();
  }
}

void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
  // Populate the heap_values array for this block.
  // TODO: try to reuse the heap_values array from one predecessor if possible.
  if (block->IsLoopHeader()) {
    PrepareLoopRecords(block);
  } else {
    MergePredecessorRecords(block);
  }
  // Visit non-Phi instructions.
  VisitNonPhiInstructions(block);
}

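// An illustrative sketch for the back-edge aliasing check below (hypothetical):
//
//   for (int i = 0; i < n; ++i) {
//     a[i + 1] = 1;  // LSA: no alias with `a[i]` within one iteration...
//     use(a[i]);     // ...but on the next iteration `a[i]` is the previous
//   }                // iteration's `a[i + 1]`, so they may alias here.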
bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
  DCHECK_NE(idx1, idx2);
  DCHECK(loop_header->IsLoopHeader());
  if (heap_location_collector_.MayAlias(idx1, idx2)) {
    return true;
  }
  // For array locations with index defined inside the loop, include
  // all other locations in the array, even those that LSA declares
  // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
  // refer to the same locations for different iterations. (LSA's
  // `ComputeMayAlias()` does not consider different loop iterations.)
  HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
  HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
  if (loc1->IsArray() &&
      loc2->IsArray() &&
      HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
                                                loc2->GetReferenceInfo())) {
    HLoopInformation* loop_info = loop_header->GetLoopInformation();
    if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
        loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
      // Consider the locations aliasing. Do not optimize the case where both indexes
      // are loop invariants defined inside the loop, rely on LICM to pull them out.
      return true;
    }
  }
  return false;
}

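// An illustrative sketch for the default-value replacement below (hypothetical):
//
//   int[] a = new int[n];          // All elements start at the default 0.
//   for (int i = 0; i < n; ++i) {
//     use(a[i]);                   // No write reachable through the Phi chain:
//   }                              // the placeholder folds to the constant 0.
//
// Any reachable write of a non-default value makes the replacement fail.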
bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
    PhiPlaceholder phi_placeholder,
    DataType::Type type,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth first search to check if any non-Phi input is unknown.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
          visited.SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
        }
      } else if (!value.Equals(Value::Default())) {
        return false;  // Report failure.
      }
    }
    if (block->IsLoopHeader()) {
      // For back-edges we need to check all locations that write to the same array,
      // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`
      // as they may actually refer to the same locations for different iterations.
      for (size_t i = 0; i != num_heap_locations; ++i) {
        if (i == idx ||
            heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
                heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
          continue;
        }
        for (HBasicBlock* predecessor : block->GetPredecessors()) {
          // Check if there were any writes to this location.
          // Note: We could simply process the values but due to the vector operation
          // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
          // the value to change and not be equal to default. To work around this and
          // allow replacing the non-vector load of loop-invariant default values
          // anyway, skip over paths that do not have any writes.
          ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
          while (record.stored_by.NeedsLoopPhi() &&
                 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
            HLoopInformation* loop_info =
                blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
            record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
          }
          Value value = ReplacementOrValue(record.value);
          if (value.NeedsPhi()) {
            // Visit the predecessor Phi placeholder if it's not visited yet.
            if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
              visited.SetBit(PhiPlaceholderIndex(value));
              work_queue.push_back(value.GetPhiPlaceholder());
            }
          } else if (!value.Equals(Value::Default())) {
            return false;  // Report failure.
          }
        }
      }
    }
  }

  // Record replacement and report success.
  HInstruction* replacement = GetDefaultValue(type);
  for (uint32_t phi_placeholder_index : visited.Indexes()) {
    DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
    PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
    HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
    // We use both vector and non-vector operations to analyze the information. However, we
    // replace only non-vector operations in this code path.
    if (!hl->IsVecOp()) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
      phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
    }
  }
  return true;
}

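// An illustrative note for the single-input replacement below, used for vector
// operations where default values and Phis are not allowed: if every non-Phi
// input reachable through the placeholder chain is one and the same
// instruction defined before the loop, loads fold directly to that instruction.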
bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
    PhiPlaceholder phi_placeholder,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth first search to check if any non-Phi input is unknown.
  HInstruction* replacement = nullptr;
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
          visited.SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
        }
      } else {
        if (!value.IsInstruction() ||
            (replacement != nullptr && replacement != value.GetInstruction())) {
          return false;  // Report failure.
        }
        replacement = value.GetInstruction();
      }
    }
    // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
    // for back-edges, it is not needed here. When looking for a single input
    // instruction coming from before the loop, the array index must also be
    // defined before the loop and the aliasing analysis done by LSA is sufficient.
    // Any writes of a different value with an index that is not loop invariant
    // would invalidate the heap location in `VisitSetLocation()`.
  }

  // Record replacement and report success.
  DCHECK(replacement != nullptr);
  for (uint32_t phi_placeholder_index : visited.Indexes()) {
    DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
    PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
    HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
    // We use both vector and non-vector operations to analyze the information. However, we
    // replace only vector operations in this code path.
    if (hl->IsVecOp()) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
      phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
    }
  }
  return true;
}

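// A usage sketch for the search below (assumed, for illustration): starting
// from one placeholder, the DFS collects into `phi_placeholders_to_materialize`
// every loop Phi placeholder that the replacement transitively depends on.
// It returns std::nullopt when all inputs are known (the Phis can be built),
// or the first placeholder found with an unknown input, which is then handled
// by `ProcessLoopPhiWithUnknownInput()`.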
std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
    PhiPlaceholder phi_placeholder,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
    DataType::Type type,
    bool can_use_default_or_phi) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth first search to check if any non-Phi input is unknown.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  phi_placeholders_to_materialize->ClearAllBits();
  phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
      // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
      DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
          Value::Default()));
      continue;
    }
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    if (current_block->IsLoopHeader()) {
      // If the index is defined inside the loop, it may reference different elements of the
      // array on each iteration. Since we do not track if all elements of an array are set
      // to the same value explicitly, the only known value in pre-header can be the default
      // value from NewArray or a Phi placeholder depending on a default value from some outer
      // loop pre-header. This Phi placeholder can be replaced only by the default value.
      HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
      if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
        if (can_use_default_or_phi &&
            TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
                                                      type,
                                                      phi_placeholders_to_materialize)) {
          continue;
        } else {
          return current_phi_placeholder;  // Report the loop Phi placeholder.
        }
      }
      // A similar situation arises with the index defined outside the loop if we cannot use
      // default values or Phis, i.e. for vector loads, as we can only replace the Phi
      // placeholder with a single instruction defined before the loop.
      if (!can_use_default_or_phi) {
        DCHECK(index != nullptr);  // Vector operations are array operations.
        if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
                                                          phi_placeholders_to_materialize)) {
          continue;
        } else {
          return current_phi_placeholder;  // Report the loop Phi placeholder.
        }
      }
    }
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
      Value value = ReplacementOrValue(heap_values[idx].value);
      if (value.IsUnknown()) {
        // We cannot create a Phi for this loop Phi placeholder.
        return current_phi_placeholder;  // Report the loop Phi placeholder.
      }
      // For arrays, the location may have been clobbered by writes to other locations
      // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
      if (current_block->IsLoopHeader() &&
          predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
          heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
        for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
          if (i != idx &&
              !heap_values[i].stored_by.IsUnknown() &&
              MayAliasOnBackEdge(current_block, idx, i)) {
            // We cannot create a Phi for this loop Phi placeholder.
            return current_phi_placeholder;
          }
        }
      }
      if (value.NeedsLoopPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
          phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
          LSE_VLOG << "For materialization of " << phi_placeholder
                   << " we need to materialize " << value;
        }
      }
    }
  }

  // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
  return std::nullopt;
}

bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                                     DataType::Type type) {
  return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
}

bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
                                     DataType::Type type) {
  // Materialize all predecessors that do not need a loop Phi and determine if all inputs
  // other than loop Phis are the same.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  std::optional<Value> other_value = std::nullopt;
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    DCHECK_GE(block->GetPredecessors().size(), 2u);
    size_t idx = phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsNonLoopPhi()) {
        DCHECK(current_phase_ == Phase::kLoadElimination) << current_phase_;
        MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
        value = Replacement(value);
      }
      if (!value.NeedsLoopPhi()) {
        if (!other_value) {
          // The first other value we found.
          other_value = value;
        } else if (!other_value->IsInvalid()) {
          // Check if the current `value` differs from the previous `other_value`.
          if (!value.Equals(*other_value)) {
            other_value = Value::Invalid();
          }
        }
      }
    }
  }

  DCHECK(other_value.has_value());
  if (!other_value->IsInvalid()) {
    HInstruction* replacement =
        (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
    for (size_t phi_placeholder_index : phi_placeholder_indexes) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
    }
    return true;
  }

  // If we're materializing only a single Phi, try to match it with an existing Phi.
  // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
  // This also covers the case when after replacing a previous set of Phi placeholders,
  // we continue with a Phi placeholder that does not really need a loop Phi anymore.
  if (phi_placeholder_indexes.size() == 1u) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
    size_t idx = phi_placeholder.GetHeapLocation();
    HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
    ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
      HInstruction* phi = phi_it.Current();
      DCHECK_EQ(phi->InputCount(), predecessors.size());
      ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
      auto cmp = [=, this](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
        Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
        if (value.NeedsPhi()) {
          DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
          return lhs.GetInstruction() == phi;
        } else {
          DCHECK(value.IsDefault() || value.IsInstruction());
          return value.Equals(lhs.GetInstruction());
        }
      };
      if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
        phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
        return true;
      }
    }
  }

  if (current_phase_ == Phase::kStoreElimination) {
    // We're not creating Phis during the final store elimination phase.
    return false;
  }

  // There are different inputs to the Phi chain. Create the Phis.
  ArenaAllocator* allocator = GetGraph()->GetAllocator();
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    CHECK_GE(block->GetPredecessors().size(), 2u);
    phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
        new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
  }
  // Fill the Phi inputs.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    size_t idx = phi_placeholder.GetHeapLocation();
    HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
    DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
        << "type=" << type << " vs phi-type=" << phi->GetType();
    for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
      HBasicBlock* predecessor = block->GetPredecessors()[i];
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
      DCHECK_NE(input->GetType(), DataType::Type::kVoid);
      phi->SetRawInputAt(i, input);
      DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
          << " input: " << input->GetType() << value << " phi: " << phi->GetType()
          << " request: " << type;
    }
  }
  // Add the Phis to their blocks.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
  }
  if (type == DataType::Type::kReference) {
    ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
    ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
    for (size_t phi_placeholder_index : phi_placeholder_indexes) {
      phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
    }
    // Update reference type information. Pass invalid handles, these are not used for Phis.
    ReferenceTypePropagation rtp_fixup(GetGraph(),
                                       Handle<mirror::DexCache>(),
                                       /* is_first_run= */ false);
    rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
  }

  return true;
}

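// An illustrative sketch for the method below (hypothetical dependencies):
// with placeholders {A, B, C} where A depends on B and B depends on C, the
// Floyd-Warshall closure yields dependency counts C: 1, B: 2, A: 3 (each
// placeholder counts itself). C is materialized first, then B with C's
// dependencies subtracted, then A; materializing the smallest transitive
// closure first lets a subset with a single outside input be replaced by
// that input instead of creating real Phis.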
bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                                     DataType::Type type) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());

  // We want to recognize when a subset of these loop Phis that do not need other
  // loop Phis, i.e. a transitive closure, has only one other instruction as an input,
  // i.e. that instruction can be used instead of each Phi in the set. See for example
  // Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
  // materialize these loop Phis from the smallest transitive closure.

  // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
  // assign new indexes to the Phi placeholders, making the matrix dense.
  ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
                                           static_cast<size_t>(-1),  // Invalid.
                                           allocator.Adapter(kArenaAllocLSE));
  ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
  size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
  phi_placeholder_indexes.reserve(num_phi_placeholders);
  for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
    matrix_indexes[marker_index] = phi_placeholder_indexes.size();
    phi_placeholder_indexes.push_back(marker_index);
  }
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
  dependencies.reserve(num_phi_placeholders);
  for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
    static constexpr bool kExpandable = false;
    dependencies.push_back(
        ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
    ArenaBitVector* current_dependencies = dependencies.back();
    current_dependencies->SetBit(matrix_index);  // Count the Phi placeholder as its own dependency.
    PhiPlaceholder current_phi_placeholder =
        GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (pred_value.NeedsLoopPhi()) {
        size_t pred_value_index = PhiPlaceholderIndex(pred_value);
        DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
        DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
        current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
      }
    }
  }

  // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
  for (size_t k = 0; k != num_phi_placeholders; ++k) {
    for (size_t i = 0; i != num_phi_placeholders; ++i) {
      for (size_t j = 0; j != num_phi_placeholders; ++j) {
        if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
          dependencies[i]->SetBit(j);
        }
      }
    }
  }

  // Count the number of transitive dependencies for each replaceable Phi placeholder.
  ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
  num_dependencies.reserve(num_phi_placeholders);
  for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
    num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
  }

  // Pick a Phi placeholder with the smallest number of transitive dependencies and
  // materialize it and its dependencies. Repeat until we have materialized all.
  ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
  current_subset.reserve(num_phi_placeholders);
  size_t remaining_phi_placeholders = num_phi_placeholders;
  while (remaining_phi_placeholders != 0u) {
    auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
    DCHECK_LE(*it, remaining_phi_placeholders);
    size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
    ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
    size_t current_num_dependencies = num_dependencies[current_matrix_index];
    current_subset.clear();
    for (uint32_t matrix_index : current_dependencies->Indexes()) {
      current_subset.push_back(phi_placeholder_indexes[matrix_index]);
    }
    if (!MaterializeLoopPhis(current_subset, type)) {
      DCHECK_EQ(current_phase_, Phase::kStoreElimination);
      // This is the final store elimination phase and we shall not be able to eliminate any
      // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
      for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
        if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
          DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
          phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] = Value::Unknown();
        }
      }
      return false;
    }
    for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
      if (current_dependencies->IsBitSet(matrix_index)) {
        // Mark all dependencies as done by incrementing their `num_dependencies[.]`,
        // so that they shall never be the minimum again.
        num_dependencies[matrix_index] = num_phi_placeholders;
      } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
        // Remove dependencies from other Phi placeholders.
        dependencies[matrix_index]->Subtract(current_dependencies);
        num_dependencies[matrix_index] -= current_num_dependencies;
      }
    }
    remaining_phi_placeholders -= current_num_dependencies;
  }
  return true;
}

bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
  ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
  ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
  auto res =
      FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /*can_use_default_or_phi=*/ true);
  CHECK(!res.has_value()) << *res;
  return MaterializeLoopPhis(abv, type);
}

std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
    PhiPlaceholder phi_placeholder, HInstruction* load) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());

  // Find Phi placeholders to materialize.
  ArenaBitVector phi_placeholders_to_materialize(
      &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
  DataType::Type type = load->GetType();
  bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
  std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
      phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
  if (loop_phi_with_unknown_input) {
    DCHECK_GE(GetGraph()
                  ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                  ->GetPredecessors()
                  .size(),
              2u);
    return loop_phi_with_unknown_input;  // Return failure.
  }

  DCHECK_EQ(current_phase_, Phase::kLoadElimination);
  bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
  DCHECK(success);

  // Report success.
  return std::nullopt;
}

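// An illustrative sketch for the re-processing below (hypothetical): if a load
// of `obj.f` in a loop cannot be replaced because its Phi placeholder has an
// unknown input, the load itself becomes the new heap value for `obj.f`, and
// later loads that were waiting on the same placeholder may then be eliminated
// in favor of that load.
//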
2232 // Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
2233 // find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
2234 // propagate the load(s) as the new value(s) to successors; this may uncover new elimination
2235 // opportunities. If we find no such load, we shall at least propagate an unknown value to some
2236 // heap location that is needed by another loop Phi placeholder.
ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input)2237 void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
2238 DCHECK(!loads_requiring_loop_phi_.empty());
2239 size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
2240 DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
2241 phi_placeholder_replacements_[loop_phi_with_unknown_input_index] = Value::Unknown();
2242
2243 uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
2244 const ArenaVector<HBasicBlock*> reverse_post_order = GetGraph()->GetReversePostOrder();
2245 size_t reverse_post_order_index = 0;
2246 size_t reverse_post_order_size = reverse_post_order.size();
2247 size_t loads_and_stores_index = 0u;
2248 size_t loads_and_stores_size = loads_and_stores_.size();
2249
2250 // Skip blocks and instructions before the block containing the loop phi with unknown input.
2251 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2252 while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
2253 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2254 while (loads_and_stores_index != loads_and_stores_size &&
2255 loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
2256 ++loads_and_stores_index;
2257 }
2258 ++reverse_post_order_index;
2259 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2260 }
2261
2262 // Use local allocator to reduce peak memory usage.
2263 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2264 // Reuse one temporary vector for all remaining blocks.
2265 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2266 ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
2267
2268 auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
2269 Value value;
2270 if (block->IsLoopHeader()) {
2271 if (block->GetLoopInformation()->IsIrreducible()) {
2272 value = Value::Unknown();
2273 } else {
2274 value = PrepareLoopValue(block, idx);
2275 }
2276 } else {
2277 value = MergePredecessorValues(block, idx);
2278 }
2279 DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
2280 return value;
2281 };
2282
2283 // Process remaining blocks and instructions.
2284 bool found_unreplaceable_load = false;
2285 bool replaced_heap_value_with_unknown = false;
2286 for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
2287 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2288 if (block->IsExitBlock()) {
2289 continue;
2290 }
2291
2292 // We shall reconstruct only the heap values that we need for processing loads and stores.
2293 local_heap_values.clear();
2294 local_heap_values.resize(num_heap_locations, Value::Invalid());
2295
    for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
      HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
      size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
      if (load_or_store->GetBlock() != block) {
        break;  // End of instructions from the current block.
      }
      if (IsStore(load_or_store)) {
        StoreRecord* store_record = store_records_[load_or_store->GetId()];
        DCHECK(store_record != nullptr);
        HInstruction* stored_value = store_record->stored_value;
        DCHECK(stored_value != nullptr);
        // Note that the `stored_value` can be a newly created `Phi` with an id that falls
        // outside the allocated `loads_requiring_loop_phi_` range.
        DCHECK_IMPLIES(
            IsLoad(stored_value),
            static_cast<size_t>(stored_value->GetId()) < loads_requiring_loop_phi_.size());
        if (static_cast<size_t>(stored_value->GetId()) >= loads_requiring_loop_phi_.size() ||
            loads_requiring_loop_phi_[stored_value->GetId()] == nullptr) {
          continue;  // This store never needed a loop Phi.
        }
        ValueRecord* record = loads_requiring_loop_phi_[stored_value->GetId()];
        // Process the store by updating `local_heap_values[idx]`. The last update shall
        // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
        // at the end of the block.
        Value replacement = ReplacementOrValue(record->value);
        if (replacement.NeedsLoopPhi()) {
          // No replacement yet, use the Phi placeholder from the load.
          DCHECK(record->value.NeedsLoopPhi());
          local_heap_values[idx] = record->value;
        } else {
          // If the load fetched a known value, use it, otherwise use the load.
          local_heap_values[idx] = Value::ForInstruction(
              replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
        }
      } else {
        // Process the load unless it has previously been marked unreplaceable.
        DCHECK(IsLoad(load_or_store));
        ValueRecord* record = loads_requiring_loop_phi_[load_or_store->GetId()];
        if (record == nullptr) {
          continue;  // This load never needed a loop Phi.
        }
        if (record->value.NeedsLoopPhi()) {
          if (local_heap_values[idx].IsInvalid()) {
            local_heap_values[idx] = get_initial_value(block, idx);
          }
          if (local_heap_values[idx].IsUnknown()) {
            // This load cannot be replaced. Keep stores that feed the Phi placeholder
            // (no aliasing since then, otherwise the Phi placeholder would not have been
            // propagated as a value to this load) and store the load as the new heap value.
            found_unreplaceable_load = true;
            KeepStores(record->value);
            record->value = Value::Unknown();
            local_heap_values[idx] = Value::ForInstruction(load_or_store);
          } else if (local_heap_values[idx].NeedsLoopPhi()) {
            // The load may still be replaced with a Phi later.
            DCHECK(local_heap_values[idx].Equals(record->value));
          } else {
            // This load can be eliminated but we may need to construct non-loop Phis.
            if (local_heap_values[idx].NeedsNonLoopPhi()) {
              MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
                                     load_or_store->GetType());
              local_heap_values[idx] = Replacement(local_heap_values[idx]);
            }
            record->value = local_heap_values[idx];
            DCHECK(local_heap_values[idx].IsDefault() || local_heap_values[idx].IsInstruction())
                << "The replacement heap value can be an HIR instruction or the default value.";
            HInstruction* heap_value = local_heap_values[idx].IsDefault() ?
                GetDefaultValue(load_or_store->GetType()) :
                local_heap_values[idx].GetInstruction();
            AddRemovedLoad(load_or_store, heap_value);
          }
        }
      }
    }

    // All heap values that previously needed a loop Phi at the end of the block
    // need to be updated for processing successors.
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t idx = 0; idx != num_heap_locations; ++idx) {
      if (heap_values[idx].value.NeedsLoopPhi()) {
        if (local_heap_values[idx].IsValid()) {
          heap_values[idx].value = local_heap_values[idx];
        } else {
          heap_values[idx].value = get_initial_value(block, idx);
        }
        if (heap_values[idx].value.IsUnknown()) {
          replaced_heap_value_with_unknown = true;
        }
      }
    }
  }
  DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
}

void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
  // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
  // make the result of the processing depend on the order in which we process these loads.
  // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
  // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
  if (loads_requiring_loop_phi_.empty()) {
    return;  // No loads to process.
  }
  for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
    ValueRecord* record = loads_requiring_loop_phi_[load_store_record.load_or_store->GetId()];
    if (record == nullptr) {
      continue;
    }
    HInstruction* load = load_store_record.load_or_store;
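    // Try to materialize the loop Phis for this load. Each failed attempt reports a loop
    // Phi placeholder with an unknown input; process that placeholder (which may mark
    // loads as unreplaceable and invalidate other placeholders) and retry until the
    // load's value is resolved one way or the other.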
    while (record->value.NeedsLoopPhi() &&
           phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid()) {
      std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
          TryToMaterializeLoopPhis(record->value.GetPhiPlaceholder(), load);
      DCHECK_EQ(loop_phi_with_unknown_input.has_value(),
                phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid());
      if (loop_phi_with_unknown_input) {
        DCHECK_GE(GetGraph()
                      ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                      ->GetPredecessors()
                      .size(),
                  2u);
        ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
      }
    }
    // The load could have been marked as unreplaceable (and stores marked for keeping)
    // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
    DCHECK(record->value.IsUnknown() ||
           record->value.IsInstruction() ||
           record->value.NeedsLoopPhi());
    if (record->value.NeedsLoopPhi()) {
      record->value = Replacement(record->value);
      HInstruction* heap_value = record->value.GetInstruction();
      AddRemovedLoad(load, heap_value);
    }
  }
}

void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
  ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
  size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
  work_queue.reserve(((start_size * 3u) + 1u) / 2u);  // Reserve 1.5x start size, rounded up.
  for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
    work_queue.push_back(index);
  }
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
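  // Process the work queue: for each Phi placeholder, walk the predecessors of its block
  // and keep every store that feeds the corresponding heap location there. Stores fed by
  // other Phi placeholders are handled transitively by enqueueing those placeholders.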
  while (!work_queue.empty()) {
    uint32_t cur_phi_idx = work_queue.back();
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
    work_queue.pop_back();
    size_t idx = phi_placeholder.GetHeapLocation();
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
                             << " (blocks: " << blocks.size() << ")";
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
      // For loop back-edges we must also preserve all stores to locations that
      // may alias with the location `idx`.
      // TODO: Add test cases around this.
      bool is_back_edge =
          block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
      size_t start = is_back_edge ? 0u : idx;
      size_t end = is_back_edge ? heap_values.size() : idx + 1u;
      for (size_t i = start; i != end; ++i) {
        Value stored_by = heap_values[i].stored_by;
        if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
          if (stored_by.NeedsPhi()) {
            size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
            if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
              phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
              work_queue.push_back(phi_placeholder_index);
            }
          } else {
            DCHECK(IsStore(stored_by.GetInstruction()));
            ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
            DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
                                  << " id: " << stored_by.GetInstruction()->GetId() << " block: "
                                  << stored_by.GetInstruction()->GetBlock()->GetBlockId();
            kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
          }
        }
      }
    }
  }
}

void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
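  // Follow the chain of eliminated stores backwards: while the current value was stored
  // by a store that is not kept, the location still holds whatever that store overwrote.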
  while (value_record->stored_by.IsInstruction() &&
         !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
    StoreRecord* store_record = store_records_[value_record->stored_by.GetInstruction()->GetId()];
    DCHECK(store_record != nullptr);
    *value_record = store_record->old_value_record;
  }
  if (value_record->stored_by.NeedsPhi() &&
      !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
          PhiPlaceholderIndex(value_record->stored_by))) {
    // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
    // Phi placeholder to recalculate the actual value.
    value_record->value = value_record->stored_by;
  }
  value_record->value = ReplacementOrValue(value_record->value);
  if (value_record->value.NeedsNonLoopPhi()) {
    // Treat all Phi placeholders as requiring loop Phis at this point.
    // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
    value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
  }
}

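// Find a replacement for `phi_placeholder` describing the old value of a kept store by
// materializing loop Phis, or mark both it and the offending loop Phi placeholder with
// an unknown input as unreplaceable.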
void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
                                               DataType::Type type) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);

  // Find Phi placeholders to try and match against existing Phis or other replacement values.
  ArenaBitVector phi_placeholders_to_materialize(
      &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
  std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
      phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
  if (loop_phi_with_unknown_input) {
    DCHECK_GE(GetGraph()
                  ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                  ->GetPredecessors()
                  .size(),
              2u);
    // Mark both this Phi placeholder and the loop Phi placeholder with the unknown input
    // as unreplaceable.
    phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::Unknown();
    phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
        Value::Unknown();
    return;
  }

  DCHECK_EQ(current_phase_, Phase::kStoreElimination);
  bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
  DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
            !success);
}

void LSEVisitor::FindStoresWritingOldValues() {
  // The Phi placeholder replacements have so far been used for eliminating loads,
  // tracking values that would be stored if all stores were kept. As we want to
  // compare actual old values after removing unmarked stores, prune the Phi
  // placeholder replacements that can be fed by values we may not actually store.
  // Replacements marked as unknown can be kept as they are fed by some unknown
  // value and would end up as unknown again if we recalculated them.
  for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
    if (!phi_placeholder_replacements_[i].IsUnknown() &&
        !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
      phi_placeholder_replacements_[i] = Value::Invalid();
    }
  }

  // Update heap values at end of blocks.
  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
    for (ValueRecord& value_record : heap_values_for_[block->GetBlockId()]) {
      UpdateValueRecordForStoreElimination(&value_record);
    }
  }

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Mark the stores we want to eliminate in a separate bit vector.
  ArenaBitVector eliminated_stores(&allocator,
                                   /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
                                   /*expandable=*/ false,
                                   kArenaAllocLSE);

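  // A kept store is redundant if it writes the value the heap location already holds.
  // Roughly:
  //   obj.field = value;  // Kept store; the location now holds `value`.
  //   ...                 // No interfering writes.
  //   obj.field = value;  // Writes the old value again; can be eliminated.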
  for (uint32_t store_id : kept_stores_.Indexes()) {
    DCHECK(kept_stores_.IsBitSet(store_id));
    StoreRecord* store_record = store_records_[store_id];
    DCHECK(store_record != nullptr);
    UpdateValueRecordForStoreElimination(&store_record->old_value_record);
    if (store_record->old_value_record.value.NeedsPhi()) {
      DataType::Type type = store_record->stored_value->GetType();
      FindOldValueForPhiPlaceholder(store_record->old_value_record.value.GetPhiPlaceholder(), type);
      store_record->old_value_record.value =
          ReplacementOrValue(store_record->old_value_record.value);
    }
    DCHECK(!store_record->old_value_record.value.NeedsPhi());
    HInstruction* stored_value = FindSubstitute(store_record->stored_value);
    if (store_record->old_value_record.value.Equals(stored_value)) {
      eliminated_stores.SetBit(store_id);
    }
  }

  // Commit the stores to eliminate by removing them from `kept_stores_`.
  kept_stores_.Subtract(&eliminated_stores);
}

void LSEVisitor::Run() {
  // 0. Set HasMonitorOperations to false. If we encounter some MonitorOperations that we can't
  //    remove, we will set it back to true in VisitMonitorOperation.
  GetGraph()->SetHasMonitorOperations(false);

  // 1. Process blocks and instructions in reverse post order.
  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }

  // 2. Process loads that require loop Phis, trying to find/create replacements.
  current_phase_ = Phase::kLoadElimination;
  ProcessLoadsRequiringLoopPhis();

  // 3. Determine which stores to keep and which to eliminate.
  current_phase_ = Phase::kStoreElimination;
  // Finish marking stores for keeping.
  SearchPhiPlaceholdersForKeptStores();

  // Find stores that write the same value as is already present in the location.
  FindStoresWritingOldValues();

  // 4. Replace loads and remove unnecessary stores and singleton allocations.
  FinishFullLSE();
}


void LSEVisitor::FinishFullLSE() {
  // Remove recorded load instructions that should be eliminated.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    if (substitute == nullptr) {
      continue;
    }
    HInstruction* load = record.load_or_store;
    DCHECK(load != nullptr);
    DCHECK(IsLoad(load));
    DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
    // We proactively retrieve the substitute for a removed load, so
    // a load that has a substitute should not be observed as a heap
    // location value.
    DCHECK_EQ(FindSubstitute(substitute), substitute);

    load->ReplaceWith(substitute);
    load->GetBlock()->RemoveInstruction(load);
    if ((load->IsInstanceFieldGet() && load->AsInstanceFieldGet()->IsVolatile()) ||
        (load->IsStaticFieldGet() && load->AsStaticFieldGet()->IsVolatile())) {
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileLoad);
    }
  }

  // Remove all the stores we can.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    if (IsStore(record.load_or_store) && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
      record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
      if ((record.load_or_store->IsInstanceFieldSet() &&
           record.load_or_store->AsInstanceFieldSet()->IsVolatile()) ||
          (record.load_or_store->IsStaticFieldSet() &&
           record.load_or_store->AsStaticFieldSet()->IsVolatile())) {
        MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileStore);
      }
    }
  }

  // Eliminate singleton-classified instructions:
  // - Constructor fences (they never escape this thread).
  // - Allocations (if they are unused).
  for (HInstruction* new_instance : singleton_new_instances_) {
    size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
    MaybeRecordStat(stats_,
                    MethodCompilationStat::kConstructorFenceRemovedLSE,
                    removed);

    if (!new_instance->HasNonEnvironmentUses()) {
      new_instance->RemoveEnvironmentUsers();
      new_instance->GetBlock()->RemoveInstruction(new_instance);
      MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
    }
  }
}

// The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
// cannot be directly allocated with an arena allocator, so we need to wrap it.
class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
 public:
  LSEVisitorWrapper(HGraph* graph,
                    const HeapLocationCollector& heap_location_collector,
                    OptimizingCompilerStats* stats)
      : lse_visitor_(graph, heap_location_collector, stats) {}

  void Run() {
    lse_visitor_.Run();
  }

 private:
  LSEVisitor lse_visitor_;
};

bool LoadStoreElimination::Run() {
  if (graph_->IsDebuggable()) {
    // Debugger may set heap values or trigger deoptimization of callers.
    // Skip this optimization.
    return false;
  }
  ScopedArenaAllocator allocator(graph_->GetArenaStack());
  LoadStoreAnalysis lsa(graph_, stats_, &allocator);
  lsa.Run();
  const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
  if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
    // No HeapLocation information from LSA, skip this optimization.
    return false;
  }

  // Currently, load-store analysis cannot handle predicated load/stores, specifically
  // pairs of memory operations with different predicates.
  // TODO: Support predicated SIMD.
  if (graph_->HasPredicatedSIMD()) {
    return false;
  }

  std::unique_ptr<LSEVisitorWrapper> lse_visitor(
      new (&allocator) LSEVisitorWrapper(graph_, heap_location_collector, stats_));
  lse_visitor->Run();
  return true;
}

#undef LSE_VLOG

}  // namespace art