Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 217
-rw-r--r--  compiler/optimizing/code_generator.cc | 77
-rw-r--r--  compiler/optimizing/code_generator.h | 35
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 774
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 35
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 548
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 276
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 640
-rw-r--r--  compiler/optimizing/code_generator_mips.h | 48
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 72
-rw-r--r--  compiler/optimizing/code_generator_mips64.h | 1
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 264
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 13
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 256
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 9
-rw-r--r--  compiler/optimizing/codegen_test.cc | 67
-rw-r--r--  compiler/optimizing/common_arm64.h | 157
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 239
-rw-r--r--  compiler/optimizing/dead_code_elimination.h | 11
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_arm.h | 4
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_mips.cc | 44
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_mips.h | 13
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 16
-rw-r--r--  compiler/optimizing/induction_var_analysis.h | 2
-rw-r--r--  compiler/optimizing/inliner.cc | 65
-rw-r--r--  compiler/optimizing/instruction_builder.cc | 4
-rw-r--r--  compiler/optimizing/instruction_builder.h | 2
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 49
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.cc | 42
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.h | 6
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 70
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h | 9
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.cc | 55
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.h | 5
-rw-r--r--  compiler/optimizing/intrinsics.h | 3
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 547
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 660
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h | 5
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc | 23
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc | 105
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 606
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 493
-rw-r--r--  compiler/optimizing/locations.h | 37
-rw-r--r--  compiler/optimizing/nodes.cc | 19
-rw-r--r--  compiler/optimizing/nodes.h | 47
-rw-r--r--  compiler/optimizing/nodes_arm64.h | 26
-rw-r--r--  compiler/optimizing/nodes_shared.h | 28
-rw-r--r--  compiler/optimizing/optimization.h | 5
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 18
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 284
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 2
-rw-r--r--  compiler/optimizing/pc_relative_fixups_mips.cc | 37
-rw-r--r--  compiler/optimizing/pc_relative_fixups_mips.h | 2
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc | 1
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.h | 4
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 3
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc | 653
-rw-r--r--  compiler/optimizing/register_allocation_resolver.h | 98
-rw-r--r--  compiler/optimizing/register_allocator.cc | 1829
-rw-r--r--  compiler/optimizing/register_allocator.h | 211
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 2105
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.h | 205
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc | 1225
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.h | 188
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 170
-rw-r--r--  compiler/optimizing/sharpening.cc | 39
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 21
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h | 74
-rw-r--r--  compiler/optimizing/x86_memory_gen.cc | 84
-rw-r--r--  compiler/optimizing/x86_memory_gen.h | 46
70 files changed, 9852 insertions, 4176 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 1fc247faf1..8aefd9ea1f 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -533,9 +533,6 @@ class BCEVisitor : public HGraphVisitor {
first_index_bounds_check_map_(
std::less<int>(),
graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
- dynamic_bce_standby_(
- graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
- record_dynamic_bce_standby_(true),
early_exit_loop_(
std::less<uint32_t>(),
graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
@@ -560,14 +557,6 @@ class BCEVisitor : public HGraphVisitor {
}
void Finish() {
- // Retry dynamic bce candidates on standby that are still in the graph.
- record_dynamic_bce_standby_ = false;
- for (HBoundsCheck* bounds_check : dynamic_bce_standby_) {
- if (bounds_check->IsInBlock()) {
- TryDynamicBCE(bounds_check);
- }
- }
-
// Preserve SSA structure which may have been broken by adding one or more
// new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
InsertPhiNodes();
@@ -576,7 +565,6 @@ class BCEVisitor : public HGraphVisitor {
early_exit_loop_.clear();
taken_test_loop_.clear();
finite_loop_.clear();
- dynamic_bce_standby_.clear();
}
private:
@@ -832,7 +820,6 @@ class BCEVisitor : public HGraphVisitor {
array_length->IsArrayLength() ||
array_length->IsPhi());
bool try_dynamic_bce = true;
-
// Analyze index range.
if (!index->IsIntConstant()) {
// Non-constant index.
@@ -896,10 +883,20 @@ class BCEVisitor : public HGraphVisitor {
// If static analysis fails, and OOB is not certain, try dynamic elimination.
if (try_dynamic_bce) {
// Try loop-based dynamic elimination.
- if (TryDynamicBCE(bounds_check)) {
+ HLoopInformation* loop = bounds_check->GetBlock()->GetLoopInformation();
+ bool needs_finite_test = false;
+ bool needs_taken_test = false;
+ if (DynamicBCESeemsProfitable(loop, bounds_check->GetBlock()) &&
+ induction_range_.CanGenerateCode(
+ bounds_check, index, &needs_finite_test, &needs_taken_test) &&
+ CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+ // Do this test last, since it may generate code.
+ CanHandleLength(loop, array_length, needs_taken_test)) {
+ TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ TransformLoopForDynamicBCE(loop, bounds_check);
return;
}
- // Prepare dominator-based dynamic elimination.
+ // Otherwise, prepare dominator-based dynamic elimination.
if (first_index_bounds_check_map_.find(array_length->GetId()) ==
first_index_bounds_check_map_.end()) {
// Remember the first bounds check against each array_length. That bounds check
@@ -1180,7 +1177,7 @@ class BCEVisitor : public HGraphVisitor {
}
}
- // Perform dominator-based dynamic elimination on suitable set of bounds checks.
+ /** Performs dominator-based dynamic elimination on suitable set of bounds checks. */
void AddCompareWithDeoptimization(HBasicBlock* block,
HInstruction* array_length,
HInstruction* base,
@@ -1190,6 +1187,12 @@ class BCEVisitor : public HGraphVisitor {
// Construct deoptimization on single or double bounds on range [base-min_c,base+max_c],
// for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1
// and base+3, since we made the assumption any in between value may occur too.
+ // In code, using unsigned comparisons:
+ // (1) constants only
+ // if (max_c >= a.length) deoptimize;
+ // (2) general case
+ // if (base-min_c > base+max_c) deoptimize;
+ // if (base+max_c >= a.length ) deoptimize;
static_assert(kMaxLengthForAddingDeoptimize < std::numeric_limits<int32_t>::max(),
"Incorrect max length may be subject to arithmetic wrap-around");
HInstruction* upper = GetGraph()->GetIntConstant(max_c);
@@ -1208,7 +1211,7 @@ class BCEVisitor : public HGraphVisitor {
has_dom_based_dynamic_bce_ = true;
}
- // Attempt dominator-based dynamic elimination on remaining candidates.
+ /** Attempts dominator-based dynamic elimination on remaining candidates. */
void AddComparesWithDeoptimization(HBasicBlock* block) {
for (const auto& entry : first_index_bounds_check_map_) {
HBoundsCheck* bounds_check = entry.second;
@@ -1272,17 +1275,19 @@ class BCEVisitor : public HGraphVisitor {
candidates.push_back(other_bounds_check);
}
}
- // Perform dominator-based deoptimization if it seems profitable. Note that we reject cases
- // where the distance min_c:max_c range gets close to the maximum possible array length,
- // since those cases are likely to always deopt (such situations do not necessarily go
- // OOB, though, since the programmer could rely on wrap-around from max to min).
+ // Perform dominator-based deoptimization if it seems profitable, where we eliminate
+ // bounds checks and replace these with deopt checks that guard against any possible
+ // OOB. Note that we reject cases where the distance min_c:max_c range gets close to
+ // the maximum possible array length, since those cases are likely to always deopt
+ // (such situations do not necessarily go OOB, though, since the array could be really
+ // large, or the programmer could rely on arithmetic wrap-around from max to min).
size_t threshold = kThresholdForAddingDeoptimize + (base == nullptr ? 0 : 1); // extra test?
uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
if (candidates.size() >= threshold &&
(base != nullptr || min_c >= 0) && // reject certain OOB
distance <= kMaxLengthForAddingDeoptimize) { // reject likely/certain deopt
AddCompareWithDeoptimization(block, array_length, base, min_c, max_c);
- for (HInstruction* other_bounds_check : candidates) {
+ for (HBoundsCheck* other_bounds_check : candidates) {
// Only replace if still in the graph. This avoids visiting the same
// bounds check twice if it occurred multiple times in the use list.
if (other_bounds_check->IsInBlock()) {
@@ -1328,45 +1333,127 @@ class BCEVisitor : public HGraphVisitor {
}
/**
- * When the compiler fails to remove a bounds check statically, we try to remove the bounds
- * check dynamically by adding runtime tests that trigger a deoptimization in case bounds
- * will go out of range (we want to be rather certain of that given the slowdown of
- * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
- * bounds checks and related null checks removed.
+ * Performs loop-based dynamic elimination on a bounds check. In order to minimize the
+ * number of eventually generated tests, related bounds checks with tests that can be
+ * combined with tests for the given bounds check are collected first.
*/
- bool TryDynamicBCE(HBoundsCheck* instruction) {
- HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
- HInstruction* index = instruction->InputAt(0);
- HInstruction* length = instruction->InputAt(1);
- // If dynamic bounds check elimination seems profitable and is possible, then proceed.
- bool needs_finite_test = false;
- bool needs_taken_test = false;
- if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
- induction_range_.CanGenerateCode(
- instruction, index, &needs_finite_test, &needs_taken_test) &&
- CanHandleInfiniteLoop(loop, instruction, index, needs_finite_test) &&
- CanHandleLength(loop, length, needs_taken_test)) { // do this test last (may code gen)
- HInstruction* lower = nullptr;
- HInstruction* upper = nullptr;
- // Generate the following unsigned comparisons
- // if (lower > upper) deoptimize;
- // if (upper >= length) deoptimize;
- // or, for a non-induction index, just the unsigned comparison on its 'upper' value
- // if (upper >= length) deoptimize;
- // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
- // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
- // correctly guard against any possible OOB (including arithmetic wrap-around cases).
- TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
- HBasicBlock* block = GetPreHeader(loop, instruction);
- induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
- if (lower != nullptr) {
- InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
- }
- InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
- ReplaceInstruction(instruction, index);
- return true;
+ void TransformLoopForDynamicBCE(HLoopInformation* loop, HBoundsCheck* bounds_check) {
+ HInstruction* index = bounds_check->InputAt(0);
+ HInstruction* array_length = bounds_check->InputAt(1);
+ DCHECK(loop->IsDefinedOutOfTheLoop(array_length)); // pre-checked
+ DCHECK(loop->DominatesAllBackEdges(bounds_check->GetBlock()));
+ // Collect all bounds checks in the same loop that are related as "a[base + constant]"
+ // for a base instruction (possibly absent) and various constants.
+ ValueBound value = ValueBound::AsValueBound(index);
+ HInstruction* base = value.GetInstruction();
+ int32_t min_c = base == nullptr ? 0 : value.GetConstant();
+ int32_t max_c = value.GetConstant();
+ ArenaVector<HBoundsCheck*> candidates(
+ GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+ ArenaVector<HBoundsCheck*> standby(
+ GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+ for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (user->IsBoundsCheck() && loop == user->GetBlock()->GetLoopInformation()) {
+ HBoundsCheck* other_bounds_check = user->AsBoundsCheck();
+ HInstruction* other_index = other_bounds_check->InputAt(0);
+ HInstruction* other_array_length = other_bounds_check->InputAt(1);
+ ValueBound other_value = ValueBound::AsValueBound(other_index);
+ int32_t other_c = other_value.GetConstant();
+ if (array_length == other_array_length && base == other_value.GetInstruction()) {
+ // Does the current basic block dominate all back edges? If not,
+ // add this candidate later only if it falls into the range.
+ if (!loop->DominatesAllBackEdges(user->GetBlock())) {
+ standby.push_back(other_bounds_check);
+ continue;
+ }
+ min_c = std::min(min_c, other_c);
+ max_c = std::max(max_c, other_c);
+ candidates.push_back(other_bounds_check);
+ }
+ }
+ }
+ // Add standby candidates that fall in selected range.
+ for (HBoundsCheck* other_bounds_check : standby) {
+ HInstruction* other_index = other_bounds_check->InputAt(0);
+ int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+ if (min_c <= other_c && other_c <= max_c) {
+ candidates.push_back(other_bounds_check);
+ }
+ }
+ // Perform loop-based deoptimization if it seems profitable, where we eliminate bounds
+ // checks and replace these with deopt checks that guard against any possible OOB.
+ DCHECK_LT(0u, candidates.size());
+ uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
+ if ((base != nullptr || min_c >= 0) && // reject certain OOB
+ distance <= kMaxLengthForAddingDeoptimize) { // reject likely/certain deopt
+ HBasicBlock* block = GetPreHeader(loop, bounds_check);
+ HInstruction* min_lower = nullptr;
+ HInstruction* min_upper = nullptr;
+ HInstruction* max_lower = nullptr;
+ HInstruction* max_upper = nullptr;
+ // Iterate over all bounds checks.
+ for (HBoundsCheck* other_bounds_check : candidates) {
+ // Only handle if still in the graph. This avoids visiting the same
+ // bounds check twice if it occurred multiple times in the use list.
+ if (other_bounds_check->IsInBlock()) {
+ HInstruction* other_index = other_bounds_check->InputAt(0);
+ int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+ // Generate code for either the maximum or minimum. Range analysis already was queried
+ // whether code generation on the original and, thus, related bounds check was possible.
+ // It handles either loop invariants (lower is not set) or unit strides.
+ if (other_c == max_c) {
+ induction_range_.GenerateRangeCode(
+ other_bounds_check, other_index, GetGraph(), block, &max_lower, &max_upper);
+ } else if (other_c == min_c && base != nullptr) {
+ induction_range_.GenerateRangeCode(
+ other_bounds_check, other_index, GetGraph(), block, &min_lower, &min_upper);
+ }
+ ReplaceInstruction(other_bounds_check, other_index);
+ }
+ }
+ // In code, using unsigned comparisons:
+ // (1) constants only
+ // if (max_upper >= a.length ) deoptimize;
+ // (2) two symbolic invariants
+ // if (min_upper > max_upper) deoptimize; unless min_c == max_c
+ // if (max_upper >= a.length ) deoptimize;
+ // (3) general case, unit strides (where lower would exceed upper for arithmetic wrap-around)
+ // if (min_lower > max_lower) deoptimize; unless min_c == max_c
+ // if (max_lower > max_upper) deoptimize;
+ // if (max_upper >= a.length ) deoptimize;
+ if (base == nullptr) {
+ // Constants only.
+ DCHECK_GE(min_c, 0);
+ DCHECK(min_lower == nullptr && min_upper == nullptr &&
+ max_lower == nullptr && max_upper != nullptr);
+ } else if (max_lower == nullptr) {
+ // Two symbolic invariants.
+ if (min_c != max_c) {
+ DCHECK(min_lower == nullptr && min_upper != nullptr &&
+ max_lower == nullptr && max_upper != nullptr);
+ InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_upper, max_upper));
+ } else {
+ DCHECK(min_lower == nullptr && min_upper == nullptr &&
+ max_lower == nullptr && max_upper != nullptr);
+ }
+ } else {
+ // General case, unit strides.
+ if (min_c != max_c) {
+ DCHECK(min_lower != nullptr && min_upper != nullptr &&
+ max_lower != nullptr && max_upper != nullptr);
+ InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_lower, max_lower));
+ } else {
+ DCHECK(min_lower == nullptr && min_upper == nullptr &&
+ max_lower != nullptr && max_upper != nullptr);
+ }
+ InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(max_lower, max_upper));
+ }
+ InsertDeoptInLoop(
+ loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(max_upper, array_length));
+ } else {
+ // TODO: if rejected, avoid doing this again for subsequent instructions in this set?
}
- return false;
}
/**
@@ -1474,8 +1561,7 @@ class BCEVisitor : public HGraphVisitor {
* of the loop to use, dynamic bce in such cases is only allowed if other tests
* ensure the loop is finite.
*/
- bool CanHandleInfiniteLoop(
- HLoopInformation* loop, HBoundsCheck* check, HInstruction* index, bool needs_infinite_test) {
+ bool CanHandleInfiniteLoop(HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
if (needs_infinite_test) {
// If we already forced the loop to be finite, allow directly.
const uint32_t loop_id = loop->GetHeader()->GetBlockId();
@@ -1497,11 +1583,6 @@ class BCEVisitor : public HGraphVisitor {
}
}
}
- // If bounds check made it this far, it is worthwhile to check later if
- // the loop was forced finite by another candidate.
- if (record_dynamic_bce_standby_) {
- dynamic_bce_standby_.push_back(check);
- }
return false;
}
return true;
@@ -1727,10 +1808,6 @@ class BCEVisitor : public HGraphVisitor {
// in a block that checks an index against that HArrayLength.
ArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_;
- // Stand by list for dynamic bce.
- ArenaVector<HBoundsCheck*> dynamic_bce_standby_;
- bool record_dynamic_bce_standby_;
-
// Early-exit loop bookkeeping.
ArenaSafeMap<uint32_t, bool> early_exit_loop_;
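The deoptimization guards described in the bounds_check_elimination.cc comments above boil down to two unsigned comparisons that cover the whole access range a[base+min_c] .. a[base+max_c]. A small stand-alone sketch of that guard logic (hypothetical code, not part of ART; the function name, parameters and the array length are made up for illustration):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Hypothetical sketch (not ART code): the two unsigned comparisons emitted by dynamic
    // BCE guard every access in a[base+min_c] .. a[base+max_c]. If neither test fires,
    // all indices in the range are valid, so the individual bounds checks can be dropped.
    bool WouldDeoptimize(int32_t base, int32_t min_c, int32_t max_c, int32_t length) {
      uint32_t lower = static_cast<uint32_t>(base) + static_cast<uint32_t>(min_c);
      uint32_t upper = static_cast<uint32_t>(base) + static_cast<uint32_t>(max_c);
      if (lower > upper) return true;                           // unsigned wrap-around
      if (upper >= static_cast<uint32_t>(length)) return true;  // upper bound vs. a.length
      return false;                                             // all accesses proven in bounds
    }

    int main() {
      std::vector<int> a(10);
      // a[base-1] .. a[base+3] with base == 5 touches a[4] .. a[8]: no deoptimization.
      assert(!WouldDeoptimize(/*base=*/5, /*min_c=*/-1, /*max_c=*/3, static_cast<int32_t>(a.size())));
      // base == 8 would touch a[11]: the guard deoptimizes instead of going OOB.
      assert(WouldDeoptimize(/*base=*/8, /*min_c=*/-1, /*max_c=*/3, static_cast<int32_t>(a.size())));
      return 0;
    }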
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4520f9b3e3..c532e72465 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -137,7 +137,7 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) {
size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
auto pointer_size = InstructionSetPointerSize(GetInstructionSet());
- return pointer_size * index;
+ return static_cast<size_t>(pointer_size) * index;
}
uint32_t CodeGenerator::GetArrayLengthOffset(HArrayLength* array_length) {
@@ -291,7 +291,8 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
DCHECK(!block_order.empty());
DCHECK(block_order[0] == GetGraph()->GetEntryBlock());
ComputeSpillMask();
- first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
+ first_register_slot_in_slow_path_ = RoundUp(
+ (number_of_out_slots + number_of_spill_slots) * kVRegSize, GetPreferredSlotsAlignment());
if (number_of_spill_slots == 0
&& !HasAllocatedCalleeSaveRegisters()
@@ -302,8 +303,7 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
} else {
SetFrameSize(RoundUp(
- number_of_spill_slots * kVRegSize
- + number_of_out_slots * kVRegSize
+ first_register_slot_in_slow_path_
+ maximum_number_of_live_core_registers * GetWordSize()
+ maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
+ FrameEntrySpillSize(),
@@ -314,7 +314,8 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
void CodeGenerator::CreateCommonInvokeLocationSummary(
HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) {
ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
- LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCall);
+ LocationSummary* locations = new (allocator) LocationSummary(invoke,
+ LocationSummary::kCallOnMainOnly);
for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
HInstruction* input = invoke->InputAt(i);
@@ -378,7 +379,7 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary(
ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetArena();
LocationSummary* locations =
- new (allocator) LocationSummary(field_access, LocationSummary::kCall);
+ new (allocator) LocationSummary(field_access, LocationSummary::kCallOnMainOnly);
locations->AddTemp(calling_convention.GetFieldIndexLocation());
@@ -499,7 +500,7 @@ void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls,
bool code_generator_supports_read_barrier) {
ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
- ? LocationSummary::kCall
+ ? LocationSummary::kCallOnMainOnly
: (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
cls->CanCallRuntime())
? LocationSummary::kCallOnSlowPath
@@ -764,16 +765,24 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
LocationSummary* locations = instruction->GetLocations();
uint32_t register_mask = locations->GetRegisterMask();
- if (locations->OnlyCallsOnSlowPath()) {
- // In case of slow path, we currently set the location of caller-save registers
- // to register (instead of their stack location when pushed before the slow-path
- // call). Therefore register_mask contains both callee-save and caller-save
- // registers that hold objects. We must remove the caller-save from the mask, since
- // they will be overwritten by the callee.
- register_mask &= core_callee_save_mask_;
+ if (instruction->IsSuspendCheck()) {
+ // Suspend check has special ABI that saves the caller-save registers in callee,
+ // so we want to emit stack maps containing the registers.
+ // TODO: Register allocator still reserves space for the caller-save registers.
+ // We should add slow-path-specific caller-save information into LocationSummary
+ // and refactor the code here as well as in the register allocator to use it.
+ } else {
+ if (locations->OnlyCallsOnSlowPath()) {
+ // In case of slow path, we currently set the location of caller-save registers
+ // to register (instead of their stack location when pushed before the slow-path
+ // call). Therefore register_mask contains both callee-save and caller-save
+ // registers that hold objects. We must remove the caller-save from the mask, since
+ // they will be overwritten by the callee.
+ register_mask &= core_callee_save_mask_;
+ }
+ // The register mask must be a subset of callee-save registers.
+ DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
}
- // The register mask must be a subset of callee-save registers.
- DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
stack_map_stream_.BeginStackMapEntry(outer_dex_pc,
native_pc,
register_mask,
@@ -1173,23 +1182,23 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod
<< "instruction->DebugName()=" << instruction->DebugName()
<< " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString();
} else {
- DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal())
+ DCHECK(instruction->GetLocations()->CallsOnSlowPath() || slow_path->IsFatal())
<< "instruction->DebugName()=" << instruction->DebugName()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
- // When read barriers are enabled, some instructions use a
- // slow path to emit a read barrier, which does not trigger
- // GC, is not fatal, nor is emitted by HDeoptimize
- // instructions.
+ // When (non-Baker) read barriers are enabled, some instructions
+ // use a slow path to emit a read barrier, which does not trigger
+ // GC.
(kEmitCompilerReadBarrier &&
+ !kUseBakerReadBarrier &&
(instruction->IsInstanceFieldGet() ||
instruction->IsStaticFieldGet() ||
- instruction->IsArraySet() ||
instruction->IsArrayGet() ||
instruction->IsLoadClass() ||
instruction->IsLoadString() ||
instruction->IsInstanceOf() ||
- instruction->IsCheckCast())))
+ instruction->IsCheckCast() ||
+ (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))))
<< "instruction->DebugName()=" << instruction->DebugName()
<< " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
@@ -1203,6 +1212,28 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod
<< instruction->DebugName() << ((slow_path != nullptr) ? slow_path->GetDescription() : "");
}
+void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath())
+ << "instruction->DebugName()=" << instruction->DebugName()
+ << " slow_path->GetDescription()=" << slow_path->GetDescription();
+  // Only the Baker read barrier marking slow path used by certain
+ // instructions is expected to invoke the runtime without recording
+ // PC-related information.
+ DCHECK(kUseBakerReadBarrier);
+ DCHECK(instruction->IsInstanceFieldGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsArrayGet() ||
+ instruction->IsLoadClass() ||
+ instruction->IsLoadString() ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast() ||
+ (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) ||
+ (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified()))
+ << "instruction->DebugName()=" << instruction->DebugName()
+ << " slow_path->GetDescription()=" << slow_path->GetDescription();
+}
+
void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
RegisterSet* live_registers = locations->GetLiveRegisters();
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
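A condensed sketch of the frame-layout arithmetic after the InitializeCodeGeneration() change above (hypothetical helper, not the ART implementation; kVRegSize and the parameter names are illustrative, and the slot alignment corresponds to the new GetPreferredSlotsAlignment() hook, which defaults to 1 for most backends):

    #include <cstddef>

    // Hypothetical sketch: out/spill slots are rounded up to the backend's preferred slot
    // alignment first, then the slow-path register save area and frame entry spills are
    // added on top, and the total is rounded to the frame alignment.
    constexpr size_t RoundUp(size_t x, size_t n) { return ((x + n - 1) / n) * n; }

    size_t ComputeFrameSize(size_t spill_slots, size_t out_slots,
                            size_t live_core_regs, size_t live_fpu_regs,
                            size_t word_size, size_t fpu_spill_size,
                            size_t frame_entry_spill_size,
                            size_t preferred_slot_alignment,  // GetPreferredSlotsAlignment()
                            size_t frame_alignment) {
      const size_t kVRegSize = 4;  // illustrative vreg slot size
      size_t first_register_slot_in_slow_path =
          RoundUp((out_slots + spill_slots) * kVRegSize, preferred_slot_alignment);
      return RoundUp(first_register_slot_in_slow_path
                         + live_core_regs * word_size
                         + live_fpu_regs * fpu_spill_size
                         + frame_entry_spill_size,
                     frame_alignment);
    }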
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 9364be35ff..fd396c474c 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,6 +22,7 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
+#include "base/enums.h"
#include "compiled_method.h"
#include "driver/compiler_options.h"
#include "globals.h"
@@ -80,7 +81,11 @@ class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
+ // Save live core and floating-point caller-save registers and
+ // update the stack mask in `locations` for registers holding object
+ // references.
virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+ // Restore live core and floating-point caller-save registers.
virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
bool IsCoreRegisterSaved(int reg) const {
@@ -187,7 +192,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
// Note that this follows the current calling convention.
return GetFrameSize()
- + InstructionSetPointerSize(GetInstructionSet()) // Art method
+ + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet())) // Art method
+ parameter->GetIndex() * kVRegSize;
}
@@ -211,6 +216,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
size_t maximum_number_of_live_fpu_registers,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order);
+ // Backends can override this as necessary. For most, no special alignment is required.
+ virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
uint32_t GetFrameSize() const { return frame_size_; }
void SetFrameSize(uint32_t size) { frame_size_ = size; }
@@ -333,6 +340,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
+ bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
+ bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
+
// Helper that returns the pointer offset of an index in an object array.
// Note: this method assumes we always have the same pointer size, regardless
// of the architecture.
@@ -350,6 +360,17 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// accessing the String's `value` field in String intrinsics.
static uint32_t GetArrayDataOffset(HArrayGet* array_get);
+ // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
+ template <PointerSize pointer_size>
+ static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
+ // The entry point list defines 30 ReadBarrierMarkRegX entry points.
+ DCHECK_LT(reg, 30u);
+ // The ReadBarrierMarkRegX entry points are ordered by increasing
+ // register number in Thread::tls_Ptr_.quick_entrypoints.
+ return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
+ + static_cast<size_t>(pointer_size) * reg;
+ }
+
void EmitParallelMoves(Location from1,
Location to1,
Primitive::Type type1,
@@ -363,8 +384,14 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
return type == Primitive::kPrimNot && !value->IsNullConstant();
}
+
+  // Performs checks pertaining to an InvokeRuntime call.
void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);
+  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
+ static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+ SlowPathCode* slow_path);
+
void AddAllocatedRegister(Location location) {
allocated_registers_.Add(location);
}
@@ -677,7 +704,7 @@ class CallingConvention {
size_t number_of_registers,
const F* fpu_registers,
size_t number_of_fpu_registers,
- size_t pointer_size)
+ PointerSize pointer_size)
: registers_(registers),
number_of_registers_(number_of_registers),
fpu_registers_(fpu_registers),
@@ -700,7 +727,7 @@ class CallingConvention {
size_t GetStackOffsetOf(size_t index) const {
// We still reserve the space for parameters passed by registers.
// Add space for the method pointer.
- return pointer_size_ + index * kVRegSize;
+ return static_cast<size_t>(pointer_size_) + index * kVRegSize;
}
private:
@@ -708,7 +735,7 @@ class CallingConvention {
const size_t number_of_registers_;
const F* fpu_registers_;
const size_t number_of_fpu_registers_;
- const size_t pointer_size_;
+ const PointerSize pointer_size_;
DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
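The GetReadBarrierMarkEntryPointsOffset() helper added to code_generator.h above exploits the fact that the ReadBarrierMarkRegX entry points are laid out contiguously, ordered by register number. A stand-alone sketch of the same arithmetic (hypothetical code; the PointerSize enum is modeled on the one included from base/enums.h, and the Reg00 base offset in the example is invented, the real one coming from QUICK_ENTRYPOINT_OFFSET):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Hypothetical sketch: ReadBarrierMarkRegX sits X pointers after ReadBarrierMarkReg00
    // in the contiguous entry point list.
    enum class PointerSize : size_t { k32 = 4, k64 = 8 };

    int32_t ReadBarrierMarkEntryPointOffset(PointerSize pointer_size, size_t reg,
                                            int32_t reg00_offset) {
      assert(reg < 30u);  // the entry point list defines 30 ReadBarrierMarkRegX slots
      return reg00_offset + static_cast<int32_t>(static_cast<size_t>(pointer_size) * reg);
    }

    int main() {
      // With a made-up Reg00 offset of 512 on a 32-bit target, Reg05 lands at 512 + 4 * 5.
      assert(ReadBarrierMarkEntryPointOffset(PointerSize::k32, 5, 512) == 532);
      return 0;
    }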
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e441f825bc..404f044cef 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,9 +59,9 @@ static constexpr DRegister DTMP = D31;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
class NullCheckSlowPathARM : public SlowPathCode {
public:
@@ -119,11 +119,9 @@ class SuspendCheckSlowPathARM : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
arm_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ b(GetReturnLabel());
} else {
@@ -316,7 +314,7 @@ class TypeCheckSlowPathARM : public SlowPathCode {
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
} else {
DCHECK(instruction_->IsCheckCast());
@@ -412,8 +410,8 @@ class ArraySetSlowPathARM : public SlowPathCode {
// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathARM : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj)
- : SlowPathCode(instruction), out_(out), obj_(obj) {
+ ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj)
+ : SlowPathCode(instruction), obj_(obj) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -421,9 +419,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- Register reg_out = out_.AsRegister<Register>();
+ Register reg = obj_.AsRegister<Register>();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
@@ -431,30 +429,45 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, locations);
-
- InvokeRuntimeCallingConvention calling_convention;
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
- instruction_,
- instruction_->GetDexPc(),
- this);
- CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
- arm_codegen->Move32(out_, Location::RegisterLocation(R0));
-
- RestoreLiveRegisters(codegen, locations);
+ DCHECK_NE(reg, SP);
+ DCHECK_NE(reg, LR);
+ DCHECK_NE(reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(reg, IP);
+ DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // R0 <- obj
+ // R0 <- ReadBarrierMark(R0)
+ // obj <- R0
+ //
+ // we just use rX (the register holding `obj`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(reg);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ b(GetExitLabel());
}
private:
- const Location out_;
const Location obj_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
@@ -500,8 +513,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -688,8 +700,8 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCode {
};
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
inline Condition ARMCondition(IfCondition cond) {
switch (cond) {
@@ -956,7 +968,7 @@ void CodeGeneratorARM::GenerateFrameExit() {
if (fpu_spill_mask_ != 0) {
SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
__ vpops(start_register, POPCOUNT(fpu_spill_mask_));
- __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_));
+ __ cfi().AdjustCFAOffset(-static_cast<int>(kArmPointerSize) * POPCOUNT(fpu_spill_mask_));
__ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_);
}
// Pop LR into PC to return.
@@ -1208,7 +1220,7 @@ void CodeGeneratorARM::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kArmWordSize>(entrypoint).Int32Value(),
+ InvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value(),
instruction,
dex_pc,
slow_path);
@@ -1224,6 +1236,14 @@ void CodeGeneratorARM::InvokeRuntime(int32_t entry_point_offset,
RecordPcInfo(instruction, dex_pc, slow_path);
}
+void CodeGeneratorARM::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
+ __ blx(LR);
+}
+
void InstructionCodeGeneratorARM::HandleGoto(HInstruction* got, HBasicBlock* successor) {
DCHECK(!successor->IsExitBlock());
@@ -1271,6 +1291,44 @@ void LocationsBuilderARM::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
+void InstructionCodeGeneratorARM::GenerateVcmp(HInstruction* instruction) {
+ Primitive::Type type = instruction->InputAt(0)->GetType();
+ Location lhs_loc = instruction->GetLocations()->InAt(0);
+ Location rhs_loc = instruction->GetLocations()->InAt(1);
+ if (rhs_loc.IsConstant()) {
+ // 0.0 is the only immediate that can be encoded directly in
+ // a VCMP instruction.
+ //
+ // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+ // specify that in a floating-point comparison, positive zero
+ // and negative zero are considered equal, so we can use the
+ // literal 0.0 for both cases here.
+ //
+ // Note however that some methods (Float.equal, Float.compare,
+ // Float.compareTo, Double.equal, Double.compare,
+ // Double.compareTo, Math.max, Math.min, StrictMath.max,
+ // StrictMath.min) consider 0.0 to be (strictly) greater than
+ // -0.0. So if we ever translate calls to these methods into a
+ // HCompare instruction, we must handle the -0.0 case with
+ // care here.
+ DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+ if (type == Primitive::kPrimFloat) {
+ __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
+ }
+ } else {
+ if (type == Primitive::kPrimFloat) {
+ __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
+ }
+ }
+}
+
void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
Label* true_label,
Label* false_label ATTRIBUTE_UNUSED) {
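The zero-operand comment in GenerateVcmp() above can be sanity-checked with a tiny stand-alone program (not ART code): an ordinary floating-point comparison treats +0.0 and -0.0 as equal, so the literal 0.0 serves for both signs in the compare-with-zero form, while Float.compare-style code could still distinguish them via the sign bit:

    #include <cassert>
    #include <cmath>

    int main() {
      float pos_zero = 0.0f;
      float neg_zero = -0.0f;
      assert(pos_zero == neg_zero);     // comparison semantics: equal (JLS 15.20.1)
      assert(std::signbit(neg_zero));   // but the representations differ,
      assert(!std::signbit(pos_zero));  // which Float.compare & friends expose
      return 0;
    }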
@@ -1371,22 +1429,14 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condi
Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
- LocationSummary* locations = condition->GetLocations();
- Location left = locations->InAt(0);
- Location right = locations->InAt(1);
-
Primitive::Type type = condition->InputAt(0)->GetType();
switch (type) {
case Primitive::kPrimLong:
GenerateLongComparesAndJumps(condition, true_target, false_target);
break;
case Primitive::kPrimFloat:
- __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
- GenerateFPJumps(condition, true_target, false_target);
- break;
case Primitive::kPrimDouble:
- __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
- FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+ GenerateVcmp(condition);
GenerateFPJumps(condition, true_target, false_target);
break;
default:
@@ -1567,7 +1617,7 @@ void LocationsBuilderARM::HandleCondition(HCondition* cond) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
if (!cond->IsEmittedAtUseSite()) {
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
@@ -1614,12 +1664,8 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
GenerateLongComparesAndJumps(cond, &true_label, &false_label);
break;
case Primitive::kPrimFloat:
- __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
- GenerateFPJumps(cond, &true_label, &false_label);
- break;
case Primitive::kPrimDouble:
- __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
- FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+ GenerateVcmp(cond);
GenerateFPJumps(cond, &true_label, &false_label);
break;
}
@@ -1889,8 +1935,6 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke)
LocationSummary* locations = invoke->GetLocations();
Register temp = locations->GetTemp(0).AsRegister<Register>();
Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
- uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
Location receiver = locations->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -1916,10 +1960,14 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke)
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
__ MaybeUnpoisonHeapReference(temp);
+ __ LoadFromOffset(kLoadWord, temp, temp,
+ mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ invoke->GetImtIndex(), kArmPointerSize));
// temp = temp->GetImtEntryAt(method_offset);
- uint32_t entry_point =
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
+ uint32_t entry_point =
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
// LR = temp->GetEntryPoint();
__ LoadFromOffset(kLoadWord, LR, temp, entry_point);
// LR();
@@ -2012,7 +2060,7 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) {
(((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
&& result_type == Primitive::kPrimLong)
|| (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat))
- ? LocationSummary::kCall
+ ? LocationSummary::kCallOnMainOnly
: LocationSummary::kNoCall;
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -2477,7 +2525,7 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -2514,13 +2562,18 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) {
break;
case Primitive::kPrimLong: {
- DCHECK(second.IsRegisterPair());
- __ adds(out.AsRegisterPairLow<Register>(),
- first.AsRegisterPairLow<Register>(),
- ShifterOperand(second.AsRegisterPairLow<Register>()));
- __ adc(out.AsRegisterPairHigh<Register>(),
- first.AsRegisterPairHigh<Register>(),
- ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ if (second.IsConstant()) {
+ uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+ GenerateAddLongConst(out, first, value);
+ } else {
+ DCHECK(second.IsRegisterPair());
+ __ adds(out.AsRegisterPairLow<Register>(),
+ first.AsRegisterPairLow<Register>(),
+ ShifterOperand(second.AsRegisterPairLow<Register>()));
+ __ adc(out.AsRegisterPairHigh<Register>(),
+ first.AsRegisterPairHigh<Register>(),
+ ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ }
break;
}
@@ -2554,7 +2607,7 @@ void LocationsBuilderARM::VisitSub(HSub* sub) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -2590,13 +2643,18 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) {
}
case Primitive::kPrimLong: {
- DCHECK(second.IsRegisterPair());
- __ subs(out.AsRegisterPairLow<Register>(),
- first.AsRegisterPairLow<Register>(),
- ShifterOperand(second.AsRegisterPairLow<Register>()));
- __ sbc(out.AsRegisterPairHigh<Register>(),
- first.AsRegisterPairHigh<Register>(),
- ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ if (second.IsConstant()) {
+ uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+ GenerateAddLongConst(out, first, -value);
+ } else {
+ DCHECK(second.IsRegisterPair());
+ __ subs(out.AsRegisterPairLow<Register>(),
+ first.AsRegisterPairLow<Register>(),
+ ShifterOperand(second.AsRegisterPairLow<Register>()));
+ __ sbc(out.AsRegisterPairHigh<Register>(),
+ first.AsRegisterPairHigh<Register>(),
+ ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ }
break;
}
@@ -2831,13 +2889,13 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
if (div->GetResultType() == Primitive::kPrimLong) {
// pLdiv runtime call.
- call_kind = LocationSummary::kCall;
+ call_kind = LocationSummary::kCallOnMainOnly;
} else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
// sdiv will be replaced by other instruction sequence.
} else if (div->GetResultType() == Primitive::kPrimInt &&
!codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
// pIdivmod runtime call.
- call_kind = LocationSummary::kCall;
+ call_kind = LocationSummary::kCallOnMainOnly;
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -2956,7 +3014,7 @@ void LocationsBuilderARM::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
// Most remainders are implemented in the runtime.
- LocationSummary::CallKind call_kind = LocationSummary::kCall;
+ LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
// sdiv will be replaced by other instruction sequence.
call_kind = LocationSummary::kNoCall;
@@ -3493,7 +3551,7 @@ void InstructionCodeGeneratorARM::VisitUShr(HUShr* ushr) {
void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
if (instruction->IsStringAlloc()) {
locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
} else {
@@ -3510,7 +3568,7 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
if (instruction->IsStringAlloc()) {
// String is allocated through StringFactory. Call NewEmptyString entry point.
Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize);
+ MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
__ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
__ LoadFromOffset(kLoadWord, LR, temp, code_offset.Int32Value());
__ blx(LR);
@@ -3526,7 +3584,7 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(Location::RegisterLocation(R0));
@@ -3634,7 +3692,7 @@ void LocationsBuilderARM::VisitCompare(HCompare* compare) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
locations->SetOut(Location::RequiresRegister());
break;
}
@@ -3679,12 +3737,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
__ LoadImmediate(out, 0);
- if (type == Primitive::kPrimFloat) {
- __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
- } else {
- __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
- FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
- }
+ GenerateVcmp(compare);
__ vmstat(); // transfer FP status register to ARM APSR.
less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
break;
@@ -3978,6 +4031,17 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI
}
}
+Location LocationsBuilderARM::ArithmeticZeroOrFpuRegister(HInstruction* input) {
+ DCHECK(input->GetType() == Primitive::kPrimDouble || input->GetType() == Primitive::kPrimFloat)
+ << input->GetType();
+ if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
+ (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
+ return Location::ConstantLocation(input->AsConstant());
+ } else {
+ return Location::RequiresFpuRegister();
+ }
+}
+
Location LocationsBuilderARM::ArmEncodableConstantOrRegister(HInstruction* constant,
Opcode opcode) {
DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
@@ -3992,31 +4056,51 @@ bool LocationsBuilderARM::CanEncodeConstantAsImmediate(HConstant* input_cst,
Opcode opcode) {
uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
if (Primitive::Is64BitType(input_cst->GetType())) {
- return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) &&
- CanEncodeConstantAsImmediate(High32Bits(value), opcode);
+ Opcode high_opcode = opcode;
+ SetCc low_set_cc = kCcDontCare;
+ switch (opcode) {
+ case SUB:
+ // Flip the operation to an ADD.
+ value = -value;
+ opcode = ADD;
+ FALLTHROUGH_INTENDED;
+ case ADD:
+ if (Low32Bits(value) == 0u) {
+ return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare);
+ }
+ high_opcode = ADC;
+ low_set_cc = kCcSet;
+ break;
+ default:
+ break;
+ }
+ return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) &&
+ CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare);
} else {
return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
}
}
-bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) {
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value,
+ Opcode opcode,
+ SetCc set_cc) {
ShifterOperand so;
ArmAssembler* assembler = codegen_->GetAssembler();
- if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) {
+ if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, set_cc, &so)) {
return true;
}
Opcode neg_opcode = kNoOperand;
switch (opcode) {
- case AND:
- neg_opcode = BIC;
- break;
- case ORR:
- neg_opcode = ORN;
- break;
+ case AND: neg_opcode = BIC; value = ~value; break;
+ case ORR: neg_opcode = ORN; value = ~value; break;
+ case ADD: neg_opcode = SUB; value = -value; break;
+ case ADC: neg_opcode = SBC; value = ~value; break;
+ case SUB: neg_opcode = ADD; value = -value; break;
+ case SBC: neg_opcode = ADC; value = ~value; break;
default:
return false;
}
- return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so);
+ return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so);
}
void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
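The SUB-to-ADD flip and the ADDS/ADC pairing in CanEncodeConstantAsImmediate() above depend on ordinary two's-complement carry propagation, which the GenerateAddLongConst() calls in VisitAdd/VisitSub rely on as well. A stand-alone sketch of that arithmetic, with made-up values:

    #include <cassert>
    #include <cstdint>

    // Hypothetical sketch: a 64-bit add-of-constant done as a low-word ADDS (sets carry)
    // followed by a high-word ADC (consumes carry); SUB by a constant is the same thing
    // with the negated (two's-complement) constant.
    uint64_t AddLongConst(uint64_t first, uint64_t value) {
      uint32_t low = static_cast<uint32_t>(first);
      uint32_t high = static_cast<uint32_t>(first >> 32);
      uint32_t out_low = low + static_cast<uint32_t>(value);                    // ADDS
      uint32_t carry = (out_low < low) ? 1u : 0u;
      uint32_t out_high = high + static_cast<uint32_t>(value >> 32) + carry;    // ADC
      return (static_cast<uint64_t>(out_high) << 32) | out_low;
    }

    int main() {
      uint64_t a = 0x100000005ULL;
      assert(AddLongConst(a, 0xFFFFFFFFULL) == a + 0xFFFFFFFFULL);          // carry into high word
      assert(AddLongConst(a, static_cast<uint64_t>(-7LL)) == a - 7);        // SUB 7 == ADD -7
      return 0;
    }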
@@ -4264,6 +4348,122 @@ void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
codegen_->GenerateNullCheck(instruction);
}
+static LoadOperandType GetLoadOperandType(Primitive::Type type) {
+ switch (type) {
+ case Primitive::kPrimNot:
+ return kLoadWord;
+ case Primitive::kPrimBoolean:
+ return kLoadUnsignedByte;
+ case Primitive::kPrimByte:
+ return kLoadSignedByte;
+ case Primitive::kPrimChar:
+ return kLoadUnsignedHalfword;
+ case Primitive::kPrimShort:
+ return kLoadSignedHalfword;
+ case Primitive::kPrimInt:
+ return kLoadWord;
+ case Primitive::kPrimLong:
+ return kLoadWordPair;
+ case Primitive::kPrimFloat:
+ return kLoadSWord;
+ case Primitive::kPrimDouble:
+ return kLoadDWord;
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+static StoreOperandType GetStoreOperandType(Primitive::Type type) {
+ switch (type) {
+ case Primitive::kPrimNot:
+ return kStoreWord;
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ return kStoreByte;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ return kStoreHalfword;
+ case Primitive::kPrimInt:
+ return kStoreWord;
+ case Primitive::kPrimLong:
+ return kStoreWordPair;
+ case Primitive::kPrimFloat:
+ return kStoreSWord;
+ case Primitive::kPrimDouble:
+ return kStoreDWord;
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void CodeGeneratorARM::LoadFromShiftedRegOffset(Primitive::Type type,
+ Location out_loc,
+ Register base,
+ Register reg_offset,
+ Condition cond) {
+ uint32_t shift_count = Primitive::ComponentSizeShift(type);
+ Address mem_address(base, reg_offset, Shift::LSL, shift_count);
+
+ switch (type) {
+ case Primitive::kPrimByte:
+ __ ldrsb(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimBoolean:
+ __ ldrb(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimShort:
+ __ ldrsh(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimChar:
+ __ ldrh(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ __ ldr(out_loc.AsRegister<Register>(), mem_address, cond);
+ break;
+    // T32 doesn't support a shifted-register-offset addressing mode for these types.
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void CodeGeneratorARM::StoreToShiftedRegOffset(Primitive::Type type,
+ Location loc,
+ Register base,
+ Register reg_offset,
+ Condition cond) {
+ uint32_t shift_count = Primitive::ComponentSizeShift(type);
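+  // As in LoadFromShiftedRegOffset, the index register is scaled by the element size.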
+ Address mem_address(base, reg_offset, Shift::LSL, shift_count);
+
+ switch (type) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ __ strb(loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ __ strh(loc.AsRegister<Register>(), mem_address, cond);
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ __ str(loc.AsRegister<Register>(), mem_address, cond);
+ break;
+    // T32 doesn't support a shifted-register-offset addressing mode for these types.
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
@@ -4298,70 +4498,40 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
Location index = locations->InAt(1);
Location out_loc = locations->Out();
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
-
Primitive::Type type = instruction->GetType();
- switch (type) {
- case Primitive::kPrimBoolean: {
- Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
- } else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
- __ LoadFromOffset(kLoadUnsignedByte, out, IP, data_offset);
- }
- break;
- }
-
- case Primitive::kPrimByte: {
- Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
- } else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
- __ LoadFromOffset(kLoadSignedByte, out, IP, data_offset);
- }
- break;
- }
-
- case Primitive::kPrimShort: {
- Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
- } else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
- __ LoadFromOffset(kLoadSignedHalfword, out, IP, data_offset);
- }
- break;
- }
-
- case Primitive::kPrimChar: {
- Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
- } else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
- __ LoadFromOffset(kLoadUnsignedHalfword, out, IP, data_offset);
- }
- break;
- }
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
case Primitive::kPrimInt: {
- Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadFromOffset(kLoadWord, out, obj, offset);
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+
+ LoadOperandType load_type = GetLoadOperandType(type);
+ __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
} else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+ Register temp = IP;
+
+ if (has_intermediate_address) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+ DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+ }
+ temp = obj;
+ } else {
+ __ add(temp, obj, ShifterOperand(data_offset));
+ }
+ codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
}
break;
}
@@ -4390,8 +4560,22 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
// reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
} else {
- __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+ Register temp = IP;
+
+ if (has_intermediate_address) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+ DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+ }
+ temp = obj;
+ } else {
+ __ add(temp, obj, ShifterOperand(data_offset));
+ }
+ codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+
codegen_->MaybeRecordImplicitNullCheck(instruction);
// If read barriers are enabled, emit read barriers other than
// Baker's using a slow path (and also unpoison the loaded
@@ -4490,54 +4674,68 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ uint32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
+ Location value_loc = locations->InAt(2);
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
switch (value_type) {
case Primitive::kPrimBoolean:
- case Primitive::kPrimByte: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ StoreToOffset(kStoreByte, value, array, offset);
- } else {
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>()));
- __ StoreToOffset(kStoreByte, value, IP, data_offset);
- }
- break;
- }
-
+ case Primitive::kPrimByte:
case Primitive::kPrimShort:
- case Primitive::kPrimChar: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ StoreToOffset(kStoreHalfword, value, array, offset);
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t full_offset =
+ data_offset + (const_index << Primitive::ComponentSizeShift(value_type));
+ StoreOperandType store_type = GetStoreOperandType(value_type);
+ __ StoreToOffset(store_type, value_loc.AsRegister<Register>(), array, full_offset);
} else {
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
- __ StoreToOffset(kStoreHalfword, value, IP, data_offset);
+ Register temp = IP;
+
+ if (has_intermediate_address) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+ DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == data_offset);
+ }
+ temp = array;
+ } else {
+ __ add(temp, array, ShifterOperand(data_offset));
+ }
+ codegen_->StoreToShiftedRegOffset(value_type,
+ value_loc,
+ temp,
+ index.AsRegister<Register>());
}
break;
}
case Primitive::kPrimNot: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Location value_loc = locations->InAt(2);
Register value = value_loc.AsRegister<Register>();
- Register source = value;
+      // The TryExtractArrayAccessAddress optimization is never applied to a non-primitive
+      // ArraySet. See the comment in instruction_simplifier_shared.cc.
+ DCHECK(!has_intermediate_address);
if (instruction->InputAt(2)->IsNullConstant()) {
// Just setting null.
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreToOffset(kStoreWord, source, array, offset);
+ __ StoreToOffset(kStoreWord, value, array, offset);
} else {
DCHECK(index.IsRegister()) << index;
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ StoreToOffset(kStoreWord, source, IP, data_offset);
+ __ add(IP, array, ShifterOperand(data_offset));
+ codegen_->StoreToShiftedRegOffset(value_type,
+ value_loc,
+ IP,
+ index.AsRegister<Register>());
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
DCHECK(!needs_write_barrier);
@@ -4566,8 +4764,11 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ StoreToOffset(kStoreWord, value, array, offset);
} else {
DCHECK(index.IsRegister()) << index;
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ StoreToOffset(kStoreWord, value, IP, data_offset);
+ __ add(IP, array, ShifterOperand(data_offset));
+ codegen_->StoreToShiftedRegOffset(value_type,
+ value_loc,
+ IP,
+ index.AsRegister<Register>());
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
__ b(&done);
@@ -4634,6 +4835,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
}
}
+ Register source = value;
if (kPoisonHeapReferences) {
// Note that in the case where `value` is a null reference,
// we do not enter this block, as a null reference does not
@@ -4650,8 +4852,12 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ StoreToOffset(kStoreWord, source, array, offset);
} else {
DCHECK(index.IsRegister()) << index;
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ StoreToOffset(kStoreWord, source, IP, data_offset);
+
+ __ add(IP, array, ShifterOperand(data_offset));
+ codegen_->StoreToShiftedRegOffset(value_type,
+ Location::RegisterLocation(source),
+ IP,
+ index.AsRegister<Register>());
}
if (!may_need_runtime_call_for_type_check) {
@@ -4671,23 +4877,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
break;
}
- case Primitive::kPrimInt: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
- if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreToOffset(kStoreWord, value, array, offset);
- } else {
- DCHECK(index.IsRegister()) << index;
- __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ StoreToOffset(kStoreWord, value, IP, data_offset);
- }
- break;
- }
-
case Primitive::kPrimLong: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
Location value = locations->InAt(2);
if (index.IsConstant()) {
size_t offset =
@@ -4701,7 +4891,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
}
case Primitive::kPrimFloat: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
Location value = locations->InAt(2);
DCHECK(value.IsFpuRegister());
if (index.IsConstant()) {
@@ -4715,7 +4904,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
}
case Primitive::kPrimDouble: {
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
Location value = locations->InAt(2);
DCHECK(value.IsFpuRegisterPair());
if (index.IsConstant()) {
@@ -4756,6 +4944,37 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
+void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location out = locations->Out();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
+
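+  // `out` is the first input plus the offset; for array accesses this is the array pointer
+  // plus the data offset extracted by TryExtractArrayAccessAddress, so the access itself
+  // can use a plain register-offset addressing mode.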
+ if (second.IsRegister()) {
+ __ add(out.AsRegister<Register>(),
+ first.AsRegister<Register>(),
+ ShifterOperand(second.AsRegister<Register>()));
+ } else {
+ __ AddConstant(out.AsRegister<Register>(),
+ first.AsRegister<Register>(),
+ second.GetConstant()->AsIntConstant()->GetValue());
+ }
+}
+
void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
? LocationSummary::kCallOnSlowPath
@@ -4790,7 +5009,7 @@ void CodeGeneratorARM::MarkGCCard(Register temp,
if (can_be_null) {
__ CompareAndBranchIfZero(value, &is_null);
}
- __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kArmWordSize>().Int32Value());
+ __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
__ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
__ strb(card, Address(card, temp));
if (can_be_null) {
@@ -4841,7 +5060,7 @@ void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instructio
}
__ LoadFromOffset(
- kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value());
+ kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
if (successor == nullptr) {
__ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
@@ -5370,59 +5589,19 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
__ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
- // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives
- // a 128B range. To try and reduce the number of literals if we load multiple strings,
- // simply split the dex cache address to a 128B aligned base loaded from a literal
- // and the remaining offset embedded in the load.
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
- DCHECK_ALIGNED(load->GetAddress(), 4u);
- constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2;
- uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
- uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
- __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
- // /* GcRoot<mirror::String> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(load, out_loc, out, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- Register base_reg = locations->InAt(0).AsRegister<Register>();
- HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase();
- int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset();
- // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset)
- GenerateGcRootFieldLoad(load, out_loc, base_reg, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- Register current_method = locations->InAt(0).AsRegister<Register>();
-
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- break;
- }
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
- codegen_->AddSlowPath(slow_path);
- __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
+ codegen_->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
static int32_t GetExceptionTlsOffset() {
- return Thread::ExceptionOffset<kArmWordSize>().Int32Value();
+ return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
}
void LocationsBuilderARM::VisitLoadException(HLoadException* load) {
@@ -5447,7 +5626,7 @@ void InstructionCodeGeneratorARM::VisitClearException(HClearException* clear ATT
void LocationsBuilderARM::VisitThrow(HThrow* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
@@ -5848,7 +6027,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
@@ -6011,6 +6190,34 @@ void InstructionCodeGeneratorARM::GenerateEorConst(Register out, Register first,
__ eor(out, first, ShifterOperand(value));
}
+void InstructionCodeGeneratorARM::GenerateAddLongConst(Location out,
+ Location first,
+ uint64_t value) {
+ Register out_low = out.AsRegisterPairLow<Register>();
+ Register out_high = out.AsRegisterPairHigh<Register>();
+ Register first_low = first.AsRegisterPairLow<Register>();
+ Register first_high = first.AsRegisterPairHigh<Register>();
+ uint32_t value_low = Low32Bits(value);
+ uint32_t value_high = High32Bits(value);
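+  // A zero low word generates no carry into the high word, so only the high word
+  // needs an ADD (plus a move of the low word if it is not already in place).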
+ if (value_low == 0u) {
+ if (out_low != first_low) {
+ __ mov(out_low, ShifterOperand(first_low));
+ }
+ __ AddConstant(out_high, first_high, value_high);
+ return;
+ }
+ __ AddConstantSetFlags(out_low, first_low, value_low);
+ ShifterOperand so;
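+  // ADC with value_high and SBC with ~value_high compute the same sum (SBC adds the
+  // bitwise NOT of its operand plus the carry), so use whichever immediate encodes.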
+ if (__ ShifterOperandCanHold(out_high, first_high, ADC, value_high, kCcDontCare, &so)) {
+ __ adc(out_high, first_high, so);
+ } else if (__ ShifterOperandCanHold(out_low, first_low, SBC, ~value_high, kCcDontCare, &so)) {
+ __ sbc(out_high, first_high, so);
+ } else {
+ LOG(FATAL) << "Unexpected constant " << value_high;
+ UNREACHABLE();
+ }
+}
+
void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location first = locations->InAt(0);
@@ -6172,12 +6379,12 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
// Slow path used to mark the GC root `root`.
SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
codegen_->AddSlowPath(slow_path);
// IP = Thread::Current()->GetIsGcMarking()
__ LoadFromOffset(
- kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
+ kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value());
__ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
} else {
@@ -6275,21 +6482,13 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
"art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift);
- __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
- // Introduce a dependency on the high bits of rb_state, which shall
- // be all zeroes, to prevent load-load reordering, and without using
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
// a memory barrier (which would be more expensive).
- // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0
- __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
- // obj is unchanged by this operation, but its value now depends on
- // IP, which depends on temp_reg.
- __ add(obj, obj, ShifterOperand(IP));
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp_reg`.
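+  // (LSR #32 shifts out all 32 bits of the lock word, so the value added is zero.)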
+ __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
// The actual reference load.
if (index.IsValid()) {
@@ -6321,13 +6520,19 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Slow path used to mark the object `ref` when it is gray.
SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref);
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
AddSlowPath(slow_path);
// if (rb_state == ReadBarrier::gray_ptr_)
// ref = ReadBarrier::Mark(ref);
- __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_));
- __ b(slow_path->GetEntryLabel(), EQ);
+ // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with LSRS,
+  // which can be a 16-bit instruction, unlike the TST immediate.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
+ __ b(slow_path->GetEntryLabel(), CS); // Carry flag is the last bit shifted out by LSRS.
__ Bind(slow_path->GetExitLabel());
}
@@ -6539,7 +6744,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
// LR = callee_method->entry_point_from_quick_compiled_code_
__ LoadFromOffset(
kLoadWord, LR, callee_method.AsRegister<Register>(),
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value());
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
// LR()
__ blx(LR);
break;
@@ -6573,7 +6778,7 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
__ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetMethodAt(method_offset);
uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArmWordSize).Int32Value();
+ kArmPointerSize).Int32Value();
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
// LR = temp->GetEntryPoint();
__ LoadFromOffset(kLoadWord, LR, temp, entry_point);
@@ -6951,18 +7156,25 @@ void LocationsBuilderARM::VisitClassTableGet(HClassTableGet* instruction) {
void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- uint32_t method_offset = 0;
if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
- method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
instruction->GetIndex(), kArmPointerSize).SizeValue();
+ __ LoadFromOffset(kLoadWord,
+ locations->Out().AsRegister<Register>(),
+ locations->InAt(0).AsRegister<Register>(),
+ method_offset);
} else {
- method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- instruction->GetIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ instruction->GetIndex(), kArmPointerSize));
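+    // Load the ImTable pointer from the class first, then the method entry at
+    // `method_offset` within that table.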
+ __ LoadFromOffset(kLoadWord,
+ locations->Out().AsRegister<Register>(),
+ locations->InAt(0).AsRegister<Register>(),
+ mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+ __ LoadFromOffset(kLoadWord,
+ locations->Out().AsRegister<Register>(),
+ locations->Out().AsRegister<Register>(),
+ method_offset);
}
- __ LoadFromOffset(kLoadWord,
- locations->Out().AsRegister<Register>(),
- locations->InAt(0).AsRegister<Register>(),
- method_offset);
}
#undef __
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 477c4f18c1..5d9b2dce1c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -17,13 +17,13 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_
+#include "base/enums.h"
#include "code_generator.h"
-#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
#include "nodes.h"
+#include "string_reference.h"
#include "parallel_move_resolver.h"
#include "utils/arm/assembler_thumb2.h"
-#include "utils/string_reference.h"
#include "utils/type_reference.h"
namespace art {
@@ -32,7 +32,7 @@ namespace arm {
class CodeGeneratorARM;
// Use a local definition to prevent copying mistakes.
-static constexpr size_t kArmWordSize = kArmPointerSize;
+static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
@@ -180,9 +180,10 @@ class LocationsBuilderARM : public HGraphVisitor {
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ Location ArithmeticZeroOrFpuRegister(HInstruction* input);
Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
- bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode);
+ bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare);
CodeGeneratorARM* const codegen_;
InvokeDexCallingConventionVisitorARM parameter_visitor_;
@@ -219,6 +220,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void GenerateAndConst(Register out, Register first, uint32_t value);
void GenerateOrrConst(Register out, Register first, uint32_t value);
void GenerateEorConst(Register out, Register first, uint32_t value);
+ void GenerateAddLongConst(Location out, Location first, uint64_t value);
void HandleBitwiseOperation(HBinaryOperation* operation);
void HandleCondition(HCondition* condition);
void HandleIntegerRotate(LocationSummary* locations);
@@ -281,6 +283,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
Label* false_target);
+ void GenerateVcmp(HInstruction* instruction);
void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -365,6 +368,24 @@ class CodeGeneratorARM : public CodeGenerator {
// Helper method to move a 64bits value between two locations.
void Move64(Location destination, Location source);
+ void LoadOrStoreToOffset(Primitive::Type type,
+ Location loc,
+ Register base,
+ int32_t offset,
+ bool is_load,
+ Condition cond = AL);
+
+ void LoadFromShiftedRegOffset(Primitive::Type type,
+ Location out_loc,
+ Register base,
+ Register reg_offset,
+ Condition cond = AL);
+ void StoreToShiftedRegOffset(Primitive::Type type,
+ Location out_loc,
+ Register base,
+ Register reg_offset,
+ Condition cond = AL);
+
// Generate code to invoke a runtime entry point.
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
@@ -376,6 +397,12 @@ class CodeGeneratorARM : public CodeGenerator {
uint32_t dex_pc,
SlowPathCode* slow_path);
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
// Emit a write barrier.
void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index fc2c2c34aa..122c174eae 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -33,8 +33,7 @@
#include "utils/assembler.h"
#include "utils/stack_checks.h"
-
-using namespace vixl; // NOLINT(build/namespaces)
+using namespace vixl::aarch64; // NOLINT(build/namespaces)
#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
@@ -132,9 +131,9 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type retur
return ARM64ReturnLocation(return_type);
}
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
// Calculate memory accessing operand for save/restore live registers.
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
@@ -147,20 +146,20 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
codegen->GetNumberOfFloatingPointRegisters()));
CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize,
- register_set->GetCoreRegisters() & (~callee_saved_core_registers.list()));
+ register_set->GetCoreRegisters() & (~callee_saved_core_registers.GetList()));
CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize,
- register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.list()));
+ register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.GetList()));
MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
Register base = masm->StackPointer();
- int64_t core_spill_size = core_list.TotalSizeInBytes();
- int64_t fp_spill_size = fp_list.TotalSizeInBytes();
+ int64_t core_spill_size = core_list.GetTotalSizeInBytes();
+ int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
int64_t reg_size = kXRegSizeInBytes;
int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
uint32_t ls_access_size = WhichPowerOf2(reg_size);
- if (((core_list.Count() > 1) || (fp_list.Count() > 1)) &&
+ if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
!masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
// If the offset does not fit in the instruction's immediate field, use an alternate register
// to compute the base address(float point registers spill base address).
@@ -399,11 +398,9 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -411,7 +408,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
}
}
- vixl::Label* GetReturnLabel() {
+ vixl::aarch64::Label* GetReturnLabel() {
DCHECK(successor_ == nullptr);
return &return_label_;
}
@@ -427,7 +424,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
HBasicBlock* const successor_;
// If `successor_` is null, the label to branch to after the suspend check.
- vixl::Label return_label_;
+ vixl::aarch64::Label return_label_;
DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
};
@@ -463,7 +460,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
if (instruction_->IsInstanceOf()) {
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
- CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
+ CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t,
const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
@@ -567,9 +564,9 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
__ Bind(&table_start_);
const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
for (uint32_t i = 0; i < num_entries; i++) {
- vixl::Label* target_label = codegen->GetLabelOf(successors[i]);
+ vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
DCHECK(target_label->IsBound());
- ptrdiff_t jump_offset = target_label->location() - table_start_.location();
+ ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
Literal<int32_t> literal(jump_offset);
@@ -580,8 +577,8 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
public:
- ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj)
- : SlowPathCodeARM64(instruction), out_(out), obj_(obj) {
+ ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj)
+ : SlowPathCodeARM64(instruction), obj_(obj) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -589,9 +586,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- Primitive::Type type = Primitive::kPrimNot;
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg()));
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
@@ -599,30 +595,45 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, locations);
-
- InvokeRuntimeCallingConvention calling_convention;
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type);
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
- instruction_,
- instruction_->GetDexPc(),
- this);
- CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
- arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
-
- RestoreLiveRegisters(codegen, locations);
+ DCHECK_NE(obj_.reg(), LR);
+ DCHECK_NE(obj_.reg(), WSP);
+ DCHECK_NE(obj_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(obj_.reg(), IP0);
+ DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in W0):
+ //
+ // W0 <- obj
+ // W0 <- ReadBarrierMark(W0)
+ // obj <- W0
+ //
+ // we just use rX (the register holding `obj`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(obj_.reg());
+ // This runtime call does not require a stack map.
+ arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ B(GetExitLabel());
}
private:
- const Location out_;
const Location obj_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
@@ -668,14 +679,12 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
+ instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
__ Bind(GetEntryLabel());
@@ -744,10 +753,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
<< instruction_->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset_, 0U);
- DCHECK(index_.IsRegisterPair());
- // UnsafeGet's offset location is a register pair, the low
- // part contains the correct offset.
- index = index_.ToLow();
+ DCHECK(index_.IsRegister());
}
}
@@ -790,8 +796,8 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
private:
Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
- size_t ref = static_cast<int>(XRegisterFrom(ref_).code());
- size_t obj = static_cast<int>(XRegisterFrom(obj_).code());
+ size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
+ size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
return Register(VIXLRegCodeFromART(i), kXRegSize);
@@ -909,8 +915,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
kNumberOfAllocatableRegisters,
kNumberOfAllocatableFPRegisters,
kNumberOfAllocatableRegisterPairs,
- callee_saved_core_registers.list(),
- callee_saved_fp_registers.list(),
+ callee_saved_core_registers.GetList(),
+ callee_saved_fp_registers.GetList(),
compiler_options,
stats),
block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -1060,17 +1066,17 @@ void CodeGeneratorARM64::GenerateFrameExit() {
GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
}
-vixl::CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
+CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
- return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
- core_spill_mask_);
+  return CPURegList(CPURegister::kRegister, kXRegSize, core_spill_mask_);
}
-vixl::CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
+CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
GetNumberOfFloatingPointRegisters()));
- return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
- fpu_spill_mask_);
+  return CPURegList(CPURegister::kFPRegister, kDRegSize, fpu_spill_mask_);
}
void CodeGeneratorARM64::Bind(HBasicBlock* block) {
@@ -1094,11 +1100,11 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_
UseScratchRegisterScope temps(GetVIXLAssembler());
Register card = temps.AcquireX();
Register temp = temps.AcquireW(); // Index within the CardTable - 32bit.
- vixl::Label done;
+ vixl::aarch64::Label done;
if (value_can_be_null) {
__ Cbz(value, &done);
}
- __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64WordSize>().Int32Value()));
+ __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
__ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
__ Strb(card, MemOperand(card, temp.X()));
if (value_can_be_null) {
@@ -1119,12 +1125,12 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const {
CPURegList reserved_core_registers = vixl_reserved_core_registers;
reserved_core_registers.Combine(runtime_reserved_core_registers);
while (!reserved_core_registers.IsEmpty()) {
- blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
+ blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
}
CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
while (!reserved_fp_registers.IsEmpty()) {
- blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
+ blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
}
if (GetGraph()->IsDebuggable()) {
@@ -1133,7 +1139,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const {
// now, just block them.
CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
while (!reserved_fp_registers_debuggable.IsEmpty()) {
- blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
+ blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
}
}
}
@@ -1277,17 +1283,21 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
UseScratchRegisterScope temps(GetVIXLAssembler());
HConstant* src_cst = source.GetConstant();
CPURegister temp;
- if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) {
- temp = temps.AcquireW();
- } else if (src_cst->IsLongConstant()) {
- temp = temps.AcquireX();
- } else if (src_cst->IsFloatConstant()) {
- temp = temps.AcquireS();
+ if (src_cst->IsZeroBitPattern()) {
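+        // A zero bit pattern can be stored directly from xzr/wzr without acquiring a
+        // scratch register or materializing the constant.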
+ temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) ? xzr : wzr;
} else {
- DCHECK(src_cst->IsDoubleConstant());
- temp = temps.AcquireD();
+ if (src_cst->IsIntConstant()) {
+ temp = temps.AcquireW();
+ } else if (src_cst->IsLongConstant()) {
+ temp = temps.AcquireX();
+ } else if (src_cst->IsFloatConstant()) {
+ temp = temps.AcquireS();
+ } else {
+ DCHECK(src_cst->IsDoubleConstant());
+ temp = temps.AcquireD();
+ }
+ MoveConstant(temp, src_cst);
}
- MoveConstant(temp, src_cst);
__ Str(temp, StackOperandFrom(destination));
} else {
DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
@@ -1344,7 +1354,7 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
DCHECK(!src.IsPostIndex());
// TODO(vixl): Let the MacroAssembler handle MemOperand.
- __ Add(temp_base, src.base(), OperandFromMemOperand(src));
+ __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
MemOperand base = MemOperand(temp_base);
switch (type) {
case Primitive::kPrimBoolean:
@@ -1436,7 +1446,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
// TODO(vixl): Let the MacroAssembler handle this.
Operand op = OperandFromMemOperand(dst);
- __ Add(temp_base, dst.base(), op);
+ __ Add(temp_base, dst.GetBaseRegister(), op);
MemOperand base = MemOperand(temp_base);
switch (type) {
case Primitive::kPrimBoolean:
@@ -1472,7 +1482,7 @@ void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kArm64WordSize>(entrypoint).Int32Value(),
+ InvokeRuntime(GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value(),
instruction,
dex_pc,
slow_path);
@@ -1489,8 +1499,17 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset,
RecordPcInfo(instruction, dex_pc, slow_path);
}
+void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ BlockPoolsScope block_pools(GetVIXLAssembler());
+ __ Ldr(lr, MemOperand(tr, entry_point_offset));
+ __ Blr(lr);
+}
+
void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
- vixl::Register class_reg) {
+ Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireW();
size_t status_offset = mirror::Class::StatusOffset().SizeValue();
@@ -1546,7 +1565,7 @@ void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruct
UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
Register temp = temps.AcquireW();
- __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64WordSize>().SizeValue()));
+ __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
if (successor == nullptr) {
__ Cbnz(temp, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
@@ -1755,7 +1774,7 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
__ Sub(dst, lhs, rhs);
} else if (instr->IsRor()) {
if (rhs.IsImmediate()) {
- uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1);
+ uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
__ Ror(dst, lhs, shift);
} else {
// Ensure shift distance is in the same size register as the result. If
@@ -1818,7 +1837,7 @@ void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
Register lhs = InputRegisterAt(instr, 0);
Operand rhs = InputOperandAt(instr, 1);
if (rhs.IsImmediate()) {
- uint32_t shift_value = rhs.immediate() &
+ uint32_t shift_value = rhs.GetImmediate() &
(type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
if (instr->IsShl()) {
__ Lsl(dst, lhs, shift_value);
@@ -1828,7 +1847,7 @@ void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
__ Lsr(dst, lhs, shift_value);
}
} else {
- Register rhs_reg = dst.IsX() ? rhs.reg().X() : rhs.reg().W();
+ Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
if (instr->IsShl()) {
__ Lsl(dst, lhs, rhs_reg);
@@ -1965,9 +1984,8 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
}
}
-void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1976,10 +1994,9 @@ void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddr
locations->SetOut(Location::RequiresRegister());
}
-void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
- HArm64IntermediateAddress* instruction) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
__ Add(OutputRegister(instruction),
InputRegisterAt(instruction, 0),
@@ -2014,13 +2031,14 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate*
if (instr->GetType() == Primitive::kPrimLong &&
codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
- vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize;
+ vixl::aarch64::Instruction* prev =
+ masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
if (prev->IsLoadOrStore()) {
// Make sure we emit only exactly one nop.
- vixl::CodeBufferCheckScope scope(masm,
- vixl::kInstructionSize,
- vixl::CodeBufferCheckScope::kCheck,
- vixl::CodeBufferCheckScope::kExactSize);
+ vixl::aarch64::CodeBufferCheckScope scope(masm,
+ kInstructionSize,
+ vixl::aarch64::CodeBufferCheckScope::kCheck,
+ vixl::aarch64::CodeBufferCheckScope::kExactSize);
__ nop();
}
}
@@ -2078,9 +2096,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
Register temp = temps.AcquireW();
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
- DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!instruction->GetArray()->IsIntermediateAddress());
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2093,15 +2110,15 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
source = HeapOperand(obj, offset);
} else {
Register temp = temps.AcquireSameSizeAs(obj);
- if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+ if (instruction->GetArray()->IsIntermediateAddress()) {
// The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+ // HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
// We do not need to compute the intermediate address from the array: the
// input instruction has done it already. See the comment in
- // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+ // `TryExtractArrayAccessAddress()`.
if (kIsDebugBuild) {
- HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+ HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
}
temp = obj;
@@ -2185,15 +2202,15 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
} else {
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireSameSizeAs(array);
- if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+ if (instruction->GetArray()->IsIntermediateAddress()) {
// The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+ // HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
// We do not need to compute the intermediate address from the array: the
// input instruction has done it already. See the comment in
- // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+ // `TryExtractArrayAccessAddress()`.
if (kIsDebugBuild) {
- HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+ HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
}
temp = array;
@@ -2209,8 +2226,8 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
DCHECK(needs_write_barrier);
- DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
- vixl::Label done;
+ DCHECK(!instruction->GetArray()->IsIntermediateAddress());
+ vixl::aarch64::Label done;
SlowPathCodeARM64* slow_path = nullptr;
{
// We use a block to end the scratch scope before the write barrier, thus
@@ -2235,7 +2252,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
codegen_->AddSlowPath(slow_path);
if (instruction->GetValueCanBeNull()) {
- vixl::Label non_zero;
+ vixl::aarch64::Label non_zero;
__ Cbnz(Register(value), &non_zero);
if (!index.IsConstant()) {
__ Add(temp, array, offset);
@@ -2289,7 +2306,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
__ Cmp(temp, temp2);
if (instruction->StaticTypeOfArrayIsObjectArray()) {
- vixl::Label do_put;
+ vixl::aarch64::Label do_put;
__ B(eq, &do_put);
// If heap poisoning is enabled, the `temp` reference has
// not been unpoisoned yet; unpoison it now.
@@ -2822,11 +2839,11 @@ void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary)
void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
- vixl::Label* true_target,
- vixl::Label* false_target) {
+ vixl::aarch64::Label* true_target,
+ vixl::aarch64::Label* false_target) {
// FP branching requires both targets to be explicit. If either of the targets
// is nullptr (fallthrough) use and bind `fallthrough_target` instead.
- vixl::Label fallthrough_target;
+ vixl::aarch64::Label fallthrough_target;
HInstruction* cond = instruction->InputAt(condition_input_index);
if (true_target == nullptr && false_target == nullptr) {
@@ -2884,7 +2901,7 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
Operand rhs = InputOperandAt(condition, 1);
Condition arm64_cond;
- vixl::Label* non_fallthrough_target;
+ vixl::aarch64::Label* non_fallthrough_target;
if (true_target == nullptr) {
arm64_cond = ARM64Condition(condition->GetOppositeCondition());
non_fallthrough_target = false_target;
@@ -2894,7 +2911,7 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
}
if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
- rhs.IsImmediate() && (rhs.immediate() == 0)) {
+ rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
switch (arm64_cond) {
case eq:
__ Cbz(lhs, non_fallthrough_target);
@@ -2943,10 +2960,14 @@ void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
- vixl::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
- nullptr : codegen_->GetLabelOf(true_successor);
- vixl::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
- nullptr : codegen_->GetLabelOf(false_successor);
+ vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
+ true_target = nullptr;
+ }
+ vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
+ false_target = nullptr;
+ }
GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
}
@@ -3130,7 +3151,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- vixl::Label done, zero;
+ vixl::aarch64::Label done, zero;
SlowPathCodeARM64* slow_path = nullptr;
// Return 0 if `obj` is null.
@@ -3155,7 +3176,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
- vixl::Label loop, success;
+ vixl::aarch64::Label loop, success;
__ Bind(&loop);
// /* HeapReference<Class> */ out = out->super_class_
GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
@@ -3172,7 +3193,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
- vixl::Label loop, success;
+ vixl::aarch64::Label loop, success;
__ Bind(&loop);
__ Cmp(out, cls);
__ B(eq, &success);
@@ -3191,7 +3212,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
- vixl::Label exact_check;
+ vixl::aarch64::Label exact_check;
__ Cmp(out, cls);
__ B(eq, &exact_check);
// Otherwise, we need to check that the object's class is a non-primitive array.
@@ -3328,7 +3349,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
is_type_check_slow_path_fatal);
codegen_->AddSlowPath(type_check_slow_path);
- vixl::Label done;
+ vixl::aarch64::Label done;
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
__ Cbz(obj, &done);
@@ -3350,7 +3371,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
- vixl::Label loop, compare_classes;
+ vixl::aarch64::Label loop, compare_classes;
__ Bind(&loop);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
@@ -3377,7 +3398,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
- vixl::Label loop;
+ vixl::aarch64::Label loop;
__ Bind(&loop);
__ Cmp(temp, cls);
__ B(eq, &done);
@@ -3402,7 +3423,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
- vixl::Label check_non_primitive_component_type;
+ vixl::aarch64::Label check_non_primitive_component_type;
__ Cmp(temp, cls);
__ B(eq, &done);
@@ -3506,11 +3527,9 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
LocationSummary* locations = invoke->GetLocations();
Register temp = XRegisterFrom(locations->GetTemp(0));
- uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
Location receiver = locations->InAt(0);
Offset class_offset = mirror::Object::ClassOffset();
- Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
// The register ip1 is required to be used for the hidden argument in
// art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
@@ -3537,6 +3556,10 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
+ __ Ldr(temp,
+ MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ invoke->GetImtIndex(), kArm64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ Ldr(temp, MemOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
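(For context: the interface dispatch above now goes through a per-class ImTable pointer instead of an embedded table. A minimal sketch of the offset arithmetic, assuming ImTable is a flat array of method pointers; the helper below is an illustration, not the ART implementation.)
#include <cstddef>
#include <cstdint>
// Assumed layout: a flat array of ArtMethod*, so the element offset is a scaled index.
constexpr size_t ImtElementOffsetSketch(uint32_t imt_index, size_t pointer_size) {
  return imt_index * pointer_size;  // e.g. index 5 with 8-byte pointers -> offset 40
}
// Lookup performed by the generated code above:
//   temp = receiver->klass_                                      (class_offset)
//   temp = temp->imt_                                            (Class::ImtPtrOffset)
//   temp = *(temp + ImtElementOffsetSketch(imt_index, 8))        (the IMT entry)
//   lr   = temp->entry_point_from_quick_compiled_code_           (entry_point)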
@@ -3626,17 +3649,17 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
// Add ADRP with its PC-relative DexCache access patch.
const DexFile& dex_file = *invoke->GetTargetMethod().dex_file;
uint32_t element_offset = invoke->GetDexCacheArrayOffset();
- vixl::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+ vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
{
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(adrp_label);
__ adrp(XRegisterFrom(temp), /* offset placeholder */ 0);
}
// Add LDR with its PC-relative DexCache access patch.
- vixl::Label* ldr_label =
+ vixl::aarch64::Label* ldr_label =
NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
{
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(ldr_label);
__ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), /* offset placeholder */ 0));
}
@@ -3658,7 +3681,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
// /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
__ Ldr(reg.X(),
MemOperand(method_reg.X(),
- ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
+ ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value()));
// temp = temp[index_in_cache];
// Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
uint32_t index_in_cache = invoke->GetDexMethodIndex();
@@ -3673,8 +3696,8 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
break;
case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
relative_call_patches_.emplace_back(invoke->GetTargetMethod());
- vixl::Label* label = &relative_call_patches_.back().label;
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ vixl::aarch64::Label* label = &relative_call_patches_.back().label;
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(label);
__ bl(0); // Branch and link to itself. This will be overridden at link time.
break;
@@ -3690,7 +3713,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
// LR = callee_method->entry_point_from_quick_compiled_code_;
__ Ldr(lr, MemOperand(
XRegisterFrom(callee_method),
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value()));
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
// lr()
__ Blr(lr);
break;
@@ -3710,7 +3733,7 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te
size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
Offset class_offset = mirror::Object::ClassOffset();
- Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
BlockPoolsScope block_pools(GetVIXLAssembler());
@@ -3733,58 +3756,64 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te
__ Blr(lr);
}
-vixl::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(const DexFile& dex_file,
- uint32_t string_index,
- vixl::Label* adrp_label) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
+ const DexFile& dex_file,
+ uint32_t string_index,
+ vixl::aarch64::Label* adrp_label) {
return NewPcRelativePatch(dex_file, string_index, adrp_label, &pc_relative_string_patches_);
}
-vixl::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(const DexFile& dex_file,
- uint32_t type_index,
- vixl::Label* adrp_label) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
+ const DexFile& dex_file,
+ uint32_t type_index,
+ vixl::aarch64::Label* adrp_label) {
return NewPcRelativePatch(dex_file, type_index, adrp_label, &pc_relative_type_patches_);
}
-vixl::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
- uint32_t element_offset,
- vixl::Label* adrp_label) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
+ const DexFile& dex_file,
+ uint32_t element_offset,
+ vixl::aarch64::Label* adrp_label) {
return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
}
-vixl::Label* CodeGeneratorARM64::NewPcRelativePatch(const DexFile& dex_file,
- uint32_t offset_or_index,
- vixl::Label* adrp_label,
- ArenaDeque<PcRelativePatchInfo>* patches) {
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
+ const DexFile& dex_file,
+ uint32_t offset_or_index,
+ vixl::aarch64::Label* adrp_label,
+ ArenaDeque<PcRelativePatchInfo>* patches) {
// Add a patch entry and return the label.
patches->emplace_back(dex_file, offset_or_index);
PcRelativePatchInfo* info = &patches->back();
- vixl::Label* label = &info->label;
+ vixl::aarch64::Label* label = &info->label;
// If adrp_label is null, this is the ADRP patch and needs to point to its own label.
info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
return label;
}
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
const DexFile& dex_file, uint32_t string_index) {
return boot_image_string_patches_.GetOrCreate(
StringReference(&dex_file, string_index),
[this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
}
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
const DexFile& dex_file, uint32_t type_index) {
return boot_image_type_patches_.GetOrCreate(
TypeReference(&dex_file, type_index),
[this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
}
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(uint64_t address) {
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
+ uint64_t address) {
bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
}
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddressLiteral(uint64_t address) {
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddressLiteral(
+ uint64_t address) {
return DeduplicateUint64Literal(address);
}
@@ -3803,76 +3832,76 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
linker_patches->reserve(size);
for (const auto& entry : method_patches_) {
const MethodReference& target_method = entry.first;
- vixl::Literal<uint64_t>* literal = entry.second;
- linker_patches->push_back(LinkerPatch::MethodPatch(literal->offset(),
+ vixl::aarch64::Literal<uint64_t>* literal = entry.second;
+ linker_patches->push_back(LinkerPatch::MethodPatch(literal->GetOffset(),
target_method.dex_file,
target_method.dex_method_index));
}
for (const auto& entry : call_patches_) {
const MethodReference& target_method = entry.first;
- vixl::Literal<uint64_t>* literal = entry.second;
- linker_patches->push_back(LinkerPatch::CodePatch(literal->offset(),
+ vixl::aarch64::Literal<uint64_t>* literal = entry.second;
+ linker_patches->push_back(LinkerPatch::CodePatch(literal->GetOffset(),
target_method.dex_file,
target_method.dex_method_index));
}
- for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) {
- linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location(),
+ for (const MethodPatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) {
+ linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.GetLocation(),
info.target_method.dex_file,
info.target_method.dex_method_index));
}
for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
- linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location(),
+ linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
&info.target_dex_file,
- info.pc_insn_label->location(),
+ info.pc_insn_label->GetLocation(),
info.offset_or_index));
}
for (const auto& entry : boot_image_string_patches_) {
const StringReference& target_string = entry.first;
- vixl::Literal<uint32_t>* literal = entry.second;
- linker_patches->push_back(LinkerPatch::StringPatch(literal->offset(),
+ vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+ linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
target_string.dex_file,
target_string.string_index));
}
for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
- linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.location(),
+ linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.GetLocation(),
&info.target_dex_file,
- info.pc_insn_label->location(),
+ info.pc_insn_label->GetLocation(),
info.offset_or_index));
}
for (const auto& entry : boot_image_type_patches_) {
const TypeReference& target_type = entry.first;
- vixl::Literal<uint32_t>* literal = entry.second;
- linker_patches->push_back(LinkerPatch::TypePatch(literal->offset(),
+ vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+ linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
target_type.dex_file,
target_type.type_index));
}
for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
- linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.location(),
+ linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.GetLocation(),
&info.target_dex_file,
- info.pc_insn_label->location(),
+ info.pc_insn_label->GetLocation(),
info.offset_or_index));
}
for (const auto& entry : boot_image_address_patches_) {
DCHECK(GetCompilerOptions().GetIncludePatchInformation());
- vixl::Literal<uint32_t>* literal = entry.second;
- linker_patches->push_back(LinkerPatch::RecordPosition(literal->offset()));
+ vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+ linker_patches->push_back(LinkerPatch::RecordPosition(literal->GetOffset()));
}
}
-vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
Uint32ToLiteralMap* map) {
return map->GetOrCreate(
value,
[this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
}
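(The literal-deduplication helpers above all follow the same map-backed memoization pattern. A plain-C++ sketch of what GetOrCreate is assumed to do, with std::map standing in for ArenaSafeMap; illustration only.)
#include <map>
#include <utility>
template <typename K, typename V, typename Factory>
V* GetOrCreateSketch(std::map<K, V*>* map, const K& key, Factory&& make_value) {
  auto it = map->lower_bound(key);
  if (it == map->end() || map->key_comp()(key, it->first)) {
    // Not present yet: create the literal once and remember it.
    it = map->emplace_hint(it, key, std::forward<Factory>(make_value)());
  }
  return it->second;  // Later requests for the same key reuse the same literal.
}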
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
return uint64_literals_.GetOrCreate(
value,
[this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
}
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
MethodReference target_method,
MethodToLiteralMap* map) {
return map->GetOrCreate(
@@ -3880,12 +3909,12 @@ vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
[this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
}
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
MethodReference target_method) {
return DeduplicateMethodLiteral(target_method, &method_patches_);
}
-vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
+vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
MethodReference target_method) {
return DeduplicateMethodLiteral(target_method, &call_patches_);
}
@@ -3959,7 +3988,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
CodeGenerator::CreateLoadClassLocationSummary(
cls,
LocationFrom(calling_convention.GetRegisterAt(0)),
- LocationFrom(vixl::x0),
+ LocationFrom(vixl::aarch64::x0),
/* code_generator_supports_read_barrier */ true);
return;
}
@@ -4011,16 +4040,17 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
// Add ADRP with its PC-relative type patch.
const DexFile& dex_file = cls->GetDexFile();
uint32_t type_index = cls->GetTypeIndex();
- vixl::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
+ vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
{
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(adrp_label);
__ adrp(out.X(), /* offset placeholder */ 0);
}
// Add ADD with its PC-relative type patch.
- vixl::Label* add_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
+ vixl::aarch64::Label* add_label =
+ codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
{
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(add_label);
__ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
}
@@ -4053,14 +4083,15 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
// Add ADRP with its PC-relative DexCache access patch.
const DexFile& dex_file = cls->GetDexFile();
uint32_t element_offset = cls->GetDexCacheElementOffset();
- vixl::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+ vixl::aarch64::Label* adrp_label =
+ codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
{
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(adrp_label);
__ adrp(out.X(), /* offset placeholder */ 0);
}
// Add LDR with its PC-relative DexCache access patch.
- vixl::Label* ldr_label =
+ vixl::aarch64::Label* ldr_label =
codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
GenerateGcRootFieldLoad(cls, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
@@ -4099,7 +4130,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
}
static MemOperand GetExceptionTlsAddress() {
- return MemOperand(tr, Thread::ExceptionOffset<kArm64WordSize>().Int32Value());
+ return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
}
void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
@@ -4166,7 +4197,6 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
}
void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
- Location out_loc = load->GetLocations()->Out();
Register out = OutputRegister(load);
switch (load->GetLoadKind()) {
@@ -4180,17 +4210,17 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
// Add ADRP with its PC-relative String patch.
const DexFile& dex_file = load->GetDexFile();
uint32_t string_index = load->GetStringIndex();
- vixl::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+ vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
{
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(adrp_label);
__ adrp(out.X(), /* offset placeholder */ 0);
}
// Add ADD with its PC-relative String patch.
- vixl::Label* add_label =
+ vixl::aarch64::Label* add_label =
codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
{
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(add_label);
__ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
}
@@ -4202,62 +4232,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
__ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress()));
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads
- // that gives a 16KiB range. To try and reduce the number of literals if we load
- // multiple strings, simply split the dex cache address to a 16KiB aligned base
- // loaded from a literal and the remaining offset embedded in the load.
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
- DCHECK_ALIGNED(load->GetAddress(), 4u);
- constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2;
- uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
- uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits);
- __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
- // /* GcRoot<mirror::String> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(load, out_loc, out.X(), offset);
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- // Add ADRP with its PC-relative DexCache access patch.
- const DexFile& dex_file = load->GetDexFile();
- uint32_t element_offset = load->GetDexCacheElementOffset();
- vixl::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
- {
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(adrp_label);
- __ adrp(out.X(), /* offset placeholder */ 0);
- }
- // Add LDR with its PC-relative DexCache access patch.
- vixl::Label* ldr_label =
- codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
- // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- Register current_method = InputRegisterAt(load, 0);
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- break;
- }
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
- codegen_->AddSlowPath(slow_path);
- __ Cbz(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
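(The removed kDexCacheAddress path above split the dex cache address into a 16KiB-aligned literal plus a small embedded offset, because a 32-bit LDR encodes a 12-bit unsigned immediate scaled by 4 bytes. A worked example of that split; the address is hypothetical.)
#include <cstdint>
uint64_t SplitDexCacheAddressExample() {
  constexpr unsigned kOffsetBits = /* encoded bits */ 12 + /* scale */ 2;  // 16KiB range
  constexpr uint64_t kMask = (uint64_t{1} << kOffsetBits) - 1;             // 0x3FFF
  const uint64_t address = 0x12345678u;      // hypothetical 4-byte aligned GC-root address
  const uint64_t base = address & ~kMask;    // 0x12344000: loaded from a deduplicated literal
  const uint32_t offset = static_cast<uint32_t>(address & kMask);  // 0x1678: folded into the LDR
  return base + offset;                      // recombines to the original address
}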
void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
@@ -4271,7 +4254,7 @@ void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant AT
void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
}
@@ -4369,7 +4352,7 @@ void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
locations->SetOut(LocationFrom(x0));
@@ -4394,7 +4377,7 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
if (instruction->IsStringAlloc()) {
locations->AddTemp(LocationFrom(kArtMethodRegister));
@@ -4411,7 +4394,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction)
if (instruction->IsStringAlloc()) {
// String is allocated through StringFactory. Call NewEmptyString entry point.
Location temp = instruction->GetLocations()->GetTemp(0);
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+ MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
__ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
__ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
__ Blr(lr);
@@ -4450,7 +4433,7 @@ void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
}
void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
- __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::Operand(1));
+ __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
}
void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
@@ -4547,7 +4530,8 @@ void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED)
void LocationsBuilderARM64::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
LocationSummary::CallKind call_kind =
- Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+ Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
switch (type) {
@@ -4764,7 +4748,7 @@ void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction
void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
}
@@ -4882,7 +4866,7 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
// Roughly set 16 as max average assemblies generated per HIR in a graph.
- static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize;
+ static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
// ADR has a limited range (+/-1MB), so we set a threshold for the number of HIRs in the graph to
// make sure we don't emit it if the target may run out of range.
// TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
@@ -5027,9 +5011,9 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstructi
void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
- vixl::Register obj,
+ Register obj,
uint32_t offset,
- vixl::Label* fixup_label) {
+ vixl::aarch64::Label* fixup_label) {
Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
@@ -5045,7 +5029,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
} else {
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(fixup_label);
__ ldr(root_reg, MemOperand(obj, offset));
}
@@ -5059,14 +5043,14 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
// Slow path used to mark the GC root `root`.
SlowPathCodeARM64* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root);
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
codegen_->AddSlowPath(slow_path);
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireW();
// temp = Thread::Current()->GetIsGcMarking()
- __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value()));
+ __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
__ Cbnz(temp, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
} else {
@@ -5076,7 +5060,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
if (fixup_label == nullptr) {
__ Add(root_reg.X(), obj.X(), offset);
} else {
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(fixup_label);
__ add(root_reg.X(), obj.X(), offset);
}
@@ -5089,7 +5073,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
} else {
- vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(fixup_label);
__ ldr(root_reg, MemOperand(obj, offset));
}
@@ -5100,7 +5084,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
- vixl::Register obj,
+ Register obj,
uint32_t offset,
Register temp,
bool needs_null_check,
@@ -5124,7 +5108,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
- vixl::Register obj,
+ Register obj,
uint32_t data_offset,
Location index,
Register temp,
@@ -5155,7 +5139,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
- vixl::Register obj,
+ Register obj,
uint32_t offset,
Location index,
size_t scale_factor,
@@ -5204,23 +5188,13 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
"art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
- __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
- // Introduce a dependency on the high bits of rb_state, which shall
- // be all zeroes, to prevent load-load reordering, and without using
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
- // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
- Register temp2 = temps.AcquireW();
- __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
- // obj is unchanged by this operation, but its value now depends on
- // temp2, which depends on temp.
- __ Add(obj, obj, Operand(temp2));
- temps.Release(temp2);
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
+ __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
// The actual reference load.
if (index.IsValid()) {
@@ -5246,7 +5220,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
Load(type, ref_reg, HeapOperand(obj, computed_offset));
} else {
- temp2 = temps.AcquireW();
+ Register temp2 = temps.AcquireW();
__ Add(temp2, obj, offset);
Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
temps.Release(temp2);
@@ -5267,13 +5241,16 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// Slow path used to mark the object `ref` when it is gray.
SlowPathCodeARM64* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref);
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
AddSlowPath(slow_path);
// if (rb_state == ReadBarrier::gray_ptr_)
// ref = ReadBarrier::Mark(ref);
- __ Cmp(temp, ReadBarrier::gray_ptr_);
- __ B(eq, slow_path->GetEntryLabel());
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
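(Two details of the rewritten Baker read-barrier fast path above, shown as plain arithmetic. First, `Add(obj, obj, temp LSR #32)` leaves `obj` unchanged because the 32-bit monitor load zero-extends into the 64-bit register; second, with white=0, gray=1, black=2 as asserted, a single bit of the lock word identifies gray. Sketch, with the shift kept symbolic.)
#include <cstdint>
// (temp >> 32) is always 0 after a 32-bit load, so the add keeps the address value intact
// while creating a data dependency on the load (no memory barrier needed).
uint64_t AddLockWordDependencySketch(uint64_t obj_addr, uint32_t lock_word) {
  const uint64_t temp = lock_word;   // upper 32 bits are zero
  return obj_addr + (temp >> 32);    // == obj_addr
}
// Gray check: rb_state is 1 only for gray objects, so its low bit is sufficient.
bool IsGraySketch(uint32_t lock_word, unsigned read_barrier_state_shift) {
  return ((lock_word >> read_barrier_state_shift) & 1u) != 0u;
}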
@@ -5348,16 +5325,19 @@ void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- uint32_t method_offset = 0;
if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
- method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
instruction->GetIndex(), kArm64PointerSize).SizeValue();
+ __ Ldr(XRegisterFrom(locations->Out()),
+ MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
} else {
- method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- instruction->GetIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ instruction->GetIndex(), kArm64PointerSize));
+ __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
+ mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
+ __ Ldr(XRegisterFrom(locations->Out()),
+ MemOperand(XRegisterFrom(locations->Out()), method_offset));
}
- __ Ldr(XRegisterFrom(locations->Out()),
- MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d4bf695602..921ce10aaa 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -20,15 +20,19 @@
#include "arch/arm64/quick_method_frame_info_arm64.h"
#include "code_generator.h"
#include "common_arm64.h"
-#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
+#include "string_reference.h"
#include "utils/arm64/assembler_arm64.h"
-#include "utils/string_reference.h"
#include "utils/type_reference.h"
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
namespace art {
namespace arm64 {
@@ -36,34 +40,49 @@ namespace arm64 {
class CodeGeneratorARM64;
// Use a local definition to prevent copying mistakes.
-static constexpr size_t kArm64WordSize = kArm64PointerSize;
-
-static const vixl::Register kParameterCoreRegisters[] = {
- vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7
+static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);
+
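(The new static_cast above suggests kArm64PointerSize is no longer a plain integer; a scoped enum does not convert implicitly, as in this stand-alone illustration. PointerSize here is a stand-in, not the ART definition.)
#include <cstddef>
enum class PointerSize : size_t { k32 = 4u, k64 = 8u };   // assumed shape of the runtime type
constexpr size_t kExampleWordSize = static_cast<size_t>(PointerSize::k64);  // explicit cast required
// constexpr size_t kBad = PointerSize::k64;  // would not compile: no implicit conversion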
+static const vixl::aarch64::Register kParameterCoreRegisters[] = {
+ vixl::aarch64::x1,
+ vixl::aarch64::x2,
+ vixl::aarch64::x3,
+ vixl::aarch64::x4,
+ vixl::aarch64::x5,
+ vixl::aarch64::x6,
+ vixl::aarch64::x7
};
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
-static const vixl::FPRegister kParameterFPRegisters[] = {
- vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7
+static const vixl::aarch64::FPRegister kParameterFPRegisters[] = {
+ vixl::aarch64::d0,
+ vixl::aarch64::d1,
+ vixl::aarch64::d2,
+ vixl::aarch64::d3,
+ vixl::aarch64::d4,
+ vixl::aarch64::d5,
+ vixl::aarch64::d6,
+ vixl::aarch64::d7
};
static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
-const vixl::Register tr = vixl::x19; // Thread Register
-static const vixl::Register kArtMethodRegister = vixl::x0; // Method register on invoke.
-
-const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
-const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
+// Thread Register
+const vixl::aarch64::Register tr = vixl::aarch64::x19;
+// Method register on invoke.
+static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0;
+const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0,
+ vixl::aarch64::ip1);
+const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31);
-const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
+const vixl::aarch64::CPURegList runtime_reserved_core_registers(tr, vixl::aarch64::lr);
// Callee-saved registers AAPCS64 (without x19 - Thread Register)
-const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
- vixl::kXRegSize,
- vixl::x20.code(),
- vixl::x30.code());
-const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister,
- vixl::kDRegSize,
- vixl::d8.code(),
- vixl::d15.code());
+const vixl::aarch64::CPURegList callee_saved_core_registers(vixl::aarch64::CPURegister::kRegister,
+ vixl::aarch64::kXRegSize,
+ vixl::aarch64::x20.GetCode(),
+ vixl::aarch64::x30.GetCode());
+const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kFPRegister,
+ vixl::aarch64::kDRegSize,
+ vixl::aarch64::d8.GetCode(),
+ vixl::aarch64::d15.GetCode());
Location ARM64ReturnLocation(Primitive::Type return_type);
class SlowPathCodeARM64 : public SlowPathCode {
@@ -71,15 +90,15 @@ class SlowPathCodeARM64 : public SlowPathCode {
explicit SlowPathCodeARM64(HInstruction* instruction)
: SlowPathCode(instruction), entry_label_(), exit_label_() {}
- vixl::Label* GetEntryLabel() { return &entry_label_; }
- vixl::Label* GetExitLabel() { return &exit_label_; }
+ vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; }
+ vixl::aarch64::Label* GetExitLabel() { return &exit_label_; }
void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
private:
- vixl::Label entry_label_;
- vixl::Label exit_label_;
+ vixl::aarch64::Label entry_label_;
+ vixl::aarch64::Label exit_label_;
DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
};
@@ -89,27 +108,42 @@ class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> {
explicit JumpTableARM64(HPackedSwitch* switch_instr)
: switch_instr_(switch_instr), table_start_() {}
- vixl::Label* GetTableStartLabel() { return &table_start_; }
+ vixl::aarch64::Label* GetTableStartLabel() { return &table_start_; }
void EmitTable(CodeGeneratorARM64* codegen);
private:
HPackedSwitch* const switch_instr_;
- vixl::Label table_start_;
+ vixl::aarch64::Label table_start_;
DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
};
-static const vixl::Register kRuntimeParameterCoreRegisters[] =
- { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 };
+static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] =
+ { vixl::aarch64::x0,
+ vixl::aarch64::x1,
+ vixl::aarch64::x2,
+ vixl::aarch64::x3,
+ vixl::aarch64::x4,
+ vixl::aarch64::x5,
+ vixl::aarch64::x6,
+ vixl::aarch64::x7 };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
-static const vixl::FPRegister kRuntimeParameterFpuRegisters[] =
- { vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7 };
+static const vixl::aarch64::FPRegister kRuntimeParameterFpuRegisters[] =
+ { vixl::aarch64::d0,
+ vixl::aarch64::d1,
+ vixl::aarch64::d2,
+ vixl::aarch64::d3,
+ vixl::aarch64::d4,
+ vixl::aarch64::d5,
+ vixl::aarch64::d6,
+ vixl::aarch64::d7 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
-class InvokeRuntimeCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> {
+class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register,
+ vixl::aarch64::FPRegister> {
public:
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
@@ -126,7 +160,8 @@ class InvokeRuntimeCallingConvention : public CallingConvention<vixl::Register,
DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};
-class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> {
+class InvokeDexCallingConvention : public CallingConvention<vixl::aarch64::Register,
+ vixl::aarch64::FPRegister> {
public:
InvokeDexCallingConvention()
: CallingConvention(kParameterCoreRegisters,
@@ -166,23 +201,23 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
FieldAccessCallingConventionARM64() {}
Location GetObjectLocation() const OVERRIDE {
- return helpers::LocationFrom(vixl::x1);
+ return helpers::LocationFrom(vixl::aarch64::x1);
}
Location GetFieldIndexLocation() const OVERRIDE {
- return helpers::LocationFrom(vixl::x0);
+ return helpers::LocationFrom(vixl::aarch64::x0);
}
Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
- return helpers::LocationFrom(vixl::x0);
+ return helpers::LocationFrom(vixl::aarch64::x0);
}
Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
return Primitive::Is64BitType(type)
- ? helpers::LocationFrom(vixl::x2)
+ ? helpers::LocationFrom(vixl::aarch64::x2)
: (is_instance
- ? helpers::LocationFrom(vixl::x2)
- : helpers::LocationFrom(vixl::x1));
+ ? helpers::LocationFrom(vixl::aarch64::x2)
+ : helpers::LocationFrom(vixl::aarch64::x1));
}
Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
- return helpers::LocationFrom(vixl::d0);
+ return helpers::LocationFrom(vixl::aarch64::d0);
}
private:
@@ -208,10 +243,11 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
}
Arm64Assembler* GetAssembler() const { return assembler_; }
- vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
private:
- void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
+ void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
+ vixl::aarch64::Register class_reg);
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* instr);
@@ -256,9 +292,9 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
// while honoring read barriers (if any).
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
- vixl::Register obj,
+ vixl::aarch64::Register obj,
uint32_t offset,
- vixl::Label* fixup_label = nullptr);
+ vixl::aarch64::Label* fixup_label = nullptr);
// Generate a floating-point comparison.
void GenerateFcmp(HInstruction* instruction);
@@ -266,8 +302,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void HandleShift(HBinaryOperation* instr);
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
- vixl::Label* true_target,
- vixl::Label* false_target);
+ vixl::aarch64::Label* true_target,
+ vixl::aarch64::Label* false_target);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -327,12 +363,12 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
private:
Arm64Assembler* GetAssembler() const;
- vixl::MacroAssembler* GetVIXLAssembler() const {
- return GetAssembler()->vixl_masm_;
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler() const {
+ return GetAssembler()->GetVIXLAssembler();
}
CodeGeneratorARM64* const codegen_;
- vixl::UseScratchRegisterScope vixl_temps_;
+ vixl::aarch64::UseScratchRegisterScope vixl_temps_;
DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
};
@@ -348,12 +384,12 @@ class CodeGeneratorARM64 : public CodeGenerator {
void GenerateFrameEntry() OVERRIDE;
void GenerateFrameExit() OVERRIDE;
- vixl::CPURegList GetFramePreservedCoreRegisters() const;
- vixl::CPURegList GetFramePreservedFPRegisters() const;
+ vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const;
+ vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const;
void Bind(HBasicBlock* block) OVERRIDE;
- vixl::Label* GetLabelOf(HBasicBlock* block) {
+ vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) {
block = FirstNonEmptyBlock(block);
return &(block_labels_[block->GetBlockId()]);
}
@@ -368,19 +404,21 @@ class CodeGeneratorARM64 : public CodeGenerator {
}
uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
- vixl::Label* block_entry_label = GetLabelOf(block);
+ vixl::aarch64::Label* block_entry_label = GetLabelOf(block);
DCHECK(block_entry_label->IsBound());
- return block_entry_label->location();
+ return block_entry_label->GetLocation();
}
HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
- vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
// Emit a write barrier.
- void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null);
+ void MarkGCCard(vixl::aarch64::Register object,
+ vixl::aarch64::Register value,
+ bool value_can_be_null);
void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -399,8 +437,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
// (xzr, wzr), or make for poor allocatable registers (sp alignment
// requirements, etc.). This also facilitates our task as all other registers
// can easily be mapped to or from their type and index or code.
- static const int kNumberOfAllocatableRegisters = vixl::kNumberOfRegisters - 1;
- static const int kNumberOfAllocatableFPRegisters = vixl::kNumberOfFPRegisters;
+ static const int kNumberOfAllocatableRegisters = vixl::aarch64::kNumberOfRegisters - 1;
+ static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfFPRegisters;
static constexpr int kNumberOfAllocatableRegisterPairs = 0;
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -418,6 +456,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
block_labels_.resize(GetGraph()->GetBlocks().size());
}
+ // We want to use the STP and LDP instructions to spill and restore registers for slow paths.
+ // These instructions can only encode offsets that are multiples of the register size accessed.
+ uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return vixl::aarch64::kXRegSizeInBytes; }
+
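(As a quick sanity check of the comment above: the signed-offset form of a 64-bit STP/LDP encodes a 7-bit immediate scaled by 8, so only 8-byte-multiple offsets in [-512, 504] are directly encodable. Sketch.)
#include <cstdint>
// Encodable byte offsets for a 64-bit STP/LDP signed-offset form (imm7, scaled by 8).
bool IsEncodableStpOffsetX(int64_t byte_offset) {
  return (byte_offset % 8 == 0) && (byte_offset >= -512) && (byte_offset <= 504);
}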
JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARM64(switch_instr));
return jump_tables_.back().get();
@@ -426,18 +468,24 @@ class CodeGeneratorARM64 : public CodeGenerator {
void Finalize(CodeAllocator* allocator) OVERRIDE;
// Code generation helpers.
- void MoveConstant(vixl::CPURegister destination, HConstant* constant);
+ void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
void MoveConstant(Location destination, int32_t value) OVERRIDE;
void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
- void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
- void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+ void Load(Primitive::Type type,
+ vixl::aarch64::CPURegister dst,
+ const vixl::aarch64::MemOperand& src);
+ void Store(Primitive::Type type,
+ vixl::aarch64::CPURegister src,
+ const vixl::aarch64::MemOperand& dst);
void LoadAcquire(HInstruction* instruction,
- vixl::CPURegister dst,
- const vixl::MemOperand& src,
+ vixl::aarch64::CPURegister dst,
+ const vixl::aarch64::MemOperand& src,
bool needs_null_check);
- void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+ void StoreRelease(Primitive::Type type,
+ vixl::aarch64::CPURegister src,
+ const vixl::aarch64::MemOperand& dst);
// Generate code to invoke a runtime entry point.
void InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -450,6 +498,12 @@ class CodeGeneratorARM64 : public CodeGenerator {
uint32_t dex_pc,
SlowPathCode* slow_path);
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
@@ -484,32 +538,33 @@ class CodeGeneratorARM64 : public CodeGenerator {
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
- vixl::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
- uint32_t string_index,
- vixl::Label* adrp_label = nullptr);
+ vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
+ uint32_t string_index,
+ vixl::aarch64::Label* adrp_label = nullptr);
// Add a new PC-relative type patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
- vixl::Label* NewPcRelativeTypePatch(const DexFile& dex_file,
- uint32_t type_index,
- vixl::Label* adrp_label = nullptr);
+ vixl::aarch64::Label* NewPcRelativeTypePatch(const DexFile& dex_file,
+ uint32_t type_index,
+ vixl::aarch64::Label* adrp_label = nullptr);
// Add a new PC-relative dex cache array patch for an instruction and return
// the label to be bound before the instruction. The instruction will be
// either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
// pointing to the associated ADRP patch label).
- vixl::Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
- uint32_t element_offset,
- vixl::Label* adrp_label = nullptr);
-
- vixl::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
- uint32_t string_index);
- vixl::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
- uint32_t type_index);
- vixl::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
- vixl::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
+ vixl::aarch64::Label* NewPcRelativeDexCacheArrayPatch(
+ const DexFile& dex_file,
+ uint32_t element_offset,
+ vixl::aarch64::Label* adrp_label = nullptr);
+
+ vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+ uint32_t string_index);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+ uint32_t type_index);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
+ vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
@@ -517,29 +572,29 @@ class CodeGeneratorARM64 : public CodeGenerator {
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
- vixl::Register obj,
+ vixl::aarch64::Register obj,
uint32_t offset,
- vixl::Register temp,
+ vixl::aarch64::Register temp,
bool needs_null_check,
bool use_load_acquire);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
- vixl::Register obj,
+ vixl::aarch64::Register obj,
uint32_t data_offset,
Location index,
- vixl::Register temp,
+ vixl::aarch64::Register temp,
bool needs_null_check);
// Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
- vixl::Register obj,
+ vixl::aarch64::Register obj,
uint32_t offset,
Location index,
size_t scale_factor,
- vixl::Register temp,
+ vixl::aarch64::Register temp,
bool needs_null_check,
bool use_load_acquire);
@@ -597,24 +652,25 @@ class CodeGeneratorARM64 : public CodeGenerator {
void GenerateExplicitNullCheck(HNullCheck* instruction);
private:
- using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
- using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::Literal<uint32_t>*>;
+ using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
+ using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
using MethodToLiteralMap = ArenaSafeMap<MethodReference,
- vixl::Literal<uint64_t>*,
+ vixl::aarch64::Literal<uint64_t>*,
MethodReferenceComparator>;
using BootStringToLiteralMap = ArenaSafeMap<StringReference,
- vixl::Literal<uint32_t>*,
+ vixl::aarch64::Literal<uint32_t>*,
StringReferenceValueComparator>;
using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
- vixl::Literal<uint32_t>*,
+ vixl::aarch64::Literal<uint32_t>*,
TypeReferenceValueComparator>;
- vixl::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
- vixl::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
- vixl::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
- MethodToLiteralMap* map);
- vixl::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
- vixl::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value,
+ Uint32ToLiteralMap* map);
+ vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
+ vixl::aarch64::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
+ MethodToLiteralMap* map);
+ vixl::aarch64::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
+ vixl::aarch64::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
// The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
// and boot image strings/types. The only difference is the interpretation of the
@@ -626,21 +682,21 @@ class CodeGeneratorARM64 : public CodeGenerator {
const DexFile& target_dex_file;
// Either the dex cache array element offset or the string/type index.
uint32_t offset_or_index;
- vixl::Label label;
- vixl::Label* pc_insn_label;
+ vixl::aarch64::Label label;
+ vixl::aarch64::Label* pc_insn_label;
};
- vixl::Label* NewPcRelativePatch(const DexFile& dex_file,
- uint32_t offset_or_index,
- vixl::Label* adrp_label,
- ArenaDeque<PcRelativePatchInfo>* patches);
+ vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file,
+ uint32_t offset_or_index,
+ vixl::aarch64::Label* adrp_label,
+ ArenaDeque<PcRelativePatchInfo>* patches);
void EmitJumpTables();
// Labels for each block that will be compiled.
- // We use a deque so that the `vixl::Label` objects do not move in memory.
- ArenaDeque<vixl::Label> block_labels_; // Indexed by block id.
- vixl::Label frame_entry_label_;
+ // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
+ ArenaDeque<vixl::aarch64::Label> block_labels_; // Indexed by block id.
+ vixl::aarch64::Label frame_entry_label_;
ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;
LocationsBuilderARM64 location_builder_;
@@ -659,7 +715,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
MethodToLiteralMap call_patches_;
// Relative call patch info.
// Using ArenaDeque<> which retains element addresses on push/emplace_back().
- ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_;
+ ArenaDeque<MethodPatchInfo<vixl::aarch64::Label>> relative_call_patches_;
// PC-relative DexCache access info.
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
// Deduplication map for boot string literals for kBootImageLinkTimeAddress.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 37f1c35c50..a7fbc84340 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -145,9 +145,9 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type)
return MipsReturnLocation(type);
}
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value()
class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
public:
@@ -351,14 +351,12 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
instruction_,
instruction_->GetDexPc(),
this,
IsDirectEntrypoint(kQuickTestSuspend));
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -415,7 +413,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
this,
IsDirectEntrypoint(kQuickInstanceofNonTrivial));
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -482,19 +480,30 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
move_resolver_(graph->GetArena(), this),
assembler_(graph->GetArena(), &isa_features),
isa_features_(isa_features),
+ uint32_literals_(std::less<uint32_t>(),
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
method_patches_(MethodReferenceComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
call_patches_(MethodReferenceComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+ pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_string_patches_(StringReferenceValueComparator(),
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_type_patches_(TypeReferenceValueComparator(),
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_address_patches_(std::less<uint32_t>(),
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ clobbered_ra_(false) {
// Save RA (containing the return address) to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(RA));
}
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value()
void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
// Ensure that we fix up branches.
@@ -688,6 +697,16 @@ void CodeGeneratorMIPS::ComputeSpillMask() {
if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) {
core_spill_mask_ |= (1 << ZERO);
}
+ // If RA is clobbered by PC-relative operations on R2 and it's the only spilled register
+ // (this can happen in leaf methods), artificially spill the ZERO register in order to
+ // force explicit saving and restoring of RA. RA isn't saved/restored when it's the only
+ // spilled register.
+ // TODO: Can this be improved? It causes creation of a stack frame (while RA might be
+ // saved in an unused temporary register) and saving of RA and the current method pointer
+ // in the frame.
+ if (clobbered_ra_ && core_spill_mask_ == (1u << RA) && fpu_spill_mask_ == 0) {
+ core_spill_mask_ |= (1 << ZERO);
+ }
}
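
For illustration, a minimal standalone sketch of the spill-mask adjustment above. The bit positions chosen for ZERO and RA are placeholders rather than ART's MIPS register numbering, and the alignment rationale in the first comment is an assumption, not something stated in the patch:

#include <bitset>
#include <cstdint>

// Hypothetical bit positions; ART derives these from its MIPS register enums.
constexpr uint32_t kZeroBit = 1u << 0;
constexpr uint32_t kRaBit = 1u << 31;

uint32_t AdjustCoreSpillMask(uint32_t core_mask, uint32_t fpu_mask, bool clobbered_ra) {
  // Keep the number of spilled core registers even when FPU registers are also
  // spilled (presumably so the 8-byte FPU save slots stay aligned).
  if (fpu_mask != 0u && (std::bitset<32>(core_mask).count() % 2u) != 0u) {
    core_mask |= kZeroBit;
  }
  // If PC-relative code on R2 clobbered RA and RA would be the only spilled
  // register, also "spill" ZERO to force an explicit save/restore of RA.
  if (clobbered_ra && core_mask == kRaBit && fpu_mask == 0u) {
    core_mask |= kZeroBit;
  }
  return core_mask;
}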
static dwarf::Reg DWARFReg(Register reg) {
@@ -962,7 +981,12 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
size_t size =
method_patches_.size() +
call_patches_.size() +
- pc_relative_dex_cache_patches_.size();
+ pc_relative_dex_cache_patches_.size() +
+ pc_relative_string_patches_.size() +
+ pc_relative_type_patches_.size() +
+ boot_image_string_patches_.size() +
+ boot_image_type_patches_.size() +
+ boot_image_address_patches_.size();
linker_patches->reserve(size);
for (const auto& entry : method_patches_) {
const MethodReference& target_method = entry.first;
@@ -994,6 +1018,71 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
pc_rel_offset,
base_element_offset));
}
+ for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
+ const DexFile& dex_file = info.target_dex_file;
+ size_t string_index = info.offset_or_index;
+ DCHECK(info.high_label.IsBound());
+ uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+ // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+ // the assembler's base label used for PC-relative literals.
+ uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+ ? __ GetLabelLocation(&info.pc_rel_label)
+ : __ GetPcRelBaseLabelLocation();
+ linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset,
+ &dex_file,
+ pc_rel_offset,
+ string_index));
+ }
+ for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+ const DexFile& dex_file = info.target_dex_file;
+ size_t type_index = info.offset_or_index;
+ DCHECK(info.high_label.IsBound());
+ uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+ // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+ // the assembler's base label used for PC-relative literals.
+ uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+ ? __ GetLabelLocation(&info.pc_rel_label)
+ : __ GetPcRelBaseLabelLocation();
+ linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset,
+ &dex_file,
+ pc_rel_offset,
+ type_index));
+ }
+ for (const auto& entry : boot_image_string_patches_) {
+ const StringReference& target_string = entry.first;
+ Literal* literal = entry.second;
+ DCHECK(literal->GetLabel()->IsBound());
+ uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+ linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+ target_string.dex_file,
+ target_string.string_index));
+ }
+ for (const auto& entry : boot_image_type_patches_) {
+ const TypeReference& target_type = entry.first;
+ Literal* literal = entry.second;
+ DCHECK(literal->GetLabel()->IsBound());
+ uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+ linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+ target_type.dex_file,
+ target_type.type_index));
+ }
+ for (const auto& entry : boot_image_address_patches_) {
+ DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+ Literal* literal = entry.second;
+ DCHECK(literal->GetLabel()->IsBound());
+ uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+ linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+ }
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
+ const DexFile& dex_file, uint32_t string_index) {
+ return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
+ const DexFile& dex_file, uint32_t type_index) {
+ return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
}
CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
@@ -1007,6 +1096,12 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch(
return &patches->back();
}
+Literal* CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) {
+ return map->GetOrCreate(
+ value,
+ [this, value]() { return __ NewLiteral<uint32_t>(value); });
+}
+
Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method,
MethodToLiteralMap* map) {
return map->GetOrCreate(
@@ -1022,6 +1117,26 @@ Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_
return DeduplicateMethodLiteral(target_method, &call_patches_);
}
+Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+ uint32_t string_index) {
+ return boot_image_string_patches_.GetOrCreate(
+ StringReference(&dex_file, string_index),
+ [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+ uint32_t type_index) {
+ return boot_image_type_patches_.GetOrCreate(
+ TypeReference(&dex_file, type_index),
+ [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
+ bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
+ Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+}
+
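
The Deduplicate* helpers above all follow the same map-backed GetOrCreate pattern: look the key up and invoke the factory lambda to create a new literal only on a miss. A rough standalone equivalent using std::map; the names and types here are illustrative, not ART's ArenaSafeMap API:

#include <cstdint>
#include <functional>
#include <map>

// Stand-in for Literal*; any mapped value type works the same way.
using FakeLiteral = int;

template <typename Key>
FakeLiteral GetOrCreate(std::map<Key, FakeLiteral>* map,
                        const Key& key,
                        const std::function<FakeLiteral()>& factory) {
  auto it = map->lower_bound(key);
  if (it == map->end() || it->first != key) {
    // First time this key is seen: create the literal once and cache it.
    it = map->emplace_hint(it, key, factory());
  }
  // Every later request for the same key reuses the cached literal.
  return it->second;
}

int main() {
  std::map<uint32_t, FakeLiteral> uint32_literals;
  FakeLiteral a = GetOrCreate<uint32_t>(&uint32_literals, 0x1234u, [] { return 1; });
  FakeLiteral b = GetOrCreate<uint32_t>(&uint32_literals, 0x1234u, [] { return 2; });
  return (a == b) ? 0 : 1;  // Deduplicated: both calls see the literal created first.
}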
void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
MipsLabel done;
Register card = AT;
@@ -1030,7 +1145,7 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
__ LoadFromOffset(kLoadWord,
card,
TR,
- Thread::CardTableOffset<kMipsWordSize>().Int32Value());
+ Thread::CardTableOffset<kMipsPointerSize>().Int32Value());
__ Srl(temp, object, gc::accounting::CardTable::kCardShift);
__ Addu(temp, card, temp);
__ Sb(card, temp, 0);
@@ -1067,6 +1182,15 @@ void CodeGeneratorMIPS::SetupBlockedRegisters() const {
blocked_fpu_registers_[i] = true;
}
+ if (GetGraph()->IsDebuggable()) {
+ // Stubs do not save callee-save floating point registers. If the graph
+ // is debuggable, we need to deal with these registers differently. For
+ // now, just block them.
+ for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+ blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+ }
+ }
+
UpdateBlockedPairRegisters();
}
@@ -1113,7 +1237,7 @@ void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kMipsWordSize>(entrypoint).Int32Value(),
+ InvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(),
instruction,
dex_pc,
slow_path,
@@ -1164,7 +1288,7 @@ void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instructi
__ LoadFromOffset(kLoadUnsignedHalfword,
TMP,
TR,
- Thread::ThreadFlagsOffset<kMipsWordSize>().Int32Value());
+ Thread::ThreadFlagsOffset<kMipsPointerSize>().Int32Value());
if (successor == nullptr) {
__ Bnez(TMP, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
@@ -1855,7 +1979,7 @@ void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
bool needs_runtime_call = instruction->NeedsTypeCheck();
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+ needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
if (needs_runtime_call) {
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2467,7 +2591,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst
void LocationsBuilderMIPS::VisitDiv(HDiv* div) {
Primitive::Type type = div->GetResultType();
LocationSummary::CallKind call_kind = (type == Primitive::kPrimLong)
- ? LocationSummary::kCall
+ ? LocationSummary::kCallOnMainOnly
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -3430,7 +3554,7 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
bool generate_volatile = field_info.IsVolatile() && is_wide;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+ instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
if (generate_volatile) {
@@ -3440,7 +3564,8 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
if (field_type == Primitive::kPrimLong) {
locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimLong));
} else {
- locations->SetOut(Location::RequiresFpuRegister());
+ // Use Location::Any() to avoid running out of available FP registers.
+ locations->SetOut(Location::Any());
// Need some temp core regs since FP results are returned in core registers
Location reg = calling_convention.GetReturnLocation(Primitive::kPrimLong);
locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>()));
@@ -3505,11 +3630,23 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
IsDirectEntrypoint(kQuickA64Load));
CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
if (type == Primitive::kPrimDouble) {
- // Need to move to FP regs since FP results are returned in core registers.
- __ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
- locations->Out().AsFpuRegister<FRegister>());
- __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
- locations->Out().AsFpuRegister<FRegister>());
+ // FP results are returned in core registers. Need to move them.
+ Location out = locations->Out();
+ if (out.IsFpuRegister()) {
+ __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>());
+ __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+ out.AsFpuRegister<FRegister>());
+ } else {
+ DCHECK(out.IsDoubleStackSlot());
+ __ StoreToOffset(kStoreWord,
+ locations->GetTemp(1).AsRegister<Register>(),
+ SP,
+ out.GetStackIndex());
+ __ StoreToOffset(kStoreWord,
+ locations->GetTemp(2).AsRegister<Register>(),
+ SP,
+ out.GetStackIndex() + 4);
+ }
}
} else {
if (!Primitive::IsFloatingPointType(type)) {
@@ -3557,7 +3694,7 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field
bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
bool generate_volatile = field_info.IsVolatile() && is_wide;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+ instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
if (generate_volatile) {
@@ -3568,7 +3705,8 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field
locations->SetInAt(1, Location::RegisterPairLocation(
calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
} else {
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ // Use Location::Any() to avoid running out of available FP registers.
+ locations->SetInAt(1, Location::Any());
// Pass FP parameters in core registers.
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
@@ -3627,10 +3765,28 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
if (type == Primitive::kPrimDouble) {
// Pass FP parameters in core registers.
- __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
- locations->InAt(1).AsFpuRegister<FRegister>());
- __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
- locations->InAt(1).AsFpuRegister<FRegister>());
+ Location in = locations->InAt(1);
+ if (in.IsFpuRegister()) {
+ __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), in.AsFpuRegister<FRegister>());
+ __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+ in.AsFpuRegister<FRegister>());
+ } else if (in.IsDoubleStackSlot()) {
+ __ LoadFromOffset(kLoadWord,
+ locations->GetTemp(1).AsRegister<Register>(),
+ SP,
+ in.GetStackIndex());
+ __ LoadFromOffset(kLoadWord,
+ locations->GetTemp(2).AsRegister<Register>(),
+ SP,
+ in.GetStackIndex() + 4);
+ } else {
+ DCHECK(in.IsConstant());
+ DCHECK(in.GetConstant()->IsDoubleConstant());
+ int64_t value = bit_cast<int64_t, double>(in.GetConstant()->AsDoubleConstant()->GetValue());
+ __ LoadConst64(locations->GetTemp(2).AsRegister<Register>(),
+ locations->GetTemp(1).AsRegister<Register>(),
+ value);
+ }
}
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
instruction,
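
The kPrimDouble paths above move an FP value through a pair of core registers; when the stored value is a constant, bit_cast<int64_t, double>() supplies the raw bits that LoadConst64 materializes into the two temps. A standalone sketch of that word split, using std::memcpy in place of ART's bit_cast:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double value = 2.5;
  int64_t bits = 0;
  static_assert(sizeof(bits) == sizeof(value), "double must be 64-bit");
  std::memcpy(&bits, &value, sizeof(bits));    // bit_cast<int64_t, double>(value)
  uint32_t low = static_cast<uint32_t>(bits);  // low 32 bits
  uint32_t high = static_cast<uint32_t>(static_cast<uint64_t>(bits) >> 32);  // high 32 bits
  std::printf("low=0x%08x high=0x%08x\n", low, high);  // 2.5 -> low=0x00000000 high=0x40040000
  return 0;
}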
@@ -3696,6 +3852,23 @@ void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* inst
HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc());
}
+void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
+ HInstruction* instruction ATTRIBUTE_UNUSED,
+ Location root,
+ Register obj,
+ uint32_t offset) {
+ Register root_reg = root.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ UNIMPLEMENTED(FATAL) << "for read barrier";
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
+ }
+}
+
void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind =
instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
@@ -3772,11 +3945,9 @@ void LocationsBuilderMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
- uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- invoke->GetImtIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
Location receiver = invoke->GetLocations()->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
// Set the hidden argument.
__ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
@@ -3790,6 +3961,10 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke
__ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ __ LoadFromOffset(kLoadWord, temp, temp,
+ mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ invoke->GetImtIndex(), kMipsPointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
@@ -3859,16 +4034,80 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen
}
HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
- HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
- // TODO: Implement other kinds.
- return HLoadString::LoadKind::kDexCacheViaMethod;
+ HLoadString::LoadKind desired_string_load_kind) {
+ if (kEmitCompilerReadBarrier) {
+ UNIMPLEMENTED(FATAL) << "for read barrier";
+ }
+ // We disable PC-relative load when there is an irreducible loop, as the optimization
+ // is incompatible with it.
+ bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+ bool fallback_load = has_irreducible_loops;
+ switch (desired_string_load_kind) {
+ case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+ DCHECK(!GetCompilerOptions().GetCompilePic());
+ break;
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ DCHECK(GetCompilerOptions().GetCompilePic());
+ break;
+ case HLoadString::LoadKind::kBootImageAddress:
+ break;
+ case HLoadString::LoadKind::kDexCacheAddress:
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ fallback_load = false;
+ break;
+ case HLoadString::LoadKind::kDexCachePcRelative:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
+ // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+ // with irreducible loops.
+ break;
+ case HLoadString::LoadKind::kDexCacheViaMethod:
+ fallback_load = false;
+ break;
+ }
+ if (fallback_load) {
+ desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+ }
+ return desired_string_load_kind;
}
HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
- DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass);
- // TODO: Implement other kinds.
- return HLoadClass::LoadKind::kDexCacheViaMethod;
+ if (kEmitCompilerReadBarrier) {
+ UNIMPLEMENTED(FATAL) << "for read barrier";
+ }
+ // We disable PC-relative load when there is an irreducible loop, as the optimization
+ // is incompatible with it.
+ bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+ bool fallback_load = has_irreducible_loops;
+ switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kReferrersClass:
+ fallback_load = false;
+ break;
+ case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+ DCHECK(!GetCompilerOptions().GetCompilePic());
+ break;
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+ DCHECK(GetCompilerOptions().GetCompilePic());
+ break;
+ case HLoadClass::LoadKind::kBootImageAddress:
+ break;
+ case HLoadClass::LoadKind::kDexCacheAddress:
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ fallback_load = false;
+ break;
+ case HLoadClass::LoadKind::kDexCachePcRelative:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
+ // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+ // with irreducible loops.
+ break;
+ case HLoadClass::LoadKind::kDexCacheViaMethod:
+ fallback_load = false;
+ break;
+ }
+ if (fallback_load) {
+ desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+ }
+ return desired_class_load_kind;
}
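
Both GetSupportedLoad*Kind overrides above follow the same validate-and-fall-back pattern: fallback_load starts out true whenever the graph has irreducible loops (the MIPS fixup passes that set up the PC-relative base cannot handle them) and is cleared only for kinds that never need that base, with everything else degrading to kDexCacheViaMethod. A schematic standalone version of that decision; the enum and predicate names are stand-ins and only a subset of the real load kinds is modeled:

#include <cassert>

// Stand-ins for a subset of the HLoadString/HLoadClass load kinds.
enum class LoadKind { kBootImagePcRelative, kDexCachePcRelative, kDexCacheViaMethod };

LoadKind ChooseLoadKind(LoadKind desired, bool has_irreducible_loops) {
  // The real code treats more kinds as needing the PC-relative base on R2;
  // this models only the explicitly PC-relative ones.
  bool needs_pc_relative_base = desired == LoadKind::kBootImagePcRelative ||
                                desired == LoadKind::kDexCachePcRelative;
  // The base is set up by fixup passes that cannot handle irreducible loops,
  // so fall back to the always-supported method-based load.
  if (needs_pc_relative_base && has_irreducible_loops) {
    return LoadKind::kDexCacheViaMethod;
  }
  return desired;
}

int main() {
  assert(ChooseLoadKind(LoadKind::kBootImagePcRelative, /* has_irreducible_loops */ true) ==
         LoadKind::kDexCacheViaMethod);
  assert(ChooseLoadKind(LoadKind::kDexCachePcRelative, false) == LoadKind::kDexCachePcRelative);
  return 0;
}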
Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
@@ -4046,7 +4285,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
T9,
callee_method.AsRegister<Register>(),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kMipsWordSize).Int32Value());
+ kMipsPointerSize).Int32Value());
// T9()
__ Jalr(T9);
__ Nop();
@@ -4079,7 +4318,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem
size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
invoke->GetVTableIndex(), kMipsPointerSize).SizeValue();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
// temp = object->GetClass();
DCHECK(receiver.IsRegister());
@@ -4105,11 +4344,40 @@ void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
- InvokeRuntimeCallingConvention calling_convention;
- CodeGenerator::CreateLoadClassLocationSummary(
- cls,
- Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- Location::RegisterLocation(V0));
+ if (cls->NeedsAccessCheck()) {
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGenerator::CreateLoadClassLocationSummary(
+ cls,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Location::RegisterLocation(V0),
+ /* code_generator_supports_read_barrier */ false); // TODO: revisit this bool.
+ return;
+ }
+
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+ switch (load_kind) {
+ // We need an extra register for PC-relative literals on R2.
+ case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+ case HLoadClass::LoadKind::kBootImageAddress:
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+ if (codegen_->GetInstructionSetFeatures().IsR6()) {
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ // We need an extra register for PC-relative dex cache accesses.
+ case HLoadClass::LoadKind::kDexCachePcRelative:
+ case HLoadClass::LoadKind::kReferrersClass:
+ case HLoadClass::LoadKind::kDexCacheViaMethod:
+ locations->SetInAt(0, Location::RequiresRegister());
+ break;
+ default:
+ break;
+ }
+ locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
@@ -4125,40 +4393,132 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
return;
}
- Register out = locations->Out().AsRegister<Register>();
- Register current_method = locations->InAt(0).AsRegister<Register>();
- if (cls->IsReferrersClass()) {
- DCHECK(!cls->CanCallRuntime());
- DCHECK(!cls->MustGenerateClinitCheck());
- __ LoadFromOffset(kLoadWord, out, current_method,
- ArtMethod::DeclaringClassOffset().Int32Value());
- } else {
- __ LoadFromOffset(kLoadWord, out, current_method,
- ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
- __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
-
- if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
- DCHECK(cls->CanCallRuntime());
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
- cls,
- cls,
- cls->GetDexPc(),
- cls->MustGenerateClinitCheck());
- codegen_->AddSlowPath(slow_path);
- if (!cls->IsInDexCache()) {
- __ Beqz(out, slow_path->GetEntryLabel());
- }
- if (cls->MustGenerateClinitCheck()) {
- GenerateClassInitializationCheck(slow_path, out);
+ HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
+ Register base_or_current_method_reg;
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ switch (load_kind) {
+ // We need an extra register for PC-relative literals on R2.
+ case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+ case HLoadClass::LoadKind::kBootImageAddress:
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+ base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+ break;
+ // We need an extra register for PC-relative dex cache accesses.
+ case HLoadClass::LoadKind::kDexCachePcRelative:
+ case HLoadClass::LoadKind::kReferrersClass:
+ case HLoadClass::LoadKind::kDexCacheViaMethod:
+ base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
+ break;
+ default:
+ base_or_current_method_reg = ZERO;
+ break;
+ }
+
+ bool generate_null_check = false;
+ switch (load_kind) {
+ case HLoadClass::LoadKind::kReferrersClass: {
+ DCHECK(!cls->CanCallRuntime());
+ DCHECK(!cls->MustGenerateClinitCheck());
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ base_or_current_method_reg,
+ ArtMethod::DeclaringClassOffset().Int32Value());
+ break;
+ }
+ case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+ DCHECK(!kEmitCompilerReadBarrier);
+ __ LoadLiteral(out,
+ base_or_current_method_reg,
+ codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+ cls->GetTypeIndex()));
+ break;
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(!kEmitCompilerReadBarrier);
+ CodeGeneratorMIPS::PcRelativePatchInfo* info =
+ codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ if (isR6) {
+ __ Bind(&info->high_label);
+ __ Bind(&info->pc_rel_label);
+ // Add a 32-bit offset to PC.
+ __ Auipc(out, /* placeholder */ 0x1234);
+ __ Addiu(out, out, /* placeholder */ 0x5678);
} else {
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(&info->high_label);
+ __ Lui(out, /* placeholder */ 0x1234);
+ // We do not bind info->pc_rel_label here; we'll use the assembler's label
+ // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
+ __ Ori(out, out, /* placeholder */ 0x5678);
+ // Add a 32-bit offset to PC.
+ __ Addu(out, out, base_or_current_method_reg);
}
+ break;
+ }
+ case HLoadClass::LoadKind::kBootImageAddress: {
+ DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK_NE(cls->GetAddress(), 0u);
+ uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+ __ LoadLiteral(out,
+ base_or_current_method_reg,
+ codegen_->DeduplicateBootImageAddressLiteral(address));
+ break;
+ }
+ case HLoadClass::LoadKind::kDexCacheAddress: {
+ DCHECK_NE(cls->GetAddress(), 0u);
+ uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+ static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+ DCHECK_ALIGNED(cls->GetAddress(), 4u);
+ int16_t offset = Low16Bits(address);
+ uint32_t base_address = address - offset; // This accounts for offset sign extension.
+ __ Lui(out, High16Bits(base_address));
+ // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+ GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+ generate_null_check = !cls->IsInDexCache();
+ break;
+ }
+ case HLoadClass::LoadKind::kDexCachePcRelative: {
+ HMipsDexCacheArraysBase* base = cls->InputAt(0)->AsMipsDexCacheArraysBase();
+ int32_t offset =
+ cls->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+ // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+ GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, offset);
+ generate_null_check = !cls->IsInDexCache();
+ break;
+ }
+ case HLoadClass::LoadKind::kDexCacheViaMethod: {
+ // /* GcRoot<mirror::Class>[] */ out =
+ // current_method.ptr_sized_fields_->dex_cache_resolved_types_
+ __ LoadFromOffset(kLoadWord,
+ out,
+ base_or_current_method_reg,
+ ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+ GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+ generate_null_check = !cls->IsInDexCache();
+ }
+ }
+
+ if (generate_null_check || cls->MustGenerateClinitCheck()) {
+ DCHECK(cls->CanCallRuntime());
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+ cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ codegen_->AddSlowPath(slow_path);
+ if (generate_null_check) {
+ __ Beqz(out, slow_path->GetEntryLabel());
+ }
+ if (cls->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
}
}
}
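
The kDexCacheAddress case above splits a 32-bit address into a LUI-loadable base and a signed 16-bit displacement; subtracting the sign-extended low half from the address is what keeps the pair exact when bit 15 of the address is set. A small standalone self-check of that arithmetic, using an arbitrary aligned address:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t address = 0x1234ABCCu;                            // arbitrary 4-byte aligned address
  int16_t offset = static_cast<int16_t>(address & 0xFFFFu);  // Low16Bits(address), sign-extended
  uint32_t base_address = address - static_cast<uint32_t>(static_cast<int32_t>(offset));
  uint32_t lui_result = (base_address >> 16) << 16;          // what LUI(High16Bits(base_address)) leaves in the register
  // The load then applies the signed 16-bit displacement:
  uint32_t effective = lui_result + static_cast<uint32_t>(static_cast<int32_t>(offset));
  assert(base_address == lui_result);  // the adjusted base has its low half cleared
  assert(effective == address);        // LUI base + signed offset reconstructs the original address
  return 0;
}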
static int32_t GetExceptionTlsOffset() {
- return Thread::ExceptionOffset<kMipsWordSize>().Int32Value();
+ return Thread::ExceptionOffset<kMipsPointerSize>().Int32Value();
}
void LocationsBuilderMIPS::VisitLoadException(HLoadException* load) {
@@ -4181,28 +4541,97 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT
}
void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
- LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+ LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
- locations->SetInAt(0, Location::RequiresRegister());
+ HLoadString::LoadKind load_kind = load->GetLoadKind();
+ switch (load_kind) {
+ // We need an extra register for PC-relative literals on R2.
+ case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ if (codegen_->GetInstructionSetFeatures().IsR6()) {
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ // We need an extra register for PC-relative dex cache accesses.
+ case HLoadString::LoadKind::kDexCachePcRelative:
+ case HLoadString::LoadKind::kDexCacheViaMethod:
+ locations->SetInAt(0, Location::RequiresRegister());
+ break;
+ default:
+ break;
+ }
locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
+ HLoadString::LoadKind load_kind = load->GetLoadKind();
LocationSummary* locations = load->GetLocations();
- Register out = locations->Out().AsRegister<Register>();
- Register current_method = locations->InAt(0).AsRegister<Register>();
- __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
- __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
+ Register base_or_current_method_reg;
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ switch (load_kind) {
+ // We need an extra register for PC-relative literals on R2.
+ case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+ break;
+ default:
+ base_or_current_method_reg = ZERO;
+ break;
+ }
- if (!load->IsInDexCache()) {
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
- codegen_->AddSlowPath(slow_path);
- __ Beqz(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ switch (load_kind) {
+ case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+ DCHECK(!kEmitCompilerReadBarrier);
+ __ LoadLiteral(out,
+ base_or_current_method_reg,
+ codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+ load->GetStringIndex()));
+ return; // No dex cache slow path.
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(!kEmitCompilerReadBarrier);
+ CodeGeneratorMIPS::PcRelativePatchInfo* info =
+ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+ if (isR6) {
+ __ Bind(&info->high_label);
+ __ Bind(&info->pc_rel_label);
+ // Add a 32-bit offset to PC.
+ __ Auipc(out, /* placeholder */ 0x1234);
+ __ Addiu(out, out, /* placeholder */ 0x5678);
+ } else {
+ __ Bind(&info->high_label);
+ __ Lui(out, /* placeholder */ 0x1234);
+ // We do not bind info->pc_rel_label here; we'll use the assembler's label
+ // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
+ __ Ori(out, out, /* placeholder */ 0x5678);
+ // Add a 32-bit offset to PC.
+ __ Addu(out, out, base_or_current_method_reg);
+ }
+ return; // No dex cache slow path.
+ }
+ case HLoadString::LoadKind::kBootImageAddress: {
+ DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK_NE(load->GetAddress(), 0u);
+ uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+ __ LoadLiteral(out,
+ base_or_current_method_reg,
+ codegen_->DeduplicateBootImageAddressLiteral(address));
+ return; // No dex cache slow path.
+ }
+ default:
+ break;
}
+
+ // TODO: Re-add the compiler code to do the string dex cache lookup.
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
+ codegen_->AddSlowPath(slow_path);
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
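
On R2, the PC-relative sequences above materialize the patched 32-bit offset with LUI/ORI and then add the base produced by HMipsComputeBaseMethodAddress; the 0x1234/0x5678 immediates are placeholders that the linker overwrites. A standalone sketch of what the R2 sequence computes (the R6 AUIPC/ADDIU form differs slightly because ADDIU sign-extends its immediate):

#include <cassert>
#include <cstdint>

// What the patched LUI/ORI/ADDU sequence evaluates to on R2, given the
// PC-relative base register and the 32-bit offset chosen by the linker.
uint32_t MaterializeR2(uint32_t base_in_register, uint32_t patched_offset) {
  uint32_t hi = patched_offset >> 16;      // replaces the 0x1234 placeholder in LUI
  uint32_t lo = patched_offset & 0xFFFFu;  // replaces the 0x5678 placeholder in ORI
  uint32_t value = (hi << 16) | lo;        // LUI then ORI (ORI zero-extends, so no carry fix-up)
  return base_in_register + value;         // ADDU with the method-address base
}

int main() {
  // Hypothetical numbers purely for the self-check.
  uint32_t base = 0x70001000u;
  uint32_t offset = 0x0002ABCDu;
  assert(MaterializeR2(base, offset) == base + offset);
  return 0;
}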
void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) {
@@ -4216,7 +4645,7 @@ void InstructionCodeGeneratorMIPS::VisitLongConstant(HLongConstant* constant ATT
void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
@@ -4395,7 +4824,7 @@ void InstructionCodeGeneratorMIPS::VisitNeg(HNeg* instruction) {
void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
@@ -4410,7 +4839,7 @@ void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
// Move a uint16_t value to a register.
__ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
codegen_->InvokeRuntime(
- GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
+ GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(),
instruction,
instruction->GetDexPc(),
nullptr,
@@ -4421,7 +4850,7 @@ void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
if (instruction->IsStringAlloc()) {
locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -4436,7 +4865,7 @@ void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
if (instruction->IsStringAlloc()) {
// String is allocated through StringFactory. Call NewEmptyString entry point.
Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+ MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
__ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
__ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
__ Jalr(T9);
@@ -4444,7 +4873,7 @@ void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
} else {
codegen_->InvokeRuntime(
- GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
+ GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(),
instruction,
instruction->GetDexPc(),
nullptr,
@@ -4591,7 +5020,7 @@ void InstructionCodeGeneratorMIPS::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED)
void LocationsBuilderMIPS::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
LocationSummary::CallKind call_kind =
- (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCall;
+ (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCallOnMainOnly;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
switch (type) {
@@ -4828,7 +5257,7 @@ void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction)
void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
@@ -4857,7 +5286,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
if (!isR6 &&
((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
(result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
- call_kind = LocationSummary::kCall;
+ call_kind = LocationSummary::kCallOnMainOnly;
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -5325,6 +5754,7 @@ void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress(
__ Nal();
// Grab the return address off RA.
__ Move(reg, RA);
+ // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()?
// Remember this offset (the obtained PC value) for later use with constant area.
__ BindPcRelBaseLabel();
@@ -5355,6 +5785,7 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra
__ Ori(reg, reg, /* placeholder */ 0x5678);
// Add a 32-bit offset to PC.
__ Addu(reg, reg, RA);
+ // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()?
}
}
@@ -5378,18 +5809,25 @@ void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet* instruction) {
void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- uint32_t method_offset = 0;
if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
- method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
instruction->GetIndex(), kMipsPointerSize).SizeValue();
+ __ LoadFromOffset(kLoadWord,
+ locations->Out().AsRegister<Register>(),
+ locations->InAt(0).AsRegister<Register>(),
+ method_offset);
} else {
- method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- instruction->GetIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ instruction->GetIndex(), kMipsPointerSize));
+ __ LoadFromOffset(kLoadWord,
+ locations->Out().AsRegister<Register>(),
+ locations->InAt(0).AsRegister<Register>(),
+ mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
+ __ LoadFromOffset(kLoadWord,
+ locations->Out().AsRegister<Register>(),
+ locations->Out().AsRegister<Register>(),
+ method_offset);
}
- __ LoadFromOffset(kLoadWord,
- locations->Out().AsRegister<Register>(),
- locations->InAt(0).AsRegister<Register>(),
- method_offset);
}
#undef __
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 08f74c04d1..63a0345c1c 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -18,11 +18,12 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
#include "code_generator.h"
-#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
+#include "string_reference.h"
#include "utils/mips/assembler_mips.h"
+#include "utils/type_reference.h"
namespace art {
namespace mips {
@@ -226,6 +227,15 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
void HandleShift(HBinaryOperation* operation);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers (if any).
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ Register obj,
+ uint32_t offset);
void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
void GenerateIntCompareAndBranch(IfCondition cond,
LocationSummary* locations,
@@ -298,6 +308,9 @@ class CodeGeneratorMIPS : public CodeGenerator {
size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+ void ClobberRA() {
+ clobbered_ra_ = true;
+ }
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -383,7 +396,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
const DexFile& target_dex_file;
- // Either the dex cache array element offset or the string index.
+ // Either the dex cache array element offset or the string/type index.
uint32_t offset_or_index;
// Label for the instruction loading the most significant half of the offset that's added to PC
// to form the base address (the least significant half is loaded with the instruction that
@@ -393,14 +406,27 @@ class CodeGeneratorMIPS : public CodeGenerator {
MipsLabel pc_rel_label;
};
+ PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+ PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset);
+ Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+ Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
+ Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
private:
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+ using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
-
+ using BootStringToLiteralMap = ArenaSafeMap<StringReference,
+ Literal*,
+ StringReferenceValueComparator>;
+ using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+ Literal*,
+ TypeReferenceValueComparator>;
+
+ Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
@@ -417,11 +443,27 @@ class CodeGeneratorMIPS : public CodeGenerator {
MipsAssembler assembler_;
const MipsInstructionSetFeatures& isa_features_;
+ // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+ Uint32ToLiteralMap uint32_literals_;
// Method patch info, map MethodReference to a literal for method address and method code.
MethodToLiteralMap method_patches_;
MethodToLiteralMap call_patches_;
// PC-relative patch info for each HMipsDexCacheArraysBase.
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+ // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+ BootStringToLiteralMap boot_image_string_patches_;
+ // PC-relative String patch info.
+ ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+ // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+ BootTypeToLiteralMap boot_image_type_patches_;
+ // PC-relative type patch info.
+ ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+ // Deduplication map for patchable boot image addresses.
+ Uint32ToLiteralMap boot_image_address_patches_;
+
+ // PC-relative loads on R2 clobber RA, which may need to be preserved explicitly in leaf methods.
+ // This flag is set by the pc_relative_fixups_mips and dex_cache_array_fixups_mips optimizations.
+ bool clobbered_ra_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS);
};
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 2e78884daf..4a5755c925 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -102,9 +102,9 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type)
return Mips64ReturnLocation(type);
}
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value()
class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
@@ -300,13 +300,11 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
instruction_,
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ Bc(GetReturnLabel());
} else {
@@ -362,7 +360,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
dex_pc,
this);
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -429,9 +427,9 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
}
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value()
void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
// Ensure that we fix up branches.
@@ -888,7 +886,7 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object,
__ LoadFromOffset(kLoadDoubleword,
card,
TR,
- Thread::CardTableOffset<kMips64DoublewordSize>().Int32Value());
+ Thread::CardTableOffset<kMips64PointerSize>().Int32Value());
__ Dsrl(temp, object, gc::accounting::CardTable::kCardShift);
__ Daddu(temp, card, temp);
__ Sb(card, temp, 0);
@@ -964,7 +962,7 @@ void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kMips64DoublewordSize>(entrypoint).Int32Value(),
+ InvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value(),
instruction,
dex_pc,
slow_path);
@@ -1004,7 +1002,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc
__ LoadFromOffset(kLoadUnsignedHalfword,
TMP,
TR,
- Thread::ThreadFlagsOffset<kMips64DoublewordSize>().Int32Value());
+ Thread::ThreadFlagsOffset<kMips64PointerSize>().Int32Value());
if (successor == nullptr) {
__ Bnezc(TMP, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
@@ -1436,7 +1434,7 @@ void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
bool needs_runtime_call = instruction->NeedsTypeCheck();
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+ needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
if (needs_runtime_call) {
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2932,11 +2930,9 @@ void LocationsBuilderMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
- uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
Location receiver = invoke->GetLocations()->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
// Set the hidden argument.
__ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<GpuRegister>(),
@@ -2950,6 +2946,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo
__ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ __ LoadFromOffset(kLoadDoubleword, temp, temp,
+ mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ invoke->GetImtIndex(), kMips64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
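The hunk above replaces the per-class embedded IMT entry with an extra indirection: the receiver's class now stores a pointer to a separate ImTable (loaded via ImtPtrOffset), and the entry is fetched at ImTable::OffsetOfElement(imt_index, pointer_size). A small standalone sketch of that two-level lookup, using stub types and an assumed flat-pointer-array layout rather than ART's real mirror::Class and ImTable:

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct ArtMethodStub { const char* name; };

// Assumed layout: the ImTable is a flat array of method pointers, so an entry
// lives at index * pointer_size from the table base (this mirrors what
// ImTable::OffsetOfElement appears to compute; the types here are stubs).
struct ImTableStub {
  ArtMethodStub* entries_[4];
  static size_t OffsetOfElement(size_t index, size_t pointer_size) {
    return index * pointer_size;
  }
};

struct ClassStub {
  ImTableStub* imt_;  // what the new ImtPtrOffset() load fetches in the hunk
};

ArtMethodStub* ResolveImtEntry(ClassStub* klass, size_t imt_index) {
  // 1) temp = klass->imt_            (the added kLoadDoubleword from ImtPtrOffset)
  ImTableStub* table = klass->imt_;
  // 2) temp = *(table + OffsetOfElement(imt_index, pointer_size))
  uint8_t* base = reinterpret_cast<uint8_t*>(table);
  return *reinterpret_cast<ArtMethodStub**>(
      base + ImTableStub::OffsetOfElement(imt_index, sizeof(void*)));
}

int main() {
  ArtMethodStub m{"interfaceMethod"};
  ImTableStub table{{nullptr, &m, nullptr, nullptr}};
  ClassStub klass{&table};
  std::printf("%s\n", ResolveImtEntry(&klass, 1)->name);
  return 0;
}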
@@ -3113,7 +3113,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
T9,
callee_method.AsRegister<GpuRegister>(),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kMips64DoublewordSize).Int32Value());
+ kMips64PointerSize).Int32Value());
// T9()
__ Jalr(T9);
__ Nop();
@@ -3151,7 +3151,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t
size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
// temp = object->GetClass();
__ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
@@ -3229,7 +3229,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
}
static int32_t GetExceptionTlsOffset() {
- return Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value();
+ return Thread::ExceptionOffset<kMips64PointerSize>().Int32Value();
}
void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) {
@@ -3261,22 +3261,11 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
}
void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
- LocationSummary* locations = load->GetLocations();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
- GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
- __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
- ArtMethod::DeclaringClassOffset().Int32Value());
- __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- __ LoadFromOffset(
- kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- // TODO: We will need a read barrier here.
-
- if (!load->IsInDexCache()) {
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
- codegen_->AddSlowPath(slow_path);
- __ Beqzc(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ Bc(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) {
@@ -3290,7 +3279,7 @@ void InstructionCodeGeneratorMIPS64::VisitLongConstant(HLongConstant* constant A
void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
@@ -3417,7 +3406,7 @@ void InstructionCodeGeneratorMIPS64::VisitNeg(HNeg* instruction) {
void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
@@ -3438,7 +3427,7 @@ void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
if (instruction->IsStringAlloc()) {
locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3454,7 +3443,7 @@ void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction)
// String is allocated through StringFactory. Call NewEmptyString entry point.
GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
MemberOffset code_offset =
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
__ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
__ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
__ Jalr(T9);
@@ -3598,7 +3587,8 @@ void InstructionCodeGeneratorMIPS64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED
void LocationsBuilderMIPS64::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
LocationSummary::CallKind call_kind =
- Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+ Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
switch (type) {
@@ -3811,7 +3801,7 @@ void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instructio
void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 4b462cc800..197f86b22b 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -18,7 +18,6 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_
#include "code_generator.h"
-#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1261619536..7aca16f867 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -47,9 +47,9 @@ static constexpr int kC2ConditionMask = 0x400;
static constexpr int kFakeReturnRegister = Register(8);
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
class NullCheckSlowPathX86 : public SlowPathCode {
public:
@@ -140,12 +140,29 @@ class BoundsCheckSlowPathX86 : public SlowPathCode {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
+
+ // Are we using an array length from memory?
+ HInstruction* array_length = instruction_->InputAt(1);
+ Location length_loc = locations->InAt(1);
InvokeRuntimeCallingConvention calling_convention;
+ if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+ // Load the array length into our temporary.
+ uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+ Location array_loc = array_length->GetLocations()->InAt(0);
+ Address array_len(array_loc.AsRegister<Register>(), len_offset);
+ length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+ // Check for conflicts with index.
+ if (length_loc.Equals(locations->InAt(0))) {
+ // We know we aren't using parameter 2.
+ length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+ }
+ __ movl(length_loc.AsRegister<Register>(), array_len);
+ }
x86_codegen->EmitParallelMoves(
locations->InAt(0),
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
Primitive::kPrimInt,
- locations->InAt(1),
+ length_loc,
Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
Primitive::kPrimInt);
uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
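One subtle point in the BoundsCheck slow path above: when the array length was never materialized (it is emitted at its use site), the slow path has to load it from memory into an argument register itself, taking care not to clobber the register that already holds the index. A minimal sketch of that conflict check, with made-up register numbers rather than the real x86 calling convention:

#include <cassert>

// Stand-in for InvokeRuntimeCallingConvention::GetRegisterAt(i); the register
// numbers are made up for the example.
int GetArgRegisterAt(int i) {
  static const int kArgRegs[] = {1, 2, 3};
  return kArgRegs[i];
}

// Choose a register for the freshly loaded length without clobbering the one
// that already holds the index (the "Check for conflicts with index" step).
int PickLengthRegister(int index_reg) {
  int length_reg = GetArgRegisterAt(1);
  if (length_reg == index_reg) {
    // The bounds-check entrypoint only takes two arguments, so argument
    // register 2 is known to be free.
    length_reg = GetArgRegisterAt(2);
  }
  return length_reg;
}

int main() {
  assert(PickLengthRegister(/*index_reg=*/7) == 2);  // no conflict
  assert(PickLengthRegister(/*index_reg=*/2) == 3);  // conflict: fall back
  return 0;
}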
@@ -175,13 +192,11 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
instruction_,
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -332,7 +347,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -430,8 +445,8 @@ class ArraySetSlowPathX86 : public SlowPathCode {
// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj)
- : SlowPathCode(instruction), out_(out), obj_(obj) {
+ ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj)
+ : SlowPathCode(instruction), obj_(obj) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -439,9 +454,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- Register reg_out = out_.AsRegister<Register>();
+ Register reg = obj_.AsRegister<Register>();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
@@ -449,30 +464,40 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, locations);
-
- InvokeRuntimeCallingConvention calling_convention;
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
- x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
- instruction_,
- instruction_->GetDexPc(),
- this);
- CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
- x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
-
- RestoreLiveRegisters(codegen, locations);
+ DCHECK_NE(reg, ESP);
+ DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in EAX):
+ //
+ // EAX <- obj
+ // EAX <- ReadBarrierMark(EAX)
+ // obj <- EAX
+ //
+ // we just use rX (the register holding `obj`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg);
+ // This runtime call does not require a stack map.
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
}
private:
- const Location out_;
const Location obj_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
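The rewritten ReadBarrierMarkSlowPathX86 above leans on one thread-local entrypoint per core register (pReadBarrierMarkRegX), so the reference being marked stays in its own register and the two moves of the old EAX-based convention disappear. The diff only shows the call to CodeGenerator::GetReadBarrierMarkEntryPointsOffset; the sketch below is a rough model of how such an offset could be derived (the base offset and table layout are assumptions, not ART's real Thread layout):

#include <cstdint>
#include <cstdio>

constexpr int32_t kPointerSize = 4;                 // kX86PointerSize (assumed)
constexpr int32_t kFirstMarkRegSlotOffset = 0x2a0;  // placeholder, not ART's real offset

// Rough model of GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg):
// one pReadBarrierMarkRegX slot per core register, laid out consecutively, so
// the register number indexes the table directly.
constexpr int32_t MarkEntryPointOffset(int reg) {
  return kFirstMarkRegSlotOffset + reg * kPointerSize;
}

int main() {
  for (int reg = 0; reg < 8; ++reg) {  // EAX .. EDI
    std::printf("reg %d -> fs:[0x%x]  (rX <- ReadBarrierMarkRegX(rX))\n",
                reg, static_cast<unsigned>(MarkEntryPointOffset(reg)));
  }
  return 0;
}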
@@ -518,8 +543,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+        (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -706,8 +730,8 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
};
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<X86Assembler*>(GetAssembler())-> /* NOLINT */
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
inline Condition X86Condition(IfCondition cond) {
switch (cond) {
@@ -778,7 +802,7 @@ void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kX86WordSize>(entrypoint).Int32Value(),
+ InvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value(),
instruction,
dex_pc,
slow_path);
@@ -793,6 +817,13 @@ void CodeGeneratorX86::InvokeRuntime(int32_t entry_point_offset,
RecordPcInfo(instruction, dex_pc, slow_path);
}
+void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ __ fs()->call(Address::Absolute(entry_point_offset));
+}
+
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
const X86InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
@@ -1548,15 +1579,15 @@ void LocationsBuilderX86::VisitSelect(HSelect* select) {
locations->SetOut(Location::SameAsFirstInput());
}
-void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
Register lhs_reg = lhs.AsRegister<Register>();
if (rhs.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- codegen_->Compare32BitValue(lhs_reg, value);
+ Compare32BitValue(lhs_reg, value);
} else if (rhs.IsStackSlot()) {
- __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+ assembler_.cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
} else {
- __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+ assembler_.cmpl(lhs_reg, rhs.AsRegister<Register>());
}
}
@@ -1589,7 +1620,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
LocationSummary* cond_locations = condition->GetLocations();
- GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+ codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
cond = X86Condition(condition->GetCondition());
}
} else {
@@ -1698,7 +1729,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
// Clear output register: setb only sets the low byte.
__ xorl(reg, reg);
- GenerateIntCompare(lhs, rhs);
+ codegen_->GenerateIntCompare(lhs, rhs);
__ setb(X86Condition(cond->GetCondition()), reg);
return;
}
@@ -2027,8 +2058,6 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke)
LocationSummary* locations = invoke->GetLocations();
Register temp = locations->GetTemp(0).AsRegister<Register>();
XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
- uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
Location receiver = locations->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -2055,11 +2084,16 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke)
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
__ MaybeUnpoisonHeapReference(temp);
+ // temp = temp->GetAddressOfIMT()
+ __ movl(temp,
+ Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
// temp = temp->GetImtEntryAt(method_offset);
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ invoke->GetImtIndex(), kX86PointerSize));
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
__ call(Address(temp,
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2182,7 +2216,7 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
LocationSummary::CallKind call_kind =
((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
&& result_type == Primitive::kPrimLong)
- ? LocationSummary::kCall
+ ? LocationSummary::kCallOnMainOnly
: LocationSummary::kNoCall;
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -3437,7 +3471,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
void LocationsBuilderX86::VisitDiv(HDiv* div) {
LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong)
- ? LocationSummary::kCall
+ ? LocationSummary::kCallOnMainOnly
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -3540,7 +3574,7 @@ void LocationsBuilderX86::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
- ? LocationSummary::kCall
+ ? LocationSummary::kCallOnMainOnly
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
@@ -3982,7 +4016,7 @@ void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
locations->SetOut(Location::RegisterLocation(EAX));
if (instruction->IsStringAlloc()) {
locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3999,7 +4033,7 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
if (instruction->IsStringAlloc()) {
// String is allocated through StringFactory. Call NewEmptyString entry point.
Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize);
+ MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize);
__ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
__ call(Address(temp, code_offset.Int32Value()));
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -4015,7 +4049,7 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
locations->SetOut(Location::RegisterLocation(EAX));
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -4070,16 +4104,21 @@ void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- uint32_t method_offset = 0;
if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
- method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
instruction->GetIndex(), kX86PointerSize).SizeValue();
+ __ movl(locations->Out().AsRegister<Register>(),
+ Address(locations->InAt(0).AsRegister<Register>(), method_offset));
} else {
- method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- instruction->GetIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ instruction->GetIndex(), kX86PointerSize));
+ __ movl(locations->Out().AsRegister<Register>(),
+ Address(locations->InAt(0).AsRegister<Register>(),
+ mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
+ // temp = temp->GetImtEntryAt(method_offset);
+ __ movl(locations->Out().AsRegister<Register>(),
+ Address(locations->Out().AsRegister<Register>(), method_offset));
}
- __ movl(locations->Out().AsRegister<Register>(),
- Address(locations->InAt(0).AsRegister<Register>(), method_offset));
}
void LocationsBuilderX86::VisitNot(HNot* not_) {
@@ -4172,7 +4211,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
case Primitive::kPrimShort:
case Primitive::kPrimChar:
case Primitive::kPrimInt: {
- GenerateIntCompare(left, right);
+ codegen_->GenerateIntCompare(left, right);
break;
}
case Primitive::kPrimLong: {
@@ -4411,7 +4450,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
// (callee_method + offset_of_quick_compiled_code)()
__ call(Address(callee_method.AsRegister<Register>(),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kX86WordSize).Int32Value()));
+ kX86PointerSize).Int32Value()));
break;
}
@@ -4445,7 +4484,7 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
__ call(Address(
- temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+ temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
}
void CodeGeneratorX86::RecordSimplePatch() {
@@ -4549,7 +4588,7 @@ void CodeGeneratorX86::MarkGCCard(Register temp,
__ testl(value, value);
__ j(kEqual, &is_null);
}
- __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value()));
+ __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
__ movl(temp, object);
__ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
__ movb(Address(temp, card, TIMES_1, 0),
@@ -5510,10 +5549,16 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ if (!instruction->IsEmittedAtUseSite()) {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ }
}
void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
+ if (instruction->IsEmittedAtUseSite()) {
+ return;
+ }
+
LocationSummary* locations = instruction->GetLocations();
uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
Register obj = locations->InAt(0).AsRegister<Register>();
@@ -5528,7 +5573,10 @@ void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
- locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ HInstruction* length = instruction->InputAt(1);
+ if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(length));
+ }
if (instruction->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
@@ -5562,12 +5610,28 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
codegen_->AddSlowPath(slow_path);
__ j(kAboveEqual, slow_path->GetEntryLabel());
} else {
- Register length = length_loc.AsRegister<Register>();
- if (index_loc.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
- __ cmpl(length, Immediate(value));
+ HInstruction* array_length = instruction->InputAt(1);
+ if (array_length->IsEmittedAtUseSite()) {
+ // Address the length field in the array.
+ DCHECK(array_length->IsArrayLength());
+ uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+ Location array_loc = array_length->GetLocations()->InAt(0);
+ Address array_len(array_loc.AsRegister<Register>(), len_offset);
+ if (index_loc.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+ __ cmpl(array_len, Immediate(value));
+ } else {
+ __ cmpl(array_len, index_loc.AsRegister<Register>());
+ }
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
} else {
- __ cmpl(length, index_loc.AsRegister<Register>());
+ Register length = length_loc.AsRegister<Register>();
+ if (index_loc.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+ __ cmpl(length, Immediate(value));
+ } else {
+ __ cmpl(length, index_loc.AsRegister<Register>());
+ }
}
codegen_->AddSlowPath(slow_path);
__ j(kBelowEqual, slow_path->GetEntryLabel());
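The VisitArrayLength/VisitBoundsCheck hunks above fold the length load into the compare whenever the HArrayLength is emitted at its use site: cmpl reads the length field directly from the array object, and the implicit null check is recorded on that memory access instead of on a separate load. A toy illustration of the two shapes (stub array type and field layout, not ART's heap objects):

#include <cstdint>
#include <cstdio>

struct ToyArray { int32_t length; int32_t data[8]; };

// Materialized form: a separate "array length" instruction loads the length
// into a register and the compare consumes that register.
bool InBoundsMaterialized(const ToyArray* a, int32_t index) {
  int32_t len = a->length;  // standalone VisitArrayLength
  return static_cast<uint32_t>(index) < static_cast<uint32_t>(len);
}

// Emitted-at-use-site form: the compare itself reads the length field from
// memory (cmpl [array + len_offset], index), which is also where the implicit
// null check gets recorded.
bool InBoundsAtUseSite(const ToyArray* a, int32_t index) {
  return static_cast<uint32_t>(index) < static_cast<uint32_t>(a->length);
}

int main() {
  ToyArray a{4, {}};
  std::printf("%d %d\n", InBoundsMaterialized(&a, 3), InBoundsAtUseSite(&a, 5));
  return 0;
}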
@@ -5616,7 +5680,7 @@ void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instructio
DCHECK_EQ(slow_path->GetSuccessor(), successor);
}
- __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()),
+ __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
Immediate(0));
if (successor == nullptr) {
__ j(kNotEqual, slow_path->GetEntryLabel());
@@ -6167,52 +6231,19 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
codegen_->RecordSimplePatch();
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
- // /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address));
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- Register base_reg = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = load->GetDexCacheElementOffset();
- Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
- // /* GcRoot<mirror::String> */ out = *(base + offset) /* PC-relative */
- GenerateGcRootFieldLoad(
- load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- Register current_method = locations->InAt(0).AsRegister<Register>();
-
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
- break;
- }
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
- codegen_->AddSlowPath(slow_path);
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
+ codegen_->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
static Address GetExceptionTlsAddress() {
- return Address::Absolute(Thread::ExceptionOffset<kX86WordSize>().Int32Value());
+ return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
}
void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
@@ -6235,7 +6266,7 @@ void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATT
void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
@@ -6687,7 +6718,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
@@ -6926,10 +6957,10 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
// Slow path used to mark the GC root `root`.
SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root);
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root);
codegen_->AddSlowPath(slow_path);
- __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
+ __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
Immediate(0));
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -7036,12 +7067,6 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
"art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
- __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86 memory model.
@@ -7056,13 +7081,18 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Slow path used to mark the object `ref` when it is gray.
SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref);
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref);
AddSlowPath(slow_path);
// if (rb_state == ReadBarrier::gray_ptr_)
// ref = ReadBarrier::Mark(ref);
- __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
- __ j(kEqual, slow_path->GetEntryLabel());
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
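The Baker read-barrier hunk above replaces the shift-mask-compare of the read-barrier state with a single SHR: shifting the lock word right by kReadBarrierStateShift + 1 moves the state's low bit into the carry flag, and of white (0), gray (1) and black (2) only gray has that bit set. The same predicate in plain C++, with an illustrative value standing in for LockWord::kReadBarrierStateShift:

#include <cassert>
#include <cstdint>

constexpr uint32_t kReadBarrierStateShift = 28;  // illustrative, not LockWord's real constant
constexpr uint32_t kWhite = 0, kGray = 1, kBlack = 2;

// What the `shrl temp, Immediate(shift + 1)` / `j kCarrySet` pair tests: the
// low bit of the read-barrier state, which only gray (value 1) has set.
bool NeedsMark(uint32_t lock_word) {
  return ((lock_word >> kReadBarrierStateShift) & 1u) != 0u;
}

int main() {
  assert(!NeedsMark(kWhite << kReadBarrierStateShift));
  assert(NeedsMark(kGray << kReadBarrierStateShift));
  assert(!NeedsMark(kBlack << kReadBarrierStateShift));  // black (2) has a clear low bit
  return 0;
}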
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2a9fb80995..894f2e8f40 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -18,8 +18,8 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#include "arch/x86/instruction_set_features_x86.h"
+#include "base/enums.h"
#include "code_generator.h"
-#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
@@ -29,7 +29,7 @@ namespace art {
namespace x86 {
// Use a local definition to prevent copying mistakes.
-static constexpr size_t kX86WordSize = kX86PointerSize;
+static constexpr size_t kX86WordSize = static_cast<size_t>(kX86PointerSize);
class CodeGeneratorX86;
@@ -295,7 +295,6 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
HBasicBlock* default_block);
void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
- void GenerateIntCompare(Location lhs, Location rhs);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -336,6 +335,12 @@ class CodeGeneratorX86 : public CodeGenerator {
uint32_t dex_pc,
SlowPathCode* slow_path);
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
size_t GetWordSize() const OVERRIDE {
return kX86WordSize;
}
@@ -425,6 +430,8 @@ class CodeGeneratorX86 : public CodeGenerator {
Register value,
bool value_can_be_null);
+ void GenerateIntCompare(Location lhs, Location rhs);
+
void GenerateMemoryBarrier(MemBarrierKind kind);
Label* GetLabelOf(HBasicBlock* block) const {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5e30203b38..0c55ae44de 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -51,9 +51,9 @@ static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15
static constexpr int kC2ConditionMask = 0x400;
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
-#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
class NullCheckSlowPathX86_64 : public SlowPathCode {
public:
@@ -149,13 +149,11 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
instruction_,
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -194,14 +192,31 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
+ // Are we using an array length from memory?
+ HInstruction* array_length = instruction_->InputAt(1);
+ Location length_loc = locations->InAt(1);
+ InvokeRuntimeCallingConvention calling_convention;
+ if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+ // Load the array length into our temporary.
+ uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+ Location array_loc = array_length->GetLocations()->InAt(0);
+ Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+ length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+ // Check for conflicts with index.
+ if (length_loc.Equals(locations->InAt(0))) {
+ // We know we aren't using parameter 2.
+ length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+ }
+ __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
+ }
+
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
- InvokeRuntimeCallingConvention calling_convention;
codegen->EmitParallelMoves(
locations->InAt(0),
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
Primitive::kPrimInt,
- locations->InAt(1),
+ length_loc,
Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
Primitive::kPrimInt);
uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
@@ -352,7 +367,7 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode {
dex_pc,
this);
CheckEntrypointTypes<
- kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
+ kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -451,8 +466,8 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
- : SlowPathCode(instruction), out_(out), obj_(obj) {
+ ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj)
+ : SlowPathCode(instruction), obj_(obj) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -460,9 +475,9 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- Register reg_out = out_.AsRegister<Register>();
+ Register reg = obj_.AsRegister<Register>();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
@@ -470,30 +485,40 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, locations);
-
- InvokeRuntimeCallingConvention calling_convention;
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
- x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
- instruction_,
- instruction_->GetDexPc(),
- this);
- CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
- x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
-
- RestoreLiveRegisters(codegen, locations);
+ DCHECK_NE(reg, RSP);
+ DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+  // in RDI and output in RAX):
+ //
+ // RDI <- obj
+ // RAX <- ReadBarrierMark(RDI)
+ // obj <- RAX
+ //
+ // we just use rX (the register holding `obj`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(reg);
+ // This runtime call does not require a stack map.
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
}
private:
- const Location out_;
const Location obj_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
@@ -539,8 +564,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+        (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -725,8 +749,8 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
};
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
inline Condition X86_64IntegerCondition(IfCondition cond) {
switch (cond) {
@@ -858,7 +882,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
// (callee_method + offset_of_quick_compiled_code)()
__ call(Address(callee_method.AsRegister<CpuRegister>(),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kX86_64WordSize).SizeValue()));
+ kX86_64PointerSize).SizeValue()));
break;
}
@@ -893,7 +917,7 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t
__ movq(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
__ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kX86_64WordSize).SizeValue()));
+ kX86_64PointerSize).SizeValue()));
}
void CodeGeneratorX86_64::RecordSimplePatch() {
@@ -1006,7 +1030,7 @@ void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
+ InvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value(),
instruction,
dex_pc,
slow_path);
@@ -1021,6 +1045,13 @@ void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
RecordPcInfo(instruction, dex_pc, slow_path);
}
+void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
+}
+
static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
@@ -2257,8 +2288,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo
LocationSummary* locations = invoke->GetLocations();
CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
- uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
Location receiver = locations->InAt(0);
size_t class_offset = mirror::Object::ClassOffset().SizeValue();
@@ -2284,11 +2313,17 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
__ MaybeUnpoisonHeapReference(temp);
+ // temp = temp->GetAddressOfIMT()
+ __ movq(temp,
+ Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ invoke->GetImtIndex(), kX86_64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ movq(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
- __ call(Address(temp,
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
+ __ call(Address(
+ temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -3909,7 +3944,7 @@ void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
if (instruction->IsStringAlloc()) {
locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3926,7 +3961,7 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction)
if (instruction->IsStringAlloc()) {
// String is allocated through StringFactory. Call NewEmptyString entry point.
CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
+ MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
__ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
__ call(Address(temp, code_offset.SizeValue()));
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -3942,7 +3977,7 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction)
void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(Location::RegisterLocation(RAX));
@@ -4002,16 +4037,20 @@ void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- uint32_t method_offset = 0;
if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
- method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
instruction->GetIndex(), kX86_64PointerSize).SizeValue();
+ __ movq(locations->Out().AsRegister<CpuRegister>(),
+ Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
} else {
- method_offset = mirror::Class::EmbeddedImTableEntryOffset(
- instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ instruction->GetIndex(), kX86_64PointerSize));
+ __ movq(locations->Out().AsRegister<CpuRegister>(),
+ Address(locations->InAt(0).AsRegister<CpuRegister>(),
+ mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
+ __ movq(locations->Out().AsRegister<CpuRegister>(),
+ Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
}
- __ movq(locations->Out().AsRegister<CpuRegister>(),
- Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
}
void LocationsBuilderX86_64::VisitNot(HNot* not_) {
@@ -4980,10 +5019,16 @@ void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ if (!instruction->IsEmittedAtUseSite()) {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ }
}
void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
+ if (instruction->IsEmittedAtUseSite()) {
+ return;
+ }
+
LocationSummary* locations = instruction->GetLocations();
uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
@@ -4998,7 +5043,10 @@ void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
- locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ HInstruction* length = instruction->InputAt(1);
+ if (!length->IsEmittedAtUseSite()) {
+ locations->SetInAt(1, Location::RegisterOrConstant(length));
+ }
if (instruction->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
@@ -5008,8 +5056,7 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction)
LocationSummary* locations = instruction->GetLocations();
Location index_loc = locations->InAt(0);
Location length_loc = locations->InAt(1);
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
if (length_loc.IsConstant()) {
int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -5032,12 +5079,28 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction)
codegen_->AddSlowPath(slow_path);
__ j(kAboveEqual, slow_path->GetEntryLabel());
} else {
- CpuRegister length = length_loc.AsRegister<CpuRegister>();
- if (index_loc.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
- __ cmpl(length, Immediate(value));
+ HInstruction* array_length = instruction->InputAt(1);
+ if (array_length->IsEmittedAtUseSite()) {
+ // Address the length field in the array.
+ DCHECK(array_length->IsArrayLength());
+ uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+ Location array_loc = array_length->GetLocations()->InAt(0);
+ Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+ if (index_loc.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+ __ cmpl(array_len, Immediate(value));
+ } else {
+ __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
+ }
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
} else {
- __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+ CpuRegister length = length_loc.AsRegister<CpuRegister>();
+ if (index_loc.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+ __ cmpl(length, Immediate(value));
+ } else {
+ __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+ }
}
codegen_->AddSlowPath(slow_path);
__ j(kBelowEqual, slow_path->GetEntryLabel());
@@ -5054,7 +5117,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
__ testl(value, value);
__ j(kEqual, &is_null);
}
- __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
+ __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
/* no_rip */ true));
__ movq(temp, object);
__ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
@@ -5106,7 +5169,7 @@ void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruc
DCHECK_EQ(slow_path->GetSuccessor(), successor);
}
- __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
+ __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
/* no_rip */ true),
Immediate(0));
if (successor == nullptr) {
@@ -5573,57 +5636,19 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
codegen_->RecordSimplePatch();
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- // /* GcRoot<mirror::String> */ out = *address
- if (IsUint<32>(load->GetAddress())) {
- Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
- GenerateGcRootFieldLoad(load, out_loc, address);
- } else {
- // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
- __ movq(out, Immediate(load->GetAddress()));
- GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
- }
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- uint32_t offset = load->GetDexCacheElementOffset();
- Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
- Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
- /* no_rip */ false);
- // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
-
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
- break;
- }
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
- codegen_->AddSlowPath(slow_path);
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
static Address GetExceptionTlsAddress() {
- return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
+ return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
/* no_rip */ true);
}
@@ -5647,7 +5672,7 @@ void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear
void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
@@ -6157,7 +6182,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
@@ -6378,10 +6403,10 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr
// Slow path used to mark the GC root `root`.
SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root);
codegen_->AddSlowPath(slow_path);
- __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
+ __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
/* no_rip */ true),
Immediate(0));
__ j(kNotEqual, slow_path->GetEntryLabel());
@@ -6489,12 +6514,6 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
"art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
- __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6509,13 +6528,18 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// Slow path used to mark the object `ref` when it is gray.
SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref);
AddSlowPath(slow_path);
// if (rb_state == ReadBarrier::gray_ptr_)
// ref = ReadBarrier::Mark(ref);
- __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
- __ j(kEqual, slow_path->GetEntryLabel());
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
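
The hunk above replaces the explicit compare of the extracted read-barrier state against gray_ptr_ with a single SHR whose carry-out is the state's low bit: x86-64's SHR leaves the last bit shifted out in CF, and with white/gray/black encoded as 0/1/2 (as the three static_asserts pin down) only gray has that bit set. Below is a standalone sketch of the bit trick; the shift position and helper names are illustrative stand-ins, not the ART definitions.

#include <cstdint>
#include <cstdio>

constexpr uint32_t kReadBarrierStateShift = 28;  // assumed position of rb_state
constexpr uint32_t kWhite = 0, kGray = 1, kBlack = 2;

// Old form: extract rb_state, then compare it against gray.
bool IsGrayExplicit(uint32_t lock_word) {
  uint32_t rb_state = (lock_word >> kReadBarrierStateShift) & 0x3;
  return rb_state == kGray;
}

// New form: shift by kReadBarrierStateShift + 1; the last bit shifted out is
// the low bit of rb_state, which SHR deposits in the carry flag (modeled here
// by masking). Among {0, 1, 2} only gray (1) has that bit set.
bool IsGrayViaShiftOut(uint32_t lock_word) {
  return ((lock_word >> kReadBarrierStateShift) & 0x1) != 0;
}

int main() {
  for (uint32_t state : {kWhite, kGray, kBlack}) {
    uint32_t lock_word = state << kReadBarrierStateShift;
    std::printf("state=%u explicit=%d shift-out=%d\n",
                static_cast<unsigned>(state),
                IsGrayExplicit(lock_word) ? 1 : 0,
                IsGrayViaShiftOut(lock_word) ? 1 : 0);
  }
  return 0;
}
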
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d7cfd37c33..4e0e34ce38 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -19,7 +19,6 @@
#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator.h"
-#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
@@ -29,7 +28,7 @@ namespace art {
namespace x86_64 {
// Use a local definition to prevent copying mistakes.
-static constexpr size_t kX86_64WordSize = kX86_64PointerSize;
+static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);
// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;
@@ -318,6 +317,12 @@ class CodeGeneratorX86_64 : public CodeGenerator {
uint32_t dex_pc,
SlowPathCode* slow_path);
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
size_t GetWordSize() const OVERRIDE {
return kX86_64WordSize;
}
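
The kX86_64WordSize definition now needs a static_cast because kX86_64PointerSize changed from a plain size_t constant to a value of the scoped enum PointerSize (brought in via the base/enums.h include added elsewhere in this patch), and scoped enums do not convert implicitly. A rough sketch of the pattern, with a simplified PointerSize declared locally for illustration:

#include <cstddef>

// Simplified stand-in for the PointerSize scoped enum.
enum class PointerSize : size_t {
  k32 = 4,
  k64 = 8,
};

constexpr PointerSize kX86_64PointerSize = PointerSize::k64;

// A scoped enum does not convert to size_t implicitly...
// static constexpr size_t kWordSizeBroken = kX86_64PointerSize;  // would not compile
// ...so the word-size constant needs an explicit cast:
static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);

static_assert(kX86_64WordSize == 8u, "x86-64 words are 8 bytes");

int main() { return 0; }
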
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 6be79fa75c..fe6c0a305e 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -29,12 +29,6 @@
#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "builder.h"
-#include "code_generator_arm.h"
-#include "code_generator_arm64.h"
-#include "code_generator_mips.h"
-#include "code_generator_mips64.h"
-#include "code_generator_x86.h"
-#include "code_generator_x86_64.h"
#include "code_simulator_container.h"
#include "common_compiler_test.h"
#include "dex_file.h"
@@ -44,7 +38,7 @@
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
#include "utils.h"
#include "utils/arm/managed_register_arm.h"
@@ -52,10 +46,35 @@
#include "utils/mips64/managed_register_mips64.h"
#include "utils/x86/managed_register_x86.h"
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "code_generator_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "code_generator_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "code_generator_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "code_generator_x86_64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "code_generator_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "code_generator_mips64.h"
+#endif
+
#include "gtest/gtest.h"
namespace art {
+#ifdef ART_ENABLE_CODEGEN_arm
// Provide our own codegen, that ensures the C calling conventions
// are preserved. Currently, ART and C do not match as R4 is caller-save
// in ART, and callee-save in C. Alternatively, we could use or write
@@ -80,7 +99,9 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
blocked_register_pairs_[arm::R6_R7] = false;
}
};
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
public:
TestCodeGeneratorX86(HGraph* graph,
@@ -105,6 +126,7 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
blocked_register_pairs_[x86::ECX_EDI] = false;
}
};
+#endif
class InternalCodeAllocator : public CodeAllocator {
public:
@@ -219,7 +241,7 @@ static void RunCode(CodeGenerator* codegen,
PrepareForRegisterAllocation(graph).Run();
liveness.Analyze();
- RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+ RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
hook_before_codegen(graph);
InternalCodeAllocator allocator;
@@ -234,37 +256,54 @@ static void RunCode(InstructionSet target_isa,
bool has_result,
Expected expected) {
CompilerOptions compiler_options;
+#ifdef ART_ENABLE_CODEGEN_arm
if (target_isa == kArm || target_isa == kThumb2) {
std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
ArmInstructionSetFeatures::FromCppDefines());
TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kArm64) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+ if (target_isa == kArm64) {
std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
Arm64InstructionSetFeatures::FromCppDefines());
arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kX86) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+ if (target_isa == kX86) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
+ TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kX86_64) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ if (target_isa == kX86_64) {
std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
X86_64InstructionSetFeatures::FromCppDefines());
x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kMips) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+ if (target_isa == kMips) {
std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
MipsInstructionSetFeatures::FromCppDefines());
mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kMips64) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+ if (target_isa == kMips64) {
std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
Mips64InstructionSetFeatures::FromCppDefines());
mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
}
+#endif
}
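
The RunCode rewrite above trades the else-if chain for one independently guarded if per architecture, so that any subset of ART_ENABLE_CODEGEN_* macros can be defined without leaving dangling else branches. A minimal sketch of the same conditional-compilation dispatch, with made-up Run* helpers; the two #define lines only stand in for what the build system would normally provide.

#include <cstdio>

// Normally supplied by the build system; defined here so the sketch is standalone.
#define ART_ENABLE_CODEGEN_arm
#define ART_ENABLE_CODEGEN_x86_64

enum InstructionSet { kArm, kThumb2, kArm64, kX86, kX86_64, kMips, kMips64 };

static void RunArm()    { std::puts("arm codegen"); }
static void RunX86_64() { std::puts("x86_64 codegen"); }

static void Dispatch(InstructionSet isa) {
  // Independent `if`s rather than `else if`, so each guarded block can be
  // compiled out without breaking the ones that remain.
#ifdef ART_ENABLE_CODEGEN_arm
  if (isa == kArm || isa == kThumb2) {
    RunArm();
  }
#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
  if (isa == kX86_64) {
    RunX86_64();
  }
#endif
  // ...the remaining ISAs follow the same pattern.
}

int main() {
  Dispatch(kX86_64);
  return 0;
}
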
static ::std::vector<InstructionSet> GetTargetISAs() {
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index a849448cf9..cc949c5275 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -21,8 +21,14 @@
#include "locations.h"
#include "nodes.h"
#include "utils/arm64/assembler_arm64.h"
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+#pragma GCC diagnostic pop
namespace art {
namespace arm64 {
@@ -34,87 +40,88 @@ static_assert((SP == 31) && (WSP == 31) && (XZR == 32) && (WZR == 32),
static inline int VIXLRegCodeFromART(int code) {
if (code == SP) {
- return vixl::kSPRegInternalCode;
+ return vixl::aarch64::kSPRegInternalCode;
}
if (code == XZR) {
- return vixl::kZeroRegCode;
+ return vixl::aarch64::kZeroRegCode;
}
return code;
}
static inline int ARTRegCodeFromVIXL(int code) {
- if (code == vixl::kSPRegInternalCode) {
+ if (code == vixl::aarch64::kSPRegInternalCode) {
return SP;
}
- if (code == vixl::kZeroRegCode) {
+ if (code == vixl::aarch64::kZeroRegCode) {
return XZR;
}
return code;
}
-static inline vixl::Register XRegisterFrom(Location location) {
+static inline vixl::aarch64::Register XRegisterFrom(Location location) {
DCHECK(location.IsRegister()) << location;
- return vixl::Register::XRegFromCode(VIXLRegCodeFromART(location.reg()));
+ return vixl::aarch64::Register::GetXRegFromCode(VIXLRegCodeFromART(location.reg()));
}
-static inline vixl::Register WRegisterFrom(Location location) {
+static inline vixl::aarch64::Register WRegisterFrom(Location location) {
DCHECK(location.IsRegister()) << location;
- return vixl::Register::WRegFromCode(VIXLRegCodeFromART(location.reg()));
+ return vixl::aarch64::Register::GetWRegFromCode(VIXLRegCodeFromART(location.reg()));
}
-static inline vixl::Register RegisterFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) {
DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type;
return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location);
}
-static inline vixl::Register OutputRegister(HInstruction* instr) {
+static inline vixl::aarch64::Register OutputRegister(HInstruction* instr) {
return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
}
-static inline vixl::Register InputRegisterAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) {
return RegisterFrom(instr->GetLocations()->InAt(input_index),
instr->InputAt(input_index)->GetType());
}
-static inline vixl::FPRegister DRegisterFrom(Location location) {
+static inline vixl::aarch64::FPRegister DRegisterFrom(Location location) {
DCHECK(location.IsFpuRegister()) << location;
- return vixl::FPRegister::DRegFromCode(location.reg());
+ return vixl::aarch64::FPRegister::GetDRegFromCode(location.reg());
}
-static inline vixl::FPRegister SRegisterFrom(Location location) {
+static inline vixl::aarch64::FPRegister SRegisterFrom(Location location) {
DCHECK(location.IsFpuRegister()) << location;
- return vixl::FPRegister::SRegFromCode(location.reg());
+ return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg());
}
-static inline vixl::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
DCHECK(Primitive::IsFloatingPointType(type)) << type;
return type == Primitive::kPrimDouble ? DRegisterFrom(location) : SRegisterFrom(location);
}
-static inline vixl::FPRegister OutputFPRegister(HInstruction* instr) {
+static inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) {
return FPRegisterFrom(instr->GetLocations()->Out(), instr->GetType());
}
-static inline vixl::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
return FPRegisterFrom(instr->GetLocations()->InAt(input_index),
instr->InputAt(input_index)->GetType());
}
-static inline vixl::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
- return Primitive::IsFloatingPointType(type) ? vixl::CPURegister(FPRegisterFrom(location, type))
- : vixl::CPURegister(RegisterFrom(location, type));
+static inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
+ return Primitive::IsFloatingPointType(type)
+ ? vixl::aarch64::CPURegister(FPRegisterFrom(location, type))
+ : vixl::aarch64::CPURegister(RegisterFrom(location, type));
}
-static inline vixl::CPURegister OutputCPURegister(HInstruction* instr) {
+static inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) {
return Primitive::IsFloatingPointType(instr->GetType())
- ? static_cast<vixl::CPURegister>(OutputFPRegister(instr))
- : static_cast<vixl::CPURegister>(OutputRegister(instr));
+ ? static_cast<vixl::aarch64::CPURegister>(OutputFPRegister(instr))
+ : static_cast<vixl::aarch64::CPURegister>(OutputRegister(instr));
}
-static inline vixl::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
+static inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
return Primitive::IsFloatingPointType(instr->InputAt(index)->GetType())
- ? static_cast<vixl::CPURegister>(InputFPRegisterAt(instr, index))
- : static_cast<vixl::CPURegister>(InputRegisterAt(instr, index));
+ ? static_cast<vixl::aarch64::CPURegister>(InputFPRegisterAt(instr, index))
+ : static_cast<vixl::aarch64::CPURegister>(InputRegisterAt(instr, index));
}
static inline int64_t Int64ConstantFrom(Location location) {
@@ -129,63 +136,70 @@ static inline int64_t Int64ConstantFrom(Location location) {
}
}
-static inline vixl::Operand OperandFrom(Location location, Primitive::Type type) {
+static inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) {
if (location.IsRegister()) {
- return vixl::Operand(RegisterFrom(location, type));
+ return vixl::aarch64::Operand(RegisterFrom(location, type));
} else {
- return vixl::Operand(Int64ConstantFrom(location));
+ return vixl::aarch64::Operand(Int64ConstantFrom(location));
}
}
-static inline vixl::Operand InputOperandAt(HInstruction* instr, int input_index) {
+static inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) {
return OperandFrom(instr->GetLocations()->InAt(input_index),
instr->InputAt(input_index)->GetType());
}
-static inline vixl::MemOperand StackOperandFrom(Location location) {
- return vixl::MemOperand(vixl::sp, location.GetStackIndex());
+static inline vixl::aarch64::MemOperand StackOperandFrom(Location location) {
+ return vixl::aarch64::MemOperand(vixl::aarch64::sp, location.GetStackIndex());
}
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base, size_t offset = 0) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+ size_t offset = 0) {
// A heap reference must be 32bit, so fit in a W register.
DCHECK(base.IsW());
- return vixl::MemOperand(base.X(), offset);
+ return vixl::aarch64::MemOperand(base.X(), offset);
}
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base,
- const vixl::Register& regoffset,
- vixl::Shift shift = vixl::LSL,
- unsigned shift_amount = 0) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+ const vixl::aarch64::Register& regoffset,
+ vixl::aarch64::Shift shift = vixl::aarch64::LSL,
+ unsigned shift_amount = 0) {
// A heap reference must be 32bit, so fit in a W register.
DCHECK(base.IsW());
- return vixl::MemOperand(base.X(), regoffset, shift, shift_amount);
+ return vixl::aarch64::MemOperand(base.X(), regoffset, shift, shift_amount);
}
-static inline vixl::MemOperand HeapOperand(const vixl::Register& base, Offset offset) {
+static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+ Offset offset) {
return HeapOperand(base, offset.SizeValue());
}
-static inline vixl::MemOperand HeapOperandFrom(Location location, Offset offset) {
+static inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) {
return HeapOperand(RegisterFrom(location, Primitive::kPrimNot), offset);
}
-static inline Location LocationFrom(const vixl::Register& reg) {
- return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.code()));
+static inline Location LocationFrom(const vixl::aarch64::Register& reg) {
+ return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.GetCode()));
}
-static inline Location LocationFrom(const vixl::FPRegister& fpreg) {
- return Location::FpuRegisterLocation(fpreg.code());
+static inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) {
+ return Location::FpuRegisterLocation(fpreg.GetCode());
}
-static inline vixl::Operand OperandFromMemOperand(const vixl::MemOperand& mem_op) {
+static inline vixl::aarch64::Operand OperandFromMemOperand(
+ const vixl::aarch64::MemOperand& mem_op) {
if (mem_op.IsImmediateOffset()) {
- return vixl::Operand(mem_op.offset());
+ return vixl::aarch64::Operand(mem_op.GetOffset());
} else {
DCHECK(mem_op.IsRegisterOffset());
- if (mem_op.extend() != vixl::NO_EXTEND) {
- return vixl::Operand(mem_op.regoffset(), mem_op.extend(), mem_op.shift_amount());
- } else if (mem_op.shift() != vixl::NO_SHIFT) {
- return vixl::Operand(mem_op.regoffset(), mem_op.shift(), mem_op.shift_amount());
+ if (mem_op.GetExtend() != vixl::aarch64::NO_EXTEND) {
+ return vixl::aarch64::Operand(mem_op.GetRegisterOffset(),
+ mem_op.GetExtend(),
+ mem_op.GetShiftAmount());
+ } else if (mem_op.GetShift() != vixl::aarch64::NO_SHIFT) {
+ return vixl::aarch64::Operand(mem_op.GetRegisterOffset(),
+ mem_op.GetShift(),
+ mem_op.GetShiftAmount());
} else {
LOG(FATAL) << "Should not reach here";
UNREACHABLE();
@@ -212,13 +226,13 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst
if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
// Uses logical operations.
- return vixl::Assembler::IsImmLogical(value, vixl::kXRegSize);
+ return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize);
} else if (instr->IsNeg()) {
// Uses mov -immediate.
- return vixl::Assembler::IsImmMovn(value, vixl::kXRegSize);
+ return vixl::aarch64::Assembler::IsImmMovn(value, vixl::aarch64::kXRegSize);
} else {
DCHECK(instr->IsAdd() ||
- instr->IsArm64IntermediateAddress() ||
+ instr->IsIntermediateAddress() ||
instr->IsBoundsCheck() ||
instr->IsCompare() ||
instr->IsCondition() ||
@@ -227,7 +241,8 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst
// Uses aliases of ADD/SUB instructions.
// If `value` does not fit but `-value` does, VIXL will automatically use
// the 'opposite' instruction.
- return vixl::Assembler::IsImmAddSub(value) || vixl::Assembler::IsImmAddSub(-value);
+ return vixl::aarch64::Assembler::IsImmAddSub(value)
+ || vixl::aarch64::Assembler::IsImmAddSub(-value);
}
}
@@ -263,30 +278,30 @@ static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers,
return true;
}
-static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+static inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
switch (op_kind) {
- case HArm64DataProcWithShifterOp::kASR: return vixl::ASR;
- case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL;
- case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR;
+ case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
+ case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
+ case HArm64DataProcWithShifterOp::kLSR: return vixl::aarch64::LSR;
default:
LOG(FATAL) << "Unexpected op kind " << op_kind;
UNREACHABLE();
- return vixl::NO_SHIFT;
+ return vixl::aarch64::NO_SHIFT;
}
}
-static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+static inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
switch (op_kind) {
- case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB;
- case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH;
- case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW;
- case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB;
- case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH;
- case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW;
+ case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
+ case HArm64DataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
+ case HArm64DataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW;
+ case HArm64DataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB;
+ case HArm64DataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH;
+ case HArm64DataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW;
default:
LOG(FATAL) << "Unexpected op kind " << op_kind;
UNREACHABLE();
- return vixl::NO_EXTEND;
+ return vixl::aarch64::NO_EXTEND;
}
}
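
The include block at the top of this header now brackets the VIXL aarch64 headers with diagnostic pragmas so that -Wshadow stays enabled for ART's own code while the third-party headers are exempt. The pattern in isolation (a standard header stands in for the external one, since reproducing VIXL here is beside the point):

// The pragmas confine the warning suppression to the wrapped includes only.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include <vector>  // stand-in for the external header that would trip -Wshadow
#pragma GCC diagnostic pop

#include <cstdio>

int main() {
  // Code after the pop is still checked with -Wshadow as usual.
  std::vector<int> v{1, 2, 3};
  std::printf("%zu\n", v.size());
  return 0;
}
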
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 49cfff46d8..e1bde7c737 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -88,13 +88,207 @@ void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
}
}
-void HDeadCodeElimination::RemoveDeadBlocks() {
- if (graph_->HasIrreducibleLoops()) {
- // Do not eliminate dead blocks if the graph has irreducible loops. We could
- // support it, but that would require changes in our loop representation to handle
- // multiple entry points. We decided it was not worth the complexity.
- return;
+void HDeadCodeElimination::MaybeRecordSimplifyIf() {
+ if (stats_ != nullptr) {
+ stats_->RecordStat(MethodCompilationStat::kSimplifyIf);
+ }
+}
+
+static bool HasInput(HCondition* instruction, HInstruction* input) {
+ return (instruction->InputAt(0) == input) ||
+ (instruction->InputAt(1) == input);
+}
+
+static bool HasEquality(IfCondition condition) {
+ switch (condition) {
+ case kCondEQ:
+ case kCondLE:
+ case kCondGE:
+ case kCondBE:
+ case kCondAE:
+ return true;
+ case kCondNE:
+ case kCondLT:
+ case kCondGT:
+ case kCondB:
+ case kCondA:
+ return false;
+ }
+}
+
+static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstruction* right) {
+ if (left == right && !Primitive::IsFloatingPointType(left->GetType())) {
+ return condition->GetBlock()->GetGraph()->GetIntConstant(
+ HasEquality(condition->GetCondition()) ? 1 : 0);
+ }
+
+ if (!left->IsConstant() || !right->IsConstant()) {
+ return nullptr;
+ }
+
+ if (left->IsIntConstant()) {
+ return condition->Evaluate(left->AsIntConstant(), right->AsIntConstant());
+ } else if (left->IsNullConstant()) {
+ return condition->Evaluate(left->AsNullConstant(), right->AsNullConstant());
+ } else if (left->IsLongConstant()) {
+ return condition->Evaluate(left->AsLongConstant(), right->AsLongConstant());
+ } else if (left->IsFloatConstant()) {
+ return condition->Evaluate(left->AsFloatConstant(), right->AsFloatConstant());
+ } else {
+ DCHECK(left->IsDoubleConstant());
+ return condition->Evaluate(left->AsDoubleConstant(), right->AsDoubleConstant());
+ }
+}
+
+// Simplify the pattern:
+//
+// B1 B2 ...
+// goto goto goto
+// \ | /
+// \ | /
+// B3
+// i1 = phi(input, input)
+// (i2 = condition on i1)
+// if i1 (or i2)
+// / \
+// / \
+// B4 B5
+//
+// Into:
+//
+// B1 B2 ...
+// | | |
+// B4 B5 B?
+//
+// This simplification cannot be applied for loop headers, as they
+// contain a suspend check.
+//
+// Note that we rely on the dead code elimination to get rid of B3.
+bool HDeadCodeElimination::SimplifyIfs() {
+ bool simplified_one_or_more_ifs = false;
+ bool rerun_dominance_and_loop_analysis = false;
+
+ for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ HInstruction* last = block->GetLastInstruction();
+ HInstruction* first = block->GetFirstInstruction();
+ if (last->IsIf() &&
+ block->HasSinglePhi() &&
+ block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) {
+ bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi());
+ bool has_only_phi_condition_and_if =
+ !has_only_phi_and_if &&
+ first->IsCondition() &&
+ HasInput(first->AsCondition(), block->GetFirstPhi()) &&
+ (first->GetNext() == last) &&
+ (last->InputAt(0) == first) &&
+ first->HasOnlyOneNonEnvironmentUse();
+
+ if (has_only_phi_and_if || has_only_phi_condition_and_if) {
+ DCHECK(!block->IsLoopHeader());
+ HPhi* phi = block->GetFirstPhi()->AsPhi();
+ bool phi_input_is_left = (first->InputAt(0) == phi);
+
+ // Walk over all inputs of the phis and update the control flow of
+ // predecessors feeding constants to the phi.
+ // Note that phi->InputCount() may change inside the loop.
+ for (size_t i = 0; i < phi->InputCount();) {
+ HInstruction* input = phi->InputAt(i);
+ HInstruction* value_to_check = nullptr;
+ if (has_only_phi_and_if) {
+ if (input->IsIntConstant()) {
+ value_to_check = input;
+ }
+ } else {
+ DCHECK(has_only_phi_condition_and_if);
+ if (phi_input_is_left) {
+ value_to_check = Evaluate(first->AsCondition(), input, first->InputAt(1));
+ } else {
+ value_to_check = Evaluate(first->AsCondition(), first->InputAt(0), input);
+ }
+ }
+ if (value_to_check == nullptr) {
+ // Could not evaluate to a constant, continue iterating over the inputs.
+ ++i;
+ } else {
+ HBasicBlock* predecessor_to_update = block->GetPredecessors()[i];
+ HBasicBlock* successor_to_update = nullptr;
+ if (value_to_check->AsIntConstant()->IsTrue()) {
+ successor_to_update = last->AsIf()->IfTrueSuccessor();
+ } else {
+ DCHECK(value_to_check->AsIntConstant()->IsFalse())
+ << value_to_check->AsIntConstant()->GetValue();
+ successor_to_update = last->AsIf()->IfFalseSuccessor();
+ }
+ predecessor_to_update->ReplaceSuccessor(block, successor_to_update);
+ phi->RemoveInputAt(i);
+ simplified_one_or_more_ifs = true;
+ if (block->IsInLoop()) {
+ rerun_dominance_and_loop_analysis = true;
+ }
+ // For simplicity, don't create a dead block, let the dead code elimination
+ // pass deal with it.
+ if (phi->InputCount() == 1) {
+ break;
+ }
+ }
+ }
+ if (block->GetPredecessors().size() == 1) {
+ phi->ReplaceWith(phi->InputAt(0));
+ block->RemovePhi(phi);
+ if (has_only_phi_condition_and_if) {
+ // Evaluate here (and not wait for a constant folding pass) to open
+ // more opportunities for DCE.
+ HInstruction* result = first->AsCondition()->TryStaticEvaluation();
+ if (result != nullptr) {
+ first->ReplaceWith(result);
+ block->RemoveInstruction(first);
+ }
+ }
+ }
+ if (simplified_one_or_more_ifs) {
+ MaybeRecordSimplifyIf();
+ }
+ }
+ }
+ }
+ // We need to re-analyze the graph in order to run DCE afterwards.
+ if (simplified_one_or_more_ifs) {
+ if (rerun_dominance_and_loop_analysis) {
+ graph_->ClearLoopInformation();
+ graph_->ClearDominanceInformation();
+ graph_->BuildDominatorTree();
+ } else {
+ graph_->ClearDominanceInformation();
+ // We have introduced critical edges, remove them.
+ graph_->SimplifyCFG();
+ graph_->ComputeDominanceInformation();
+ graph_->ComputeTryBlockInformation();
+ }
+ }
+
+ return simplified_one_or_more_ifs;
+}
+
+void HDeadCodeElimination::ConnectSuccessiveBlocks() {
+ // Order does not matter.
+ for (HReversePostOrderIterator it(*graph_); !it.Done();) {
+ HBasicBlock* block = it.Current();
+ if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
+ it.Advance();
+ continue;
+ }
+ HBasicBlock* successor = block->GetSingleSuccessor();
+ if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
+ it.Advance();
+ continue;
+ }
+ block->MergeWith(successor);
+ // Reiterate on this block in case it can be merged with its new successor.
}
+}
+
+bool HDeadCodeElimination::RemoveDeadBlocks() {
// Classify blocks as reachable/unreachable.
ArenaAllocator* allocator = graph_->GetArena();
ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false, kArenaAllocDCE);
@@ -132,23 +326,7 @@ void HDeadCodeElimination::RemoveDeadBlocks() {
graph_->ComputeTryBlockInformation();
}
}
-
- // Connect successive blocks created by dead branches. Order does not matter.
- for (HReversePostOrderIterator it(*graph_); !it.Done();) {
- HBasicBlock* block = it.Current();
- if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
- it.Advance();
- continue;
- }
- HBasicBlock* successor = block->GetSingleSuccessor();
- if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
- it.Advance();
- continue;
- }
- block->MergeWith(successor);
-
- // Reiterate on this block in case it can be merged with its new successor.
- }
+ return removed_one_or_more_blocks;
}
void HDeadCodeElimination::RemoveDeadInstructions() {
@@ -181,7 +359,20 @@ void HDeadCodeElimination::RemoveDeadInstructions() {
}
void HDeadCodeElimination::Run() {
- RemoveDeadBlocks();
+ // Do not eliminate dead blocks if the graph has irreducible loops. We could
+ // support it, but that would require changes in our loop representation to handle
+ // multiple entry points. We decided it was not worth the complexity.
+ if (!graph_->HasIrreducibleLoops()) {
+ // Simplify graph to generate more dead block patterns.
+ ConnectSuccessiveBlocks();
+ bool did_any_simplification = false;
+ did_any_simplification |= SimplifyIfs();
+ did_any_simplification |= RemoveDeadBlocks();
+ if (did_any_simplification) {
+ // Connect successive blocks created by dead branches.
+ ConnectSuccessiveBlocks();
+ }
+ }
SsaRedundantPhiElimination(graph_).Run();
RemoveDeadInstructions();
}
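
At the source level, the pattern SimplifyIfs targets is a flag that is assigned a constant on one path and then immediately re-tested; the predecessor feeding the constant can branch straight to the matching successor. The pair of functions below is a hand-written illustration of the before/after shapes (not generated ART IR), with the constant-feeding path in Before corresponding to the predecessor that gets rewired.

#include <cstdio>

static void TakenPath()    { std::puts("taken"); }
static void NotTakenPath() { std::puts("not taken"); }
static bool Computed(int x) { return x > 10; }

// Shape before simplification: one predecessor feeds the constant `true`
// into the flag (the phi), and a second `if` then re-tests that flag.
static void Before(bool p, int x) {
  bool flag;                  // becomes a phi at the join block
  if (p) {
    flag = true;              // constant input to the phi
  } else {
    flag = Computed(x);       // non-constant input, left alone
  }
  if (flag) {
    TakenPath();
  } else {
    NotTakenPath();
  }
}

// Shape after simplification: the constant-feeding predecessor branches
// directly to the `if true` successor; only the other path still tests.
static void After(bool p, int x) {
  if (p) {
    TakenPath();              // predecessor rewired to IfTrueSuccessor
  } else if (Computed(x)) {
    TakenPath();
  } else {
    NotTakenPath();
  }
}

int main() {
  Before(true, 3);
  After(true, 3);
  Before(false, 42);
  After(false, 42);
  return 0;
}
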
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 8d6008b845..58e700deba 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -31,18 +31,19 @@ class HDeadCodeElimination : public HOptimization {
public:
HDeadCodeElimination(HGraph* graph,
OptimizingCompilerStats* stats = nullptr,
- const char* name = kInitialDeadCodeEliminationPassName)
+ const char* name = kDeadCodeEliminationPassName)
: HOptimization(graph, name, stats) {}
void Run() OVERRIDE;
-
- static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination";
- static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final";
+ static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination";
private:
void MaybeRecordDeadBlock(HBasicBlock* block);
- void RemoveDeadBlocks();
+ void MaybeRecordSimplifyIf();
+ bool RemoveDeadBlocks();
void RemoveDeadInstructions();
+ bool SimplifyIfs();
+ void ConnectSuccessiveBlocks();
DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination);
};
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
index 015f910328..9142e29eff 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.h
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -26,7 +26,9 @@ namespace arm {
class DexCacheArrayFixups : public HOptimization {
public:
DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+ : HOptimization(graph, kDexCacheArrayFixupsArmPassName, stats) {}
+
+ static constexpr const char* kDexCacheArrayFixupsArmPassName = "dex_cache_array_fixups_arm";
void Run() OVERRIDE;
};
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
index 0f42d9ce0f..19bab08eb4 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "code_generator_mips.h"
#include "dex_cache_array_fixups_mips.h"
#include "base/arena_containers.h"
@@ -27,8 +28,9 @@ namespace mips {
*/
class DexCacheArrayFixupsVisitor : public HGraphVisitor {
public:
- explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+ explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen)
: HGraphVisitor(graph),
+ codegen_(down_cast<CodeGeneratorMIPS*>(codegen)),
dex_cache_array_bases_(std::less<const DexFile*>(),
// Attribute memory use to code generator.
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
@@ -41,9 +43,45 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
HMipsDexCacheArraysBase* base = entry.second;
base->MoveBeforeFirstUserAndOutOfLoops();
}
+ // Computing the dex cache base for PC-relative accesses will clobber RA with
+ // the NAL instruction on R2. Take a note of this before generating the method
+ // entry.
+ if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) {
+ codegen_->ClobberRA();
+ }
}
private:
+ void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+ // If this is a load with PC-relative access to the dex cache types array,
+ // we need to add the dex cache arrays base as the special input.
+ if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) {
+ // Initialize base for target dex file if needed.
+ const DexFile& dex_file = load_class->GetDexFile();
+ HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+ // Update the element offset in base.
+ DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
+ base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex()));
+ // Add the special argument base to the load.
+ load_class->AddSpecialInput(base);
+ }
+ }
+
+ void VisitLoadString(HLoadString* load_string) OVERRIDE {
+ // If this is a load with PC-relative access to the dex cache strings array,
+ // we need to add the dex cache arrays base as the special input.
+ if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
+ // Initialize base for target dex file if needed.
+ const DexFile& dex_file = load_string->GetDexFile();
+ HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+ // Update the element offset in base.
+ DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
+ base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
+ // Add the special argument base to the load.
+ load_string->AddSpecialInput(base);
+ }
+ }
+
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
// If this is an invoke with PC-relative access to the dex cache methods array,
// we need to add the dex cache arrays base as the special input.
@@ -74,6 +112,8 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
});
}
+ CodeGeneratorMIPS* codegen_;
+
using DexCacheArraysBaseMap =
ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>;
DexCacheArraysBaseMap dex_cache_array_bases_;
@@ -85,7 +125,7 @@ void DexCacheArrayFixups::Run() {
// that can be live-in at the irreducible loop header.
return;
}
- DexCacheArrayFixupsVisitor visitor(graph_);
+ DexCacheArrayFixupsVisitor visitor(graph_, codegen_);
visitor.VisitInsertionOrder();
visitor.MoveBasesIfNeeded();
}
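
The new VisitLoadClass and VisitLoadString overrides reuse the scheme already applied to invokes: every PC-relative access into a given dex file's cache arrays hangs off one lazily created base, which is then attached to the user as an extra input so the base computation can be shared and hoisted. A standalone sketch of that get-or-create bookkeeping, with hypothetical Base/BaseTable types standing in for HMipsDexCacheArraysBase and the visitor's map:

#include <cstdio>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-ins for the shared base node and its users.
struct Base {
  std::string dex_file;
  std::vector<std::string> users;  // models the AddSpecialInput back-links
};

class BaseTable {
 public:
  // One shared base per dex file, created on first use.
  Base* GetOrCreate(const std::string& dex_file) {
    std::unique_ptr<Base>& slot = bases_[dex_file];
    if (slot == nullptr) {
      slot = std::make_unique<Base>();
      slot->dex_file = dex_file;
    }
    return slot.get();
  }

  void AttachUser(const std::string& dex_file, const std::string& user) {
    GetOrCreate(dex_file)->users.push_back(user);
  }

  size_t BaseCount() const { return bases_.size(); }

 private:
  std::map<std::string, std::unique_ptr<Base>> bases_;
};

int main() {
  BaseTable table;
  table.AttachUser("core.dex", "LoadString #12");
  table.AttachUser("core.dex", "LoadClass #7");                  // reuses the same base
  table.AttachUser("framework.dex", "InvokeStaticOrDirect #3");
  std::printf("bases created: %zu\n", table.BaseCount());        // prints 2
  return 0;
}
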
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
index c8def2842e..861a199d6c 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.h
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.h
@@ -21,14 +21,23 @@
#include "optimization.h"
namespace art {
+
+class CodeGenerator;
+
namespace mips {
class DexCacheArrayFixups : public HOptimization {
public:
- DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {}
+ DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kDexCacheArrayFixupsMipsPassName, stats),
+ codegen_(codegen) {}
+
+ static constexpr const char* kDexCacheArrayFixupsMipsPassName = "dex_cache_array_fixups_mips";
void Run() OVERRIDE;
+
+ private:
+ CodeGenerator* codegen_;
};
} // namespace mips
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 9d67373321..b3d5341de0 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -31,7 +31,7 @@
#include "nodes.h"
#include "optimization.h"
#include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
#include "utils/assembler.h"
@@ -122,7 +122,10 @@ class HGraphVisualizerDisassembler {
new DisassemblerOptions(/* absolute_addresses */ false,
base_address,
end_address,
- /* can_read_literals */ true)));
+ /* can_read_literals */ true,
+ Is64BitInstructionSet(instruction_set)
+ ? &Thread::DumpThreadOffset<PointerSize::k64>
+ : &Thread::DumpThreadOffset<PointerSize::k32>)));
}
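
The DisassemblerOptions hunk above also passes a thread-offset printer selected by pointer width, that is, one of two instantiations of the same template chosen at runtime. The mechanism in isolation, with an illustrative DumpThreadOffset that is not the real Thread API:

#include <cstdint>
#include <iostream>

// Illustrative stand-in for the thread-offset printer template.
template <int kBits>
void DumpThreadOffset(std::ostream& os, uint32_t offset) {
  os << kBits << "-bit thread offset 0x" << std::hex << offset << std::dec << "\n";
}

using ThreadOffsetDumper = void (*)(std::ostream&, uint32_t);

int main() {
  bool is_64bit_isa = true;  // would come from an Is64BitInstructionSet-style check
  // Both instantiations exist at compile time; the flag picks one at runtime.
  ThreadOffsetDumper dump =
      is_64bit_isa ? &DumpThreadOffset<64> : &DumpThreadOffset<32>;
  dump(std::cout, 0xE8);
  return 0;
}
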
~HGraphVisualizerDisassembler() {
@@ -298,6 +301,12 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
stream << constant->AsIntConstant()->GetValue();
} else if (constant->IsLongConstant()) {
stream << constant->AsLongConstant()->GetValue();
+ } else if (constant->IsFloatConstant()) {
+ stream << constant->AsFloatConstant()->GetValue();
+ } else if (constant->IsDoubleConstant()) {
+ stream << constant->AsDoubleConstant()->GetValue();
+ } else if (constant->IsNullConstant()) {
+ stream << "null";
}
} else if (location.IsInvalid()) {
stream << "invalid";
@@ -401,6 +410,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
StartAttributeStream("is_string_length") << std::boolalpha
<< array_length->IsStringLength() << std::noboolalpha;
+ if (array_length->IsEmittedAtUseSite()) {
+ StartAttributeStream("emitted_at_use") << "true";
+ }
}
void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index 7c74816c26..cd4c830645 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -39,9 +39,9 @@ class HInductionVarAnalysis : public HOptimization {
void Run() OVERRIDE;
- private:
static constexpr const char* kInductionPassName = "induction_var_analysis";
+ private:
struct NodeInfo {
explicit NodeInfo(uint32_t d) : depth(d), done(false) {}
uint32_t depth;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index f9e78b0a8f..451aa38033 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -17,6 +17,7 @@
#include "inliner.h"
#include "art_method-inl.h"
+#include "base/enums.h"
#include "builder.h"
#include "class_linker.h"
#include "constant_folding.h"
@@ -35,7 +36,7 @@
#include "nodes.h"
#include "optimizing_compiler.h"
#include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "quick/inline_method_analyser.h"
#include "sharpening.h"
#include "ssa_builder.h"
@@ -151,7 +152,7 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol
}
ClassLinker* cl = Runtime::Current()->GetClassLinker();
- size_t pointer_size = cl->GetImagePointerSize();
+ PointerSize pointer_size = cl->GetImagePointerSize();
if (invoke->IsInvokeInterface()) {
resolved_method = info.GetTypeHandle()->FindVirtualMethodForInterface(
resolved_method, pointer_size);
@@ -208,12 +209,8 @@ static uint32_t FindClassIndexIn(mirror::Class* cls,
DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
// TODO: deal with proxy classes.
} else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+ DCHECK_EQ(cls->GetDexCache(), dex_cache.Get());
index = cls->GetDexTypeIndex();
- } else {
- index = cls->FindTypeIndexInOtherDexFile(dex_file);
- }
-
- if (index != DexFile::kDexNoIndex) {
// Update the dex cache to ensure the class is in. The generated code will
// consider it is. We make it safe by updating the dex cache, as other
// dex files might also load the class, and there is no guarantee the dex
@@ -221,6 +218,14 @@ static uint32_t FindClassIndexIn(mirror::Class* cls,
if (dex_cache->GetResolvedType(index) == nullptr) {
dex_cache->SetResolvedType(index, cls);
}
+ } else {
+ index = cls->FindTypeIndexInOtherDexFile(dex_file);
+ // We cannot guarantee the entry in the dex cache will resolve to the same class,
+ // as there may be different class loaders. So only return the index if it's
+ // the right class in the dex cache already.
+ if (index != DexFile::kDexNoIndex && dex_cache->GetResolvedType(index) != cls) {
+ index = DexFile::kDexNoIndex;
+ }
}
return index;
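
The reordered logic above treats the two cases differently: within the caller's own dex file the cache entry may be installed if missing, but a type index found in another dex file is only usable when that file's cache already resolves it to the very same class, since another class loader could bind the index to a different type. A toy version of that guard, with a plain map standing in for the dex cache:

#include <cstdint>
#include <cstdio>
#include <unordered_map>

struct Class { const char* name; };

constexpr uint32_t kNoIndex = UINT32_MAX;

// Toy "dex cache": type index -> resolved class (absent if unresolved).
using DexCache = std::unordered_map<uint32_t, const Class*>;

// Return `index` only if the other dex file's cache already resolves it to
// exactly `cls`; otherwise report it as unusable (kNoIndex).
uint32_t UsableIndexInOtherDexFile(const DexCache& other_cache,
                                   uint32_t index,
                                   const Class* cls) {
  if (index == kNoIndex) {
    return kNoIndex;
  }
  auto it = other_cache.find(index);
  if (it == other_cache.end() || it->second != cls) {
    return kNoIndex;  // may resolve to a different class under another loader
  }
  return index;
}

int main() {
  Class string_a{"java.lang.String (loader A)"};
  Class string_b{"java.lang.String (loader B)"};
  DexCache cache{{7, &string_b}};
  std::printf("same class:      %u\n",
              static_cast<unsigned>(UsableIndexInOtherDexFile(cache, 7, &string_b)));
  std::printf("different class: %u\n",
              static_cast<unsigned>(UsableIndexInOtherDexFile(cache, 7, &string_a)));
  return 0;
}
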
@@ -239,7 +244,7 @@ class ScopedProfilingInfoInlineUse {
~ScopedProfilingInfoInlineUse() {
if (profiling_info_ != nullptr) {
- size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+ PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
DCHECK_EQ(profiling_info_, method_->GetProfilingInfo(pointer_size));
Runtime::Current()->GetJit()->GetCodeCache()->DoneCompilerUse(method_, self_);
}
@@ -273,7 +278,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
return false;
}
MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
- mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
+ mirror::DexCache* const dex_cache = IsSameDexFile(caller_dex_file, *ref.dex_file)
? caller_compilation_unit_.GetDexCache().Get()
: class_linker->FindDexCache(soa.Self(), *ref.dex_file);
resolved_method = dex_cache->GetResolvedMethod(
@@ -386,7 +391,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
}
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
- size_t pointer_size = class_linker->GetImagePointerSize();
+ PointerSize pointer_size = class_linker->GetImagePointerSize();
if (invoke_instruction->IsInvokeInterface()) {
resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
resolved_method, pointer_size);
@@ -478,7 +483,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
}
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
- size_t pointer_size = class_linker->GetImagePointerSize();
+ PointerSize pointer_size = class_linker->GetImagePointerSize();
const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
bool all_targets_inlined = true;
@@ -640,7 +645,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
return false;
}
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
- size_t pointer_size = class_linker->GetImagePointerSize();
+ PointerSize pointer_size = class_linker->GetImagePointerSize();
DCHECK(resolved_method != nullptr);
ArtMethod* actual_method = nullptr;
@@ -656,8 +661,8 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
}
ArtMethod* new_method = nullptr;
if (invoke_instruction->IsInvokeInterface()) {
- new_method = ic.GetTypeAt(i)->GetEmbeddedImTableEntry(
- method_index % mirror::Class::kImtSize, pointer_size);
+ new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
+ method_index, pointer_size);
if (new_method->IsRuntimeMethod()) {
// Bail out as soon as we see a conflict trampoline in one of the target's
// interface table.
@@ -804,8 +809,6 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho
bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* method,
HInstruction** return_replacement) {
- const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-
if (method->IsProxyMethod()) {
VLOG(compiler) << "Method " << PrettyMethod(method)
<< " is not inlined because of unimplemented inline support for proxy methods.";
@@ -828,15 +831,6 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- uint32_t method_index = FindMethodIndexIn(
- method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
- if (method_index == DexFile::kDexNoIndex) {
- VLOG(compiler) << "Call to "
- << PrettyMethod(method)
- << " cannot be inlined because unaccessible to caller";
- return false;
- }
-
bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
const DexFile::CodeItem* code_item = method->GetCodeItem();
@@ -873,7 +867,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
if (Runtime::Current()->UseJitCompilation() ||
!compiler_driver_->IsMethodVerifiedWithoutFailures(
method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+ VLOG(compiler) << "Method " << PrettyMethod(method)
<< " couldn't be verified, so it cannot be inlined";
return false;
}
@@ -883,7 +877,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
// Case of a static method that cannot be inlined because it implicitly
// requires an initialization check of its declaring class.
- VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+ VLOG(compiler) << "Method " << PrettyMethod(method)
<< " is not inlined because it is static and requires a clinit"
<< " check that cannot be emitted due to Dex cache limitations";
return false;
@@ -893,7 +887,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, caller_dex_file);
+ VLOG(compiler) << "Successfully inlined " << PrettyMethod(method);
MaybeRecordStat(kInlinedInvoke);
return true;
}
@@ -1011,7 +1005,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
// Check whether the field is final. If it is, we need to add a barrier.
- size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+ PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
DCHECK(resolved_field != nullptr);
if (resolved_field->IsFinal()) {
@@ -1037,7 +1031,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex
uint32_t field_index,
HInstruction* obj)
SHARED_REQUIRES(Locks::mutator_lock_) {
- size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+ PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
DCHECK(resolved_field != nullptr);
HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet(
@@ -1065,7 +1059,7 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex
HInstruction* obj,
HInstruction* value)
SHARED_REQUIRES(Locks::mutator_lock_) {
- size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+ PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
DCHECK(resolved_field != nullptr);
HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet(
@@ -1094,8 +1088,11 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
uint32_t method_index = resolved_method->GetDexMethodIndex();
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
+ Handle<mirror::ClassLoader> class_loader(handles_->NewHandle(
+ resolved_method->GetDeclaringClass()->GetClassLoader()));
+
DexCompilationUnit dex_compilation_unit(
- caller_compilation_unit_.GetClassLoader(),
+ class_loader.ToJObject(),
class_linker,
callee_dex_file,
code_item,
@@ -1404,7 +1401,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod*
}
}
- size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+ PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
// Iterate over the list of parameter types and test whether any of the
// actual inputs has a more specific reference type than the type declared in
@@ -1461,7 +1458,7 @@ void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method,
// TODO: we could be more precise by merging the phi inputs but that requires
// some functionality from the reference type propagation.
DCHECK(return_replacement->IsPhi());
- size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+ PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */, pointer_size);
return_replacement->SetReferenceTypeInfo(GetClassRTI(cls));
}
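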
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index b4125299ea..e5dab569fd 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -16,8 +16,10 @@
#include "instruction_builder.h"
+#include "art_method-inl.h"
#include "bytecode_utils.h"
#include "class_linker.h"
+#include "dex_instruction-inl.h"
#include "driver/compiler_options.h"
#include "scoped_thread_state_change.h"
@@ -890,7 +892,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
return_type,
dex_pc,
method_idx,
- resolved_method->GetDexMethodIndex());
+ resolved_method->GetImtIndex());
}
return HandleInvoke(invoke,
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 9cfc065da6..517cf76831 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -30,6 +30,8 @@
namespace art {
+class Instruction;
+
class HInstructionBuilder : public ValueObject {
public:
HInstructionBuilder(HGraph* graph,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e0410dcdb2..4ca0600dba 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -920,6 +920,7 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct
void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
HConstant* input_cst = instruction->GetConstantRight();
HInstruction* input_other = instruction->GetLeastConstantLeft();
+ bool integral_type = Primitive::IsIntegralType(instruction->GetType());
if ((input_cst != nullptr) && input_cst->IsArithmeticZero()) {
// Replace code looking like
// ADD dst, src, 0
@@ -928,7 +929,7 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
// Note that we cannot optimize `x + 0.0` to `x` for floating-point. When
// `x` is `-0.0`, the former expression yields `0.0`, while the later
// yields `-0.0`.
- if (Primitive::IsIntegralType(instruction->GetType())) {
+ if (integral_type) {
instruction->ReplaceWith(input_other);
instruction->GetBlock()->RemoveInstruction(instruction);
RecordSimplification();
@@ -974,10 +975,31 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
// so no need to return.
TryHandleAssociativeAndCommutativeOperation(instruction);
- if ((instruction->GetLeft()->IsSub() || instruction->GetRight()->IsSub()) &&
+ if ((left->IsSub() || right->IsSub()) &&
TrySubtractionChainSimplification(instruction)) {
return;
}
+
+ if (integral_type) {
+ // Replace code patterns looking like
+ // SUB dst1, x, y SUB dst1, x, y
+ // ADD dst2, dst1, y ADD dst2, y, dst1
+ // with
+ // SUB dst1, x, y
+    // The ADD instruction is not needed in this case; we may use
+    // one of the inputs of the SUB instead.
+ if (left->IsSub() && left->InputAt(1) == right) {
+ instruction->ReplaceWith(left->InputAt(0));
+ RecordSimplification();
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ } else if (right->IsSub() && right->InputAt(1) == left) {
+ instruction->ReplaceWith(right->InputAt(0));
+ RecordSimplification();
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+ }
}
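
The new rewrite in VisitAdd is gated on integral_type because (x - y) + y == x is exact in two's-complement arithmetic, where any wraparound in the subtraction cancels in the addition, but not in floating point, where the intermediate result can round x away entirely (or become NaN). A quick demonstration with made-up values:

#include <cstdint>
#include <cstdio>

int main() {
  // Integral: the identity holds even when the subtraction wraps around.
  uint32_t x = 3u, y = 0xFFFFFFF0u;
  std::printf("int:    ((x - y) + y) == x ? %d\n", ((x - y) + y) == x ? 1 : 0);

  // Floating point: the small term is rounded away in the intermediate result.
  double fx = 1.0, fy = 1e17;
  std::printf("double: (fx - fy) + fy = %g, but fx = %g\n", (fx - fy) + fy, fx);
  return 0;
}
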
void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
@@ -1511,6 +1533,29 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) {
if (TrySubtractionChainSimplification(instruction)) {
return;
}
+
+ if (left->IsAdd()) {
+ // Replace code patterns looking like
+ // ADD dst1, x, y ADD dst1, x, y
+ // SUB dst2, dst1, y SUB dst2, dst1, x
+ // with
+ // ADD dst1, x, y
+    // The SUB instruction is not needed in this case; we may use
+    // one of the inputs of the ADD instead.
+ // It is applicable to integral types only.
+ DCHECK(Primitive::IsIntegralType(type));
+ if (left->InputAt(1) == right) {
+ instruction->ReplaceWith(left->InputAt(0));
+ RecordSimplification();
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ } else if (left->InputAt(0) == right) {
+ instruction->ReplaceWith(left->InputAt(1));
+ RecordSimplification();
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+ }
}
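
Symmetrically, the VisitSub rewrite collapses a subtraction whose minuend is an ADD sharing an operand with the subtrahend; the DCHECK records that this point is only reached for integral types, where the cancellation is exact even on overflow. A before/after sketch (unsigned arithmetic is used because signed overflow is undefined in C++, whereas ART's 32-bit HIR arithmetic wraps):

#include <cassert>
#include <cstdint>

// The pass rewrites `(x + y) - y` to `x` (and `(x + y) - x` to `y`).
uint32_t BeforeSimplification(uint32_t x, uint32_t y) {
  uint32_t add = x + y;   // ADD dst1, x, y
  return add - y;         // SUB dst2, dst1, y   <- becomes redundant
}

uint32_t AfterSimplification(uint32_t x, uint32_t y) {
  (void)y;
  return x;               // users of the SUB now read `x` directly
}

int main() {
  assert(BeforeSimplification(7u, 5u) == AfterSimplification(7u, 5u));
  assert(BeforeSimplification(0xFFFFFFFFu, 2u) == AfterSimplification(0xFFFFFFFFu, 2u));
  return 0;
}
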
void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) {
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index cd026b8770..495f3fd232 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -14,8 +14,10 @@
* limitations under the License.
*/
+#include "code_generator.h"
#include "instruction_simplifier_arm.h"
#include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
namespace art {
namespace arm {
@@ -38,6 +40,46 @@ void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) {
}
}
+void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
+ size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ Primitive::Type type = instruction->GetType();
+
+ if (type == Primitive::kPrimLong
+ || type == Primitive::kPrimFloat
+ || type == Primitive::kPrimDouble) {
+    // T32 doesn't support the ShiftedRegOffset memory addressing mode for
+    // these types, so this optimization cannot be applied.
+ return;
+ }
+
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) {
+ size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
+ size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
+ Primitive::Type type = instruction->GetComponentType();
+
+ if (type == Primitive::kPrimLong
+ || type == Primitive::kPrimFloat
+ || type == Primitive::kPrimDouble) {
+    // T32 doesn't support the ShiftedRegOffset memory addressing mode for
+    // these types, so this optimization cannot be applied.
+ return;
+ }
+
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
+}
} // namespace arm
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 14c940eb21..782110c40a 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -38,6 +38,8 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor {
void VisitMul(HMul* instruction) OVERRIDE;
void VisitOr(HOr* instruction) OVERRIDE;
void VisitAnd(HAnd* instruction) OVERRIDE;
+ void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+ void VisitArraySet(HArraySet* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
@@ -46,7 +48,9 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor {
class InstructionSimplifierArm : public HOptimization {
public:
InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, "instruction_simplifier_arm", stats) {}
+ : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
+
+ static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
void Run() OVERRIDE {
InstructionSimplifierArmVisitor visitor(graph_, stats_);
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 983d31d168..6d107d571f 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -28,56 +28,6 @@ using helpers::CanFitInShifterOperand;
using helpers::HasShifterOperand;
using helpers::ShifterOperandSupportsExtension;
-void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
- HInstruction* array,
- HInstruction* index,
- size_t data_offset) {
- if (kEmitCompilerReadBarrier) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
- //
- // TODO: Handle this case properly in the ARM64 code generator and
- // re-enable this optimization; otherwise, remove this TODO.
- // b/26601270
- return;
- }
- if (index->IsConstant() ||
- (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
- // When the index is a constant all the addressing can be fitted in the
- // memory access instruction, so do not split the access.
- return;
- }
- if (access->IsArraySet() &&
- access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
- // The access may require a runtime call or the original array pointer.
- return;
- }
-
- // Proceed to extract the base address computation.
- ArenaAllocator* arena = GetGraph()->GetArena();
-
- HIntConstant* offset = GetGraph()->GetIntConstant(data_offset);
- HArm64IntermediateAddress* address =
- new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
- address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
- access->GetBlock()->InsertInstructionBefore(address, access);
- access->ReplaceInput(address, 0);
- // Both instructions must depend on GC to prevent any instruction that can
- // trigger GC to be inserted between the two.
- access->AddSideEffects(SideEffects::DependsOnGC());
- DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
- DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
- // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
- // is an HArm64IntermediateAddress and generate appropriate code.
- // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
- // `HArm64Load` and `HArm64Store`). We defer these changes because these new instructions would
- // not bring any advantages yet.
- // Also see the comments in
- // `InstructionCodeGeneratorARM64::VisitArrayGet()` and
- // `InstructionCodeGeneratorARM64::VisitArraySet()`.
- RecordSimplification();
-}
-
bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
HInstruction* bitfield_op,
bool do_merge) {
@@ -190,19 +140,23 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) {
void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
- TryExtractArrayAccessAddress(instruction,
- instruction->GetArray(),
- instruction->GetIndex(),
- data_offset);
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
}
void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
- TryExtractArrayAccessAddress(instruction,
- instruction->GetArray(),
- instruction->GetIndex(),
- data_offset);
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
}
void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4735f85ab0..f71684efe9 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -35,10 +35,6 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
}
}
- void TryExtractArrayAccessAddress(HInstruction* access,
- HInstruction* array,
- HInstruction* index,
- size_t data_offset);
bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
bool TryMergeIntoShifterOperand(HInstruction* use,
HInstruction* bitfield_op,
@@ -86,8 +82,9 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
class InstructionSimplifierArm64 : public HOptimization {
public:
InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, "instruction_simplifier_arm64", stats) {}
-
+ : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
+  static constexpr const char* kInstructionSimplifierArm64PassName =
+      "instruction_simplifier_arm64";
void Run() OVERRIDE {
InstructionSimplifierArm64Visitor visitor(graph_, stats_);
visitor.VisitReversePostOrder();
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index dab1ebc16d..8f7778fe68 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -226,4 +226,59 @@ bool TryMergeNegatedInput(HBinaryOperation* op) {
return false;
}
+
+bool TryExtractArrayAccessAddress(HInstruction* access,
+ HInstruction* array,
+ HInstruction* index,
+ size_t data_offset) {
+ if (kEmitCompilerReadBarrier) {
+ // The read barrier instrumentation does not support the
+ // HIntermediateAddress instruction yet.
+ //
+    // TODO: Handle this case properly in the ARM and ARM64 code generators and
+ // re-enable this optimization; otherwise, remove this TODO.
+ // b/26601270
+ return false;
+ }
+ if (index->IsConstant() ||
+ (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
+ // When the index is a constant all the addressing can be fitted in the
+ // memory access instruction, so do not split the access.
+ return false;
+ }
+ if (access->IsArraySet() &&
+ access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
+ // The access may require a runtime call or the original array pointer.
+ return false;
+ }
+
+ // Proceed to extract the base address computation.
+ HGraph* graph = access->GetBlock()->GetGraph();
+ ArenaAllocator* arena = graph->GetArena();
+
+ HIntConstant* offset = graph->GetIntConstant(data_offset);
+ HIntermediateAddress* address =
+ new (arena) HIntermediateAddress(array, offset, kNoDexPc);
+ address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
+ access->GetBlock()->InsertInstructionBefore(address, access);
+ access->ReplaceInput(address, 0);
+ // Both instructions must depend on GC to prevent any instruction that can
+ // trigger GC to be inserted between the two.
+ access->AddSideEffects(SideEffects::DependsOnGC());
+ DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+ DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+ // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
+ // is an HIntermediateAddress and generate appropriate code.
+ // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
+  // `HArm64Load` and `HArm64Store`, `HArmLoad` and `HArmStore`). We defer these changes
+ // because these new instructions would not bring any advantages yet.
+ // Also see the comments in
+ // `InstructionCodeGeneratorARM::VisitArrayGet()`
+ // `InstructionCodeGeneratorARM::VisitArraySet()`
+ // `InstructionCodeGeneratorARM64::VisitArrayGet()`
+ // `InstructionCodeGeneratorARM64::VisitArraySet()`.
+ return true;
+}
+
} // namespace art
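For reference, the effect of TryExtractArrayAccessAddress above is to split an array access into a reusable base-address computation plus a shifted-register access. A conceptual C++ sketch (not ART code; `data_offset` and `shift` stand in for mirror::Array::DataOffset() and the component-size shift):

  #include <cstddef>
  #include <cstdint>

  inline int32_t LoadIntElement(const uint8_t* array, size_t index,
                                size_t data_offset, unsigned shift) {
    // What HIntermediateAddress computes; it can be shared by several
    // gets/sets on the same array and hoisted out of loops.
    const uint8_t* intermediate_address = array + data_offset;
    // The remaining access fits a shifted-register offset addressing mode.
    return *reinterpret_cast<const int32_t*>(intermediate_address + (index << shift));
  }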
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index b1fe8f4756..56804f5e90 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -26,6 +26,11 @@ bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
// a negated bitwise instruction.
bool TryMergeNegatedInput(HBinaryOperation* op);
+bool TryExtractArrayAccessAddress(HInstruction* access,
+ HInstruction* array,
+ HInstruction* index,
+ size_t data_offset);
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 3429a8fdbb..1a8eb58857 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -27,9 +27,6 @@ namespace art {
class CompilerDriver;
class DexFile;
-// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
-static constexpr bool kRoundIsPlusPointFive = false;
-
// Positive floating-point infinities.
static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 579fb9d3bb..0bbc0e54bc 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -41,6 +41,92 @@ ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
+ Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
+ Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
+ Register tmp = locations->GetTemp(3).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+ // Compute the base destination address in `dst_curr_addr`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(dst_curr_addr, dest, element_size * constant + offset);
+ } else {
+ __ add(dst_curr_addr,
+ dest,
+ ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(dst_curr_addr, offset);
+ }
+
+ Label loop;
+ __ Bind(&loop);
+ __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex));
+ __ MaybeUnpoisonHeapReference(tmp);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp = ReadBarrier::Mark(tmp);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp, SP);
+ DCHECK_NE(tmp, LR);
+ DCHECK_NE(tmp, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(src_curr_addr, IP);
+ DCHECK_NE(dst_curr_addr, IP);
+ DCHECK_NE(src_stop_addr, IP);
+ DCHECK_NE(tmp, IP);
+ DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(tmp);
+ __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex));
+ __ cmp(src_curr_addr, ShifterOperand(src_stop_addr));
+ __ b(&loop, NE);
+ __ b(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM);
+};
+
+#undef __
+
bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
Dispatch(invoke);
LocationSummary* res = invoke->GetLocations();
@@ -1201,7 +1287,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke,
}
__ LoadFromOffset(kLoadWord, LR, TR,
- QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value());
+ QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pIndexOf).Int32Value());
CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
__ blx(LR);
@@ -1212,7 +1298,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke,
void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
// best to align the inputs accordingly.
@@ -1232,7 +1318,7 @@ void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
// best to align the inputs accordingly.
@@ -1250,7 +1336,7 @@ void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1270,8 +1356,10 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel(), EQ);
- __ LoadFromOffset(
- kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromBytes).Int32Value());
+ __ LoadFromOffset(kLoadWord,
+ LR,
+ TR,
+ QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromBytes).Int32Value());
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
__ blx(LR);
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1280,7 +1368,7 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1298,8 +1386,10 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) {
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
//
// all include a null check on `data` before calling that method.
- __ LoadFromOffset(
- kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromChars).Int32Value());
+ __ LoadFromOffset(kLoadWord,
+ LR,
+ TR,
+ QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromChars).Int32Value());
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
__ blx(LR);
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1307,7 +1397,7 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1325,7 +1415,7 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke)
__ b(slow_path->GetEntryLabel(), EQ);
__ LoadFromOffset(kLoadWord,
- LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromString).Int32Value());
+ LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromString).Int32Value());
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
__ blx(LR);
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1333,9 +1423,9 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke)
}
void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1358,6 +1448,13 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
locations->SetInAt(4, Location::RequiresRegister());
}
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP cannot be used in
+    // ReadBarrierSystemArrayCopySlowPathARM (because that register
+ // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+ // temporary register from the register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
static void CheckPosition(ArmAssembler* assembler,
@@ -1423,9 +1520,9 @@ static void CheckPosition(ArmAssembler* assembler,
}
void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
ArmAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1434,18 +1531,22 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1461,7 +1562,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmp(src, ShifterOperand(dest));
- __ b(slow_path->GetEntryLabel(), EQ);
+ __ b(intrinsic_slow_path->GetEntryLabel(), EQ);
}
// Checked when building locations.
@@ -1473,7 +1574,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
__ b(&conditions_on_positions_validated, NE);
}
__ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
- __ b(slow_path->GetEntryLabel(), GT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), GT);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1486,19 +1587,19 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
} else {
__ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
}
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1507,7 +1608,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ cmp(length.AsRegister<Register>(), ShifterOperand(0));
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
// Validity checks: source.
@@ -1515,7 +1616,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1524,7 +1625,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1533,112 +1634,287 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
- __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ }
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+      // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+      // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
- __ cmp(temp1, ShifterOperand(temp2));
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+ }
+ } else {
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- Label do_copy;
- __ b(&do_copy, EQ);
- if (!did_unpoison) {
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+      // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+      // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
__ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ // No need to unpoison the result, we're comparing against null.
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
- // No need to unpoison the result, we're comparing against null.
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ b(slow_path->GetEntryLabel(), NE);
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp3` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ }
+    // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
__ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
-
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ // Compute the base source address in `temp1`.
if (src_pos.IsConstant()) {
int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp1, src, element_size * constant + offset);
} else {
- __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2));
+ __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift));
__ AddConstant(temp1, offset);
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(temp2, dest, element_size * constant + offset);
- } else {
- __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2));
- __ AddConstant(temp2, offset);
- }
-
+ // Compute the end source address in `temp3`.
if (length.IsConstant()) {
int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp3, temp1, element_size * constant);
} else {
- __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2));
+ __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- Label loop, done;
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&done, EQ);
- __ Bind(&loop);
- __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
- __ str(IP, Address(temp2, element_size, Address::PostIndex));
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&loop, NE);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // The base destination address is computed later, as `temp2` is
+ // used for intermediate computations.
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+    //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `temp2`.
+ __ add(src, src, ShifterOperand(temp2, LSR, 32));
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS,
+    // which can be a 16-bit instruction, unlike the TST immediate.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+    // The carry flag is the last bit shifted out by LSRS.
+ __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+ // Fast-path copy.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ Label loop, done;
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1647,7 +1923,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1665,7 +1941,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
LocationSummary* const locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
const InvokeRuntimeCallingConvention calling_convention;
@@ -1692,7 +1968,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke)
DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
LocationSummary* const locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
const InvokeRuntimeCallingConvention calling_convention;
@@ -1718,7 +1994,7 @@ static void GenFPToFPCall(HInvoke* invoke,
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0)));
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1)));
- __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmWordSize>(entry).Int32Value());
+ __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value());
// Native code uses the soft float ABI.
__ vmovrrd(calling_convention.GetRegisterAt(0),
calling_convention.GetRegisterAt(1),
@@ -1744,7 +2020,7 @@ static void GenFPFPToFPCall(HInvoke* invoke,
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(2)));
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(3)));
- __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmWordSize>(entry).Int32Value());
+ __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value());
// Native code uses the soft float ABI.
__ vmovrrd(calling_convention.GetRegisterAt(0),
calling_convention.GetRegisterAt(1),
@@ -1979,6 +2255,51 @@ void IntrinsicCodeGeneratorARM::VisitShortReverseBytes(HInvoke* invoke) {
__ revsh(out, in);
}
+static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmAssembler* assembler) {
+ DCHECK(Primitive::IsIntOrLongType(type)) << type;
+ DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+ DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
+
+ bool is_long = type == Primitive::kPrimLong;
+ LocationSummary* locations = instr->GetLocations();
+ Location in = locations->InAt(0);
+ Register src_0 = is_long ? in.AsRegisterPairLow<Register>() : in.AsRegister<Register>();
+ Register src_1 = is_long ? in.AsRegisterPairHigh<Register>() : src_0;
+ SRegister tmp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
+ DRegister tmp_d = FromLowSToD(tmp_s);
+ Register out_r = locations->Out().AsRegister<Register>();
+
+ // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
+  // According to the Cortex-A57 and Cortex-A72 optimization guides, transferring data from a core
+  // register to the upper or lower half of a VFP D-register incurs extra latency compared to using
+  // the full D-register. That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of
+  // 'vmov d0[0], r0'.
+ __ vmovdrr(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
+ __ vcntd(tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
+ __ vpaddld(tmp_d, tmp_d, 8, /* is_unsigned */ true); // Temp DReg |--c|--c|--c|--c|
+ __ vpaddld(tmp_d, tmp_d, 16, /* is_unsigned */ true); // Temp DReg |------c|------c|
+ if (is_long) {
+ __ vpaddld(tmp_d, tmp_d, 32, /* is_unsigned */ true); // Temp DReg |--------------c|
+ }
+ __ vmovrs(out_r, tmp_s);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerBitCount(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+ invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerBitCount(HInvoke* invoke) {
+ GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongBitCount(HInvoke* invoke) {
+ VisitIntegerBitCount(invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongBitCount(HInvoke* invoke) {
+ GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
+}
+
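The vcnt/vpaddl sequence in GenBitCount above computes a population count by producing per-byte bit counts and then pairwise-summing adjacent lanes. A scalar sketch of the same reduction (illustration only, not ART code):

  #include <cstdint>

  inline uint32_t PopCount64(uint64_t v) {
    // Per-2-bit, per-4-bit, then per-byte counts; the per-byte stage is what
    // vcnt.8 produces in each lane of the D register.
    v = v - ((v >> 1) & 0x5555555555555555ULL);
    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
    // Sum the eight byte counts into a single total, the same result the
    // successive vpaddl.u8/u16/u32 pairwise adds accumulate.
    return static_cast<uint32_t>((v * 0x0101010101010101ULL) >> 56);
  }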
void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kNoCall,
@@ -2119,8 +2440,6 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
__ Lsr(out, out, 5);
}
-UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount)
UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 1d507530aa..91374b3108 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -26,12 +26,15 @@
#include "mirror/string.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
-#include "utils/arm64/constants_arm64.h"
-#include "vixl/a64/disasm-a64.h"
-#include "vixl/a64/macro-assembler-a64.h"
+using namespace vixl::aarch64; // NOLINT(build/namespaces)
-using namespace vixl; // NOLINT(build/namespaces)
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
namespace art {
@@ -57,15 +60,15 @@ ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_
} // namespace
-vixl::MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
- return codegen_->GetAssembler()->vixl_masm_;
+MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
+ return codegen_->GetVIXLAssembler();
}
ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
return codegen_->GetGraph()->GetArena();
}
-#define __ codegen->GetAssembler()->vixl_masm_->
+#define __ codegen->GetVIXLAssembler()->
static void MoveFromReturnRegister(Location trg,
Primitive::Type type,
@@ -141,6 +144,73 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
};
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
+ : SlowPathCodeARM64(instruction), tmp_(tmp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
+ Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
+ Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ Register tmp_reg = WRegisterFrom(tmp_);
+
+ __ Bind(GetEntryLabel());
+ vixl::aarch64::Label slow_copy_loop;
+ __ Bind(&slow_copy_loop);
+ __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
+ codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp_reg = ReadBarrier::Mark(tmp_reg);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp_.reg(), LR);
+ DCHECK_NE(tmp_.reg(), WSP);
+ DCHECK_NE(tmp_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
+ DCHECK_NE(tmp_.reg(), IP0);
+ DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
+ // This runtime call does not require a stack map.
+ codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
+ __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&slow_copy_loop, ne);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
+
+ private:
+ Location tmp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
+};
#undef __
bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
@@ -170,14 +240,14 @@ static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister());
}
-static void MoveFPToInt(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
Location input = locations->InAt(0);
Location output = locations->Out();
__ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}
-static void MoveIntToFP(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
Location input = locations->InAt(0);
Location output = locations->Out();
__ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
@@ -222,7 +292,7 @@ static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
static void GenReverseBytes(LocationSummary* locations,
Primitive::Type type,
- vixl::MacroAssembler* masm) {
+ MacroAssembler* masm) {
Location in = locations->InAt(0);
Location out = locations->Out();
@@ -276,7 +346,7 @@ static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
static void GenNumberOfLeadingZeros(LocationSummary* locations,
Primitive::Type type,
- vixl::MacroAssembler* masm) {
+ MacroAssembler* masm) {
DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
Location in = locations->InAt(0);
@@ -303,7 +373,7 @@ void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke)
static void GenNumberOfTrailingZeros(LocationSummary* locations,
Primitive::Type type,
- vixl::MacroAssembler* masm) {
+ MacroAssembler* masm) {
DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
Location in = locations->InAt(0);
@@ -331,7 +401,7 @@ void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke
static void GenReverse(LocationSummary* locations,
Primitive::Type type,
- vixl::MacroAssembler* masm) {
+ MacroAssembler* masm) {
DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
Location in = locations->InAt(0);
@@ -356,7 +426,7 @@ void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}
-static void GenBitCount(HInvoke* instr, Primitive::Type type, vixl::MacroAssembler* masm) {
+static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* masm) {
DCHECK(Primitive::IsIntOrLongType(type)) << type;
DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
@@ -397,7 +467,7 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void MathAbsFP(LocationSummary* locations, bool is64bit, vixl::MacroAssembler* masm) {
+static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
Location in = locations->InAt(0);
Location out = locations->Out();
@@ -433,7 +503,7 @@ static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
static void GenAbsInteger(LocationSummary* locations,
bool is64bit,
- vixl::MacroAssembler* masm) {
+ MacroAssembler* masm) {
Location in = locations->InAt(0);
Location output = locations->Out();
@@ -463,7 +533,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
static void GenMinMaxFP(LocationSummary* locations,
bool is_min,
bool is_double,
- vixl::MacroAssembler* masm) {
+ MacroAssembler* masm) {
Location op1 = locations->InAt(0);
Location op2 = locations->InAt(1);
Location out = locations->Out();
@@ -523,7 +593,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
static void GenMinMax(LocationSummary* locations,
bool is_min,
bool is_long,
- vixl::MacroAssembler* masm) {
+ MacroAssembler* masm) {
Location op1 = locations->InAt(0);
Location op2 = locations->InAt(1);
Location out = locations->Out();
@@ -574,7 +644,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}
@@ -584,7 +654,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}
@@ -594,7 +664,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}
@@ -604,7 +674,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}
@@ -617,7 +687,7 @@ static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* inv
locations->AddTemp(Location::RequiresFpuRegister());
}
-static void GenMathRound(HInvoke* invoke, bool is_double, vixl::MacroAssembler* masm) {
+static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
// Java 8 API definition for Math.round():
// Return the closest long or int to the argument, with ties rounding to positive infinity.
//
@@ -635,13 +705,13 @@ static void GenMathRound(HInvoke* invoke, bool is_double, vixl::MacroAssembler*
FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
- vixl::Label done;
+ vixl::aarch64::Label done;
// Round to nearest integer, ties away from zero.
__ Fcvtas(out_reg, in_reg);
// For positive values, zero or NaN inputs, rounding is done.
- __ Tbz(out_reg, out_reg.size() - 1, &done);
+ __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);
// Handle input < 0 cases.
// If input is negative but not a tie, previous result (round to nearest) is valid.
@@ -675,7 +745,7 @@ void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}
@@ -685,7 +755,7 @@ void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}
@@ -695,7 +765,7 @@ void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}
@@ -705,7 +775,7 @@ void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}
@@ -723,7 +793,7 @@ void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}
@@ -733,7 +803,7 @@ void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}
@@ -743,7 +813,7 @@ void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}
@@ -753,7 +823,7 @@ void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
__ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}
@@ -767,7 +837,7 @@ void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
codegen_->Load(Primitive::kPrimNot, WRegisterFrom(invoke->GetLocations()->Out()),
- MemOperand(tr, Thread::PeerOffset<8>().Int32Value()));
+ MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}
static void GenUnsafeGet(HInvoke* invoke,
@@ -778,7 +848,7 @@ static void GenUnsafeGet(HInvoke* invoke,
DCHECK((type == Primitive::kPrimInt) ||
(type == Primitive::kPrimLong) ||
(type == Primitive::kPrimNot));
- vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
Location base_loc = locations->InAt(1);
Register base = WRegisterFrom(base_loc); // Object pointer.
Location offset_loc = locations->InAt(2);
@@ -912,7 +982,7 @@ static void GenUnsafePut(LocationSummary* locations,
bool is_volatile,
bool is_ordered,
CodeGeneratorARM64* codegen) {
- vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
@@ -1031,7 +1101,7 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
}
static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
- vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
Register out = WRegisterFrom(locations->Out()); // Boolean result.
@@ -1070,7 +1140,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
// result = tmp_value != 0;
- vixl::Label loop_head, exit_loop;
+ vixl::aarch64::Label loop_head, exit_loop;
__ Bind(&loop_head);
// TODO: When `type == Primitive::kPrimNot`, add a read barrier for
// the reference stored in the object before attempting the CAS,
@@ -1154,20 +1224,22 @@ void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
- Register str = XRegisterFrom(locations->InAt(0));
- Register arg = XRegisterFrom(locations->InAt(1));
+ Register str = InputRegisterAt(invoke, 0);
+ Register arg = InputRegisterAt(invoke, 1);
+ DCHECK(str.IsW());
+ DCHECK(arg.IsW());
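+  // Managed heap references are 32-bit, so both string arguments are
+  // expected in W registers; the HeapOperand loads below rely on this.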
Register out = OutputRegister(invoke);
Register temp0 = WRegisterFrom(locations->GetTemp(0));
Register temp1 = WRegisterFrom(locations->GetTemp(1));
Register temp2 = WRegisterFrom(locations->GetTemp(2));
- vixl::Label loop;
- vixl::Label find_char_diff;
- vixl::Label end;
+ vixl::aarch64::Label loop;
+ vixl::aarch64::Label find_char_diff;
+ vixl::aarch64::Label end;
// Get offsets of count and value fields within a string object.
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1189,8 +1261,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Subs(out, str, arg);
__ B(&end, eq);
// Load lengths of this and argument strings.
- __ Ldr(temp0, MemOperand(str.X(), count_offset));
- __ Ldr(temp1, MemOperand(arg.X(), count_offset));
+ __ Ldr(temp0, HeapOperand(str, count_offset));
+ __ Ldr(temp1, HeapOperand(arg, count_offset));
// Return zero if both strings are empty.
__ Orr(out, temp0, temp1);
__ Cbz(out, &end);
@@ -1219,8 +1291,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
// Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
__ Bind(&loop);
- __ Ldr(temp4, MemOperand(str.X(), temp1));
- __ Ldr(temp0, MemOperand(arg.X(), temp1));
+ __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
+ __ Ldr(temp0, MemOperand(arg.X(), temp1.X()));
__ Cmp(temp4, temp0);
__ B(ne, &find_char_diff);
__ Add(temp1, temp1, char_size * 4);
@@ -1239,14 +1311,14 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Clz(temp1, temp1);
// If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
// the difference occurs outside the remaining string data, so just return length diff (out).
- __ Cmp(temp2, Operand(temp1, LSR, 4));
+ __ Cmp(temp2, Operand(temp1.W(), LSR, 4));
__ B(le, &end);
// Extract the characters and calculate the difference.
__ Bic(temp1, temp1, 0xf);
__ Lsr(temp0, temp0, temp1);
__ Lsr(temp4, temp4, temp1);
__ And(temp4, temp4, 0xffff);
- __ Sub(out, temp4, Operand(temp0, UXTH));
+ __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
__ Bind(&end);
@@ -1269,7 +1341,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
Register str = WRegisterFrom(locations->InAt(0));
@@ -1281,10 +1353,10 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
Register temp1 = WRegisterFrom(locations->GetTemp(0));
Register temp2 = WRegisterFrom(locations->GetTemp(1));
- vixl::Label loop;
- vixl::Label end;
- vixl::Label return_true;
- vixl::Label return_false;
+ vixl::aarch64::Label loop;
+ vixl::aarch64::Label end;
+ vixl::aarch64::Label return_true;
+ vixl::aarch64::Label return_false;
// Get offsets of count, value, and class fields within a string object.
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1357,7 +1429,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
}
static void GenerateVisitStringIndexOf(HInvoke* invoke,
- vixl::MacroAssembler* masm,
+ MacroAssembler* masm,
CodeGeneratorARM64* codegen,
ArenaAllocator* allocator,
bool start_at_zero) {
@@ -1394,7 +1466,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke,
__ Mov(tmp_reg, 0);
}
- __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value()));
+ __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pIndexOf).Int32Value()));
CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
__ Blr(lr);
@@ -1405,7 +1477,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke,
void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
// best to align the inputs accordingly.
@@ -1425,7 +1497,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
// best to align the inputs accordingly.
@@ -1443,7 +1515,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1454,7 +1526,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invo
}
void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
Register byte_array = WRegisterFrom(locations->InAt(0));
@@ -1464,7 +1536,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke)
__ B(eq, slow_path->GetEntryLabel());
__ Ldr(lr,
- MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromBytes).Int32Value()));
+ MemOperand(tr,
+ QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromBytes).Int32Value()));
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
__ Blr(lr);
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1473,7 +1546,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke)
void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1483,7 +1556,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invo
}
void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
// No need to emit code checking whether `locations->InAt(2)` is a null
// pointer, as callers of the native method
@@ -1492,7 +1565,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke)
//
// all include a null check on `data` before calling that method.
__ Ldr(lr,
- MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromChars).Int32Value()));
+ MemOperand(tr,
+ QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromChars).Int32Value()));
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
__ Blr(lr);
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1500,7 +1574,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke)
void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1508,7 +1582,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* inv
}
void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
Register string_to_copy = WRegisterFrom(locations->InAt(0));
@@ -1518,7 +1592,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke
__ B(eq, slow_path->GetEntryLabel());
__ Ldr(lr,
- MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromString).Int32Value()));
+ MemOperand(tr,
+ QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromString).Int32Value()));
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
__ Blr(lr);
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1531,7 +1606,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
LocationSummary* const locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
@@ -1546,7 +1621,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke)
DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
LocationSummary* const locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
@@ -1556,10 +1631,11 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke)
}
static void GenFPToFPCall(HInvoke* invoke,
- vixl::MacroAssembler* masm,
+ MacroAssembler* masm,
CodeGeneratorARM64* codegen,
QuickEntrypointEnum entry) {
- __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64WordSize>(entry).Int32Value()));
+ __ Ldr(lr, MemOperand(tr,
+ GetThreadOffset<kArm64PointerSize>(entry).Int32Value()));
__ Blr(lr);
codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}
@@ -1716,7 +1792,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
// Check assumption that sizeof(Char) is 2 (used in scaling below).
@@ -1756,9 +1832,9 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Sub(num_chr, srcEnd, srcBegin);
// Do the copy.
- vixl::Label loop;
- vixl::Label done;
- vixl::Label remainder;
+ vixl::aarch64::Label loop;
+ vixl::aarch64::Label done;
+ vixl::aarch64::Label remainder;
// Early out for valid zero-length retrievals.
__ Cbz(num_chr, &done);
@@ -1773,9 +1849,9 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
// Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
// (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
__ Bind(&loop);
- __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, vixl::PostIndex));
+ __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
__ Subs(num_chr, num_chr, 8);
- __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, vixl::PostIndex));
+ __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
__ B(ge, &loop);
__ Adds(num_chr, num_chr, 8);
@@ -1784,9 +1860,9 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
// Main loop for < 8 character case and remainder handling. Loads and stores one
// 16-bit Java character at a time.
__ Bind(&remainder);
- __ Ldrh(tmp1, MemOperand(src_ptr, char_size, vixl::PostIndex));
+ __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
__ Subs(num_chr, num_chr, 1);
- __ Strh(tmp1, MemOperand(dst_ptr, char_size, vixl::PostIndex));
+ __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
__ B(gt, &remainder);
__ Bind(&done);
@@ -1800,7 +1876,7 @@ static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
uint32_t at,
HInstruction* input) {
HIntConstant* const_input = input->AsIntConstant();
- if (const_input != nullptr && !vixl::Assembler::IsImmAddSub(const_input->GetValue())) {
+ if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
locations->SetInAt(at, Location::RequiresRegister());
} else {
locations->SetInAt(at, Location::RegisterOrConstant(input));
@@ -1847,7 +1923,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
}
-static void CheckSystemArrayCopyPosition(vixl::MacroAssembler* masm,
+static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
const Location& pos,
const Register& input,
const Location& length,
@@ -1880,7 +1956,7 @@ static void CheckSystemArrayCopyPosition(vixl::MacroAssembler* masm,
} else {
// Check that pos >= 0.
Register pos_reg = WRegisterFrom(pos);
- __ Tbnz(pos_reg, pos_reg.size() - 1, slow_path->GetEntryLabel());
+ __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
// Check that pos <= length(input) && (length(input) - pos) >= length.
__ Ldr(temp, MemOperand(input, length_offset));
@@ -1893,7 +1969,7 @@ static void CheckSystemArrayCopyPosition(vixl::MacroAssembler* masm,
// Compute base source address, base destination address, and end source address
// for System.arraycopy* intrinsics.
-static void GenSystemArrayCopyAddresses(vixl::MacroAssembler* masm,
+static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
Primitive::Type type,
const Register& src,
const Location& src_pos,
@@ -1934,7 +2010,7 @@ static void GenSystemArrayCopyAddresses(vixl::MacroAssembler* masm,
}
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
Register src = XRegisterFrom(locations->InAt(0));
Location src_pos = locations->InAt(1);
@@ -2007,12 +2083,12 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
UseScratchRegisterScope temps(masm);
Register tmp = temps.AcquireW();
- vixl::Label loop, done;
+ vixl::aarch64::Label loop, done;
__ Bind(&loop);
__ Cmp(src_curr_addr, src_stop_addr);
__ B(&done, eq);
- __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, vixl::PostIndex));
- __ Strh(tmp, MemOperand(dst_curr_addr, char_size, vixl::PostIndex));
+ __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
+ __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
__ B(&loop);
__ Bind(&done);
@@ -2026,9 +2102,9 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128;
// We want to use two temporary registers in order to reduce the register pressure in arm64.
// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2081,20 +2157,29 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch register
+ // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
+ // (because that register is clobbered by ReadBarrierMarkRegX
+ // entry points). Get an extra temporary register from the
+ // register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
- vixl::MacroAssembler* masm = GetVIXLAssembler();
+ MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = XRegisterFrom(locations->InAt(0));
Location src_pos = locations->InAt(1);
@@ -2102,12 +2187,14 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
Register temp1 = WRegisterFrom(locations->GetTemp(0));
+ Location temp1_loc = LocationFrom(temp1);
Register temp2 = WRegisterFrom(locations->GetTemp(1));
+ Location temp2_loc = LocationFrom(temp2);
- SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
- vixl::Label conditions_on_positions_validated;
+ vixl::aarch64::Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
// If source and destination are the same, we go to slow path if we need to do
@@ -2121,7 +2208,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ Cmp(src, dest);
- __ B(slow_path->GetEntryLabel(), eq);
+ __ B(intrinsic_slow_path->GetEntryLabel(), eq);
}
// Checked when building locations.
DCHECK(!optimizations.GetDestinationIsSource()
@@ -2132,7 +2219,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ B(&conditions_on_positions_validated, ne);
}
__ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
- __ B(slow_path->GetEntryLabel(), gt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), gt);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2141,19 +2228,19 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
}
__ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
- __ B(slow_path->GetEntryLabel(), lt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), lt);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ Cbz(src, slow_path->GetEntryLabel());
+ __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ Cbz(dest, slow_path->GetEntryLabel());
+ __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
}
// We have already checked in the LocationsBuilder for the constant case.
@@ -2161,17 +2248,17 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
// If the length is negative, bail out.
- __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+ __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel());
// If the length >= 128 then (currently) prefer native implementation.
__ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
- __ B(slow_path->GetEntryLabel(), ge);
+ __ B(intrinsic_slow_path->GetEntryLabel(), ge);
}
// Validity checks: source.
CheckSystemArrayCopyPosition(masm,
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2180,90 +2267,236 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
{
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
UseScratchRegisterScope temps(masm);
+ // Note: Because it is acquired from VIXL's scratch register pool,
+ // `temp3` might be IP0, and thus cannot be used as `ref` argument
+ // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+ // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
Register temp3 = temps.AcquireW();
+
if (!optimizations.GetDoesNotNeedTypeCheck()) {
// Check whether all elements of the source array are assignable to the component
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ Ldr(temp1, MemOperand(dest, class_offset));
- __ Ldr(temp2, MemOperand(src, class_offset));
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+          // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+ __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ }
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ Ldr(temp3, HeapOperand(temp2, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ dest.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+        // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+          // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ src.W(),
+ class_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+ }
+ } else {
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ Ldr(temp1, MemOperand(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ Ldr(temp2, MemOperand(src, class_offset));
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ Ldr(temp3, HeapOperand(temp1, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ Ldr(temp3, HeapOperand(temp2, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
- __ Cmp(temp1, temp2);
+ __ Cmp(temp1, temp2);
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- vixl::Label do_copy;
- __ B(&do_copy, eq);
- if (!did_unpoison) {
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ if (!did_unpoison) {
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ Ldr(temp1, HeapOperand(temp1, component_offset));
codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ // No need to unpoison the result, we're comparing against null.
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ Ldr(temp1, HeapOperand(temp1, component_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ Ldr(temp1, HeapOperand(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ Cbnz(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ B(slow_path->GetEntryLabel(), ne);
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ Ldr(temp1, HeapOperand(src.W(), class_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ Ldr(temp2, HeapOperand(temp1, component_offset));
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ }
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
}
Register src_curr_addr = temp1.X();
Register dst_curr_addr = temp2.X();
- Register src_stop_addr = temp3.X();
+ Register src_stop_addr;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch
+ // register pool as `temp3`, cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM64 (because that
+ // register is clobbered by ReadBarrierMarkRegX entry points).
+    // So another temporary register, allocated by the register
+    // allocator, is used instead.
+ DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
+ src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ } else {
+ src_stop_addr = temp3.X();
+ }
GenSystemArrayCopyAddresses(masm,
Primitive::kPrimNot,
@@ -2276,30 +2509,103 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dst_curr_addr,
src_stop_addr);
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- vixl::Label loop, done;
const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- __ Bind(&loop);
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
- {
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ vixl::aarch64::Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+
Register tmp = temps.AcquireW();
- __ Ldr(tmp, MemOperand(src_curr_addr, element_size, vixl::PostIndex));
- __ Str(tmp, MemOperand(dst_curr_addr, element_size, vixl::PostIndex));
+ // Make sure `tmp` is not IP0, as it is clobbered by
+ // ReadBarrierMarkRegX entry points in
+ // ReadBarrierSystemArrayCopySlowPathARM64.
+ DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `tmp`.
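+      // The 32-bit load above zero-extends into `tmp.X()`, so
+      // `tmp.X() LSR 32` is zero and the Add leaves `src` unchanged.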
+ __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCodeARM64* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
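+    // That bit (at LockWord::kReadBarrierStateShift) is set only for the
+    // gray state, so the slow path below is entered exactly when `src` is gray.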
+ __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+ __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&loop, ne);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ vixl::aarch64::Label loop, done;
+ __ Bind(&loop);
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+ {
+ Register tmp = temps.AcquireW();
+ __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+ __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ }
+ __ B(&loop);
+ __ Bind(&done);
}
- __ B(&loop);
- __ Bind(&done);
}
// We only need one card marking on the destination array.
codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
static void GenIsInfinite(LocationSummary* locations,
bool is64bit,
- vixl::MacroAssembler* masm) {
+ MacroAssembler* masm) {
Operand infinity;
Register out;
@@ -2311,7 +2617,7 @@ static void GenIsInfinite(LocationSummary* locations,
out = WRegisterFrom(locations->Out());
}
- const Register zero = vixl::Assembler::AppropriateZeroRegFor(out);
+ const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
MoveFPToInt(locations, is64bit, masm);
__ Eor(out, out, infinity);
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index d47448a9c3..525153621b 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -20,10 +20,11 @@
#include "intrinsics.h"
namespace vixl {
+namespace aarch64 {
class MacroAssembler;
-} // namespace vixl
+}} // namespace vixl::aarch64
namespace art {
@@ -73,7 +74,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
private:
- vixl::MacroAssembler* GetVIXLAssembler();
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler();
ArenaAllocator* GetAllocator();
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 7137fd9c11..6e5eb6622b 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1875,7 +1875,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
// int java.lang.String.compareTo(String anotherString)
void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1899,8 +1899,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) {
__ LoadFromOffset(kLoadWord,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsWordSize,
- pStringCompareTo).Int32Value());
+ QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pStringCompareTo).Int32Value());
__ Jalr(T9);
__ Nop();
__ Bind(slow_path->GetExitLabel());
@@ -2059,7 +2058,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
__ LoadFromOffset(kLoadWord,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pIndexOf).Int32Value());
+ QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pIndexOf).Int32Value());
__ Jalr(T9);
__ Nop();
@@ -2071,7 +2070,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
// int java.lang.String.indexOf(int ch)
void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime
// calling convention. So it's best to align the inputs accordingly.
@@ -2096,7 +2095,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) {
// int java.lang.String.indexOf(int ch, int fromIndex)
void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime
// calling convention. So it's best to align the inputs accordingly.
@@ -2122,7 +2121,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2145,7 +2144,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke)
__ LoadFromOffset(kLoadWord,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromBytes).Int32Value());
+ QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromBytes).Int32Value());
__ Jalr(T9);
__ Nop();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2155,7 +2154,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke)
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2178,7 +2177,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke)
__ LoadFromOffset(kLoadWord,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromChars).Int32Value());
+ QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromChars).Int32Value());
__ Jalr(T9);
__ Nop();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2187,7 +2186,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke)
// java.lang.StringFactory.newStringFromString(String toCopy)
void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2207,7 +2206,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromString(HInvoke* invoke)
__ LoadFromOffset(kLoadWord,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromString).Int32Value());
+ QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromString).Int32Value());
__ Jalr(T9);
__ Nop();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index cc4971b8f8..1e18540e1a 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1519,7 +1519,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
// int java.lang.String.compareTo(String anotherString)
void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1543,7 +1543,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) {
__ LoadFromOffset(kLoadDoubleword,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pStringCompareTo).Int32Value());
+ QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pStringCompareTo).Int32Value());
__ Jalr(T9);
__ Nop();
__ Bind(slow_path->GetExitLabel());
@@ -1694,7 +1694,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
__ LoadFromOffset(kLoadDoubleword,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pIndexOf).Int32Value());
+ QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pIndexOf).Int32Value());
CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
__ Jalr(T9);
__ Nop();
@@ -1707,7 +1707,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
// int java.lang.String.indexOf(int ch)
void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime
// calling convention. So it's best to align the inputs accordingly.
@@ -1728,7 +1728,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) {
// int java.lang.String.indexOf(int ch, int fromIndex)
void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime
// calling convention. So it's best to align the inputs accordingly.
@@ -1748,7 +1748,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1771,7 +1771,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke
__ LoadFromOffset(kLoadDoubleword,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+ QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
pAllocStringFromBytes).Int32Value());
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
__ Jalr(T9);
@@ -1783,7 +1783,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1805,7 +1805,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke
__ LoadFromOffset(kLoadDoubleword,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+ QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
pAllocStringFromChars).Int32Value());
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
__ Jalr(T9);
@@ -1816,7 +1816,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke
// java.lang.StringFactory.newStringFromString(String toCopy)
void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1836,7 +1836,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromString(HInvoke* invok
__ LoadFromOffset(kLoadDoubleword,
T9,
TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
+ QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
pAllocStringFromString).Int32Value());
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
__ Jalr(T9);
@@ -1879,6 +1879,84 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
+static void GenHighestOneBit(LocationSummary* locations,
+ Primitive::Type type,
+ Mips64Assembler* assembler) {
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type);
+
+ GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+ if (type == Primitive::kPrimLong) {
+ __ Dclz(TMP, in);
+ __ LoadConst64(AT, INT64_C(0x8000000000000000));
+ __ Dsrlv(out, AT, TMP);
+ } else {
+ __ Clz(TMP, in);
+ __ LoadConst32(AT, 0x80000000);
+ __ Srlv(out, AT, TMP);
+ }
+  // For either value of "type", when "in" is zero, "out" should also
+  // be zero. Without this extra "and" operation, when "in" is zero,
+  // "out" would be either Integer.MIN_VALUE or Long.MIN_VALUE, because
+  // the MIPS logical shift operations "dsrlv" and "srlv" don't use
+  // the shift amount (TMP) directly; they use (TMP % 64) or
+  // (TMP % 32), respectively.
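+  // For example, in the 32-bit case with "in" == 0: Clz yields 32, "srlv"
+  // then shifts by 32 % 32 == 0 leaving "out" == 0x80000000, and the "and"
+  // with "in" (zero) produces the expected result of zero.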
+ __ And(out, out, in);
+}
+
+// int java.lang.Integer.highestOneBit(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.highestOneBit(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongHighestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongHighestOneBit(HInvoke* invoke) {
+ GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenLowestOneBit(LocationSummary* locations,
+ Primitive::Type type,
+ Mips64Assembler* assembler) {
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type);
+
+ GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
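+  // Isolate the lowest set bit using the two's-complement identity
+  // "in & -in"; for example, in == 0b0110 gives -in == ...1010, so the
+  // result is 0b0010, and in == 0 correctly yields 0.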
+ if (type == Primitive::kPrimLong) {
+ __ Dsubu(TMP, ZERO, in);
+ } else {
+ __ Subu(TMP, ZERO, in);
+ }
+ __ And(out, TMP, in);
+}
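// Editor's note: an illustrative sketch, not part of this patch. The two
// instructions above implement the usual two's-complement identity
// lowestOneBit(x) == x & -x: negating the input flips every bit above the
// lowest set bit, so the AND isolates that bit (and yields 0 for a zero input).
#include <cstdint>
inline uint64_t LowestOneBitSketch(uint64_t in) {
  return in & (0u - in);  // unsigned negation is well defined, unlike -INT64_MIN
}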
+
+// int java.lang.Integer.lowestOneBit(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.lowestOneBit(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongLowestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(MIPS64, StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar)
@@ -1902,11 +1980,6 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, MathSinh)
UNIMPLEMENTED_INTRINSIC(MIPS64, MathTan)
UNIMPLEMENTED_INTRINSIC(MIPS64, MathTanh)
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongLowestOneBit)
-
// 1.8.
UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 812bdf550e..49d6c1952c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -70,6 +70,105 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register src = locations->InAt(0).AsRegister<Register>();
+ Location src_pos = locations->InAt(1);
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+ // In this code path, registers `temp1`, `temp2`, and `temp3`
+ // (resp.) are not used for the base source address, the base
+ // destination address, and the end source address (resp.), as in
+ // other SystemArrayCopy intrinsic code paths. Instead they are
+ // (resp.) used for:
+ // - the loop index (`i`);
+ // - the source index (`src_index`) and the loaded (source)
+ // reference (`value`); and
+ // - the destination index (`dest_index`).
+
+ // i = 0
+ __ xorl(temp1, temp1);
+ NearLabel loop;
+ __ Bind(&loop);
+ // value = src_array[i + src_pos]
+ if (src_pos.IsConstant()) {
+ int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
+ } else {
+ __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
+ }
+ __ MaybeUnpoisonHeapReference(temp2);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // value = ReadBarrier::Mark(value)
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(temp2, ESP);
+ DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
+ // This runtime call does not require a stack map.
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(temp2);
+ // dest_array[i + dest_pos] = value
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
+ } else {
+ __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
+ }
+ // ++i
+ __ addl(temp1, Immediate(1));
+ // if (i != length) goto loop
+ x86_codegen->GenerateIntCompare(temp1_loc, length);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
+};
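// Editor's note: an illustrative sketch, not part of this patch. Ignoring heap
// poisoning and the register assignments documented above, the loop emitted by
// EmitNativeCode amounts to the index-based copy below, where `mark` stands in
// for the ReadBarrierMarkRegX runtime entry point; names are my own.
#include <cstdint>
template <typename T, typename MarkFn>
void ReadBarrierCopySketch(T* dest, int32_t dest_pos,
                           const T* src, int32_t src_pos,
                           int32_t length, MarkFn mark) {
  for (int32_t i = 0; i != length; ++i) {
    // value = Mark(src_array[i + src_pos]); dest_array[i + dest_pos] = value
    dest[dest_pos + i] = mark(src[src_pos + i]);
  }
}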
+
+#undef __
+
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
@@ -706,7 +805,7 @@ static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
// We have to fall back to a call to the intrinsic.
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall);
+ LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -752,20 +851,20 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}
-// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
-// as it needs 64 bit instructions.
void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
- // See intrinsics.h.
- if (!kRoundIsPlusPointFive) {
- return;
- }
-
// Do we have instruction support?
if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
+ HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+ DCHECK(static_or_direct != nullptr);
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
+ if (static_or_direct->HasSpecialInput() &&
+ invoke->InputAt(
+ static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->SetOut(Location::RequiresRegister());
locations->AddTemp(Location::RequiresFpuRegister());
locations->AddTemp(Location::RequiresFpuRegister());
@@ -774,7 +873,7 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
// We have to fall back to a call to the intrinsic.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall);
+ LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
locations->SetOut(Location::RegisterLocation(EAX));
@@ -784,54 +883,61 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
- if (locations->WillCall()) {
+ if (locations->WillCall()) { // TODO: can we reach this?
InvokeOutOfLineIntrinsic(codegen_, invoke);
return;
}
- // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
Register out = locations->Out().AsRegister<Register>();
- XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
- NearLabel done, nan;
+ NearLabel skip_incr, done;
X86Assembler* assembler = GetAssembler();
- // Generate 0.5 into inPlusPointFive.
- __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
- __ movd(inPlusPointFive, out);
-
- // Add in the input.
- __ addss(inPlusPointFive, in);
-
- // And truncate to an integer.
- __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
+ // Since no direct x86 rounding instruction matches the required semantics,
+ // this intrinsic is implemented as follows:
+ // result = floor(in);
+ // if (in - result >= 0.5f)
+ // result = result + 1.0f;
+ __ movss(t2, in);
+ __ roundss(t1, in, Immediate(1));
+ __ subss(t2, t1);
+ if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
+ // Direct constant area available.
+ Register constant_area = locations->InAt(1).AsRegister<Register>();
+ __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
+ __ Bind(&skip_incr);
+ } else {
+ // No constant area: go through stack.
+ __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
+ __ comiss(t2, Address(ESP, 4));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, Address(ESP, 0));
+ __ Bind(&skip_incr);
+ __ addl(ESP, Immediate(8));
+ }
+
+ // Final conversion to an integer. Unfortunately this also does not have a
+ // direct x86 instruction, since NaN should map to 0 and large positive
+ // values need to be clipped to the extreme value.
__ movl(out, Immediate(kPrimIntMax));
- // maxInt = int-to-float(out)
- __ cvtsi2ss(maxInt, out);
-
- // if inPlusPointFive >= maxInt goto done
- __ comiss(inPlusPointFive, maxInt);
- __ j(kAboveEqual, &done);
-
- // if input == NaN goto nan
- __ j(kUnordered, &nan);
-
- // output = float-to-int-truncate(input)
- __ cvttss2si(out, inPlusPointFive);
- __ jmp(&done);
- __ Bind(&nan);
-
- // output = 0
- __ xorl(out, out);
+ __ cvtsi2ss(t2, out);
+ __ comiss(t1, t2);
+ __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
+ __ movl(out, Immediate(0)); // does not change flags
+ __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
+ __ cvttss2si(out, t1);
__ Bind(&done);
}
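// Editor's note: an illustrative sketch, not part of this patch. It spells out
// the Math.round(float) contract that the SSE4.1 sequence above reproduces:
// round half up via floor, map NaN to 0, and clamp out-of-range results to the
// int range (kPrimIntMax on the positive side). Names are my own; the double
// variants later in this patch follow the same shape with the long range.
#include <cmath>
#include <cstdint>
#include <limits>
inline int32_t RoundFloatSketch(float in) {
  float result = std::floor(in);
  if (in - result >= 0.5f) {
    result += 1.0f;  // round half up
  }
  if (std::isnan(result)) {
    return 0;  // NaN maps to 0
  }
  if (result >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();  // clipped to the extreme value
  }
  if (result <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();  // cvttss2si produces INT32_MIN here
  }
  return static_cast<int32_t>(result);
}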
static void CreateFPToFPCallLocations(ArenaAllocator* arena,
HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -857,7 +963,7 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry
}
// Now do the actual call.
- __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry)));
+ __ fs()->call(Address::Absolute(GetThreadOffset<kX86PointerSize>(entry)));
// Extract the return value from the FP stack.
__ fstpl(Address(ESP, 0));
@@ -985,7 +1091,7 @@ void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -1216,7 +1322,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1237,7 +1343,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pStringCompareTo)));
__ Bind(slow_path->GetExitLabel());
}
@@ -1490,7 +1596,7 @@ void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1510,7 +1616,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromBytes)));
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
@@ -1518,7 +1624,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1536,14 +1642,14 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
//
// all include a null check on `data` before calling that method.
- __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromChars)));
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1560,7 +1666,8 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke)
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
+ __ fs()->call(
+ Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromString)));
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
@@ -1801,7 +1908,7 @@ void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
Register out = invoke->GetLocations()->Out().AsRegister<Register>();
- GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
+ GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
}
static void GenUnsafeGet(HInvoke* invoke,
@@ -2670,9 +2777,9 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1)
}
void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2702,9 +2809,9 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2713,17 +2820,21 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
- Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Location length_arg = locations->InAt(4);
+ Location length = length_arg;
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -2739,7 +2850,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2747,7 +2858,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2757,10 +2868,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -2769,16 +2880,17 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
if (length.IsStackSlot()) {
__ movl(temp3, Address(ESP, length.GetStackIndex()));
length = Location::RegisterLocation(temp3);
@@ -2790,7 +2902,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -2798,7 +2910,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2807,7 +2919,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -2816,72 +2928,159 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
+
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp2 = temp1->component_type_
- __ movl(temp2, Address(temp1, component_offset));
- __ testl(temp2, temp2);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp2);
- __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- // Re-poison the heap reference to make the compare instruction below
- // compare two poisoned references.
- __ PoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (length.Equals(Location::RegisterLocation(temp3))) {
+ // When Baker read barriers are enabled, register `temp3`,
+ // which in the present case contains the `length` parameter,
+ // will be overwritten below. Make the `length` location
+ // reference the original stack location; it will be moved
+ // back to `temp3` later if necessary.
+ DCHECK(length_arg.IsStackSlot());
+ length = length_arg;
+ }
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+      // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmpl(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
} else {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- }
+ // Non read barrier code.
- // Note: if poisoning is on, we are here comparing two poisoned references.
- __ cmpl(temp1, Address(src, class_offset));
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ movl(temp2, Address(temp1, component_offset));
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp2);
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ // Re-poison the heap reference to make the compare instruction below
+ // compare two poisoned references.
+ __ PoisonHeapReference(temp1);
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- NearLabel do_copy;
- __ j(kEqual, &do_copy);
+ // Note: if heap poisoning is on, we are comparing two poisoned references here.
+ __ cmpl(temp1, Address(src, class_offset));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+ }
+ } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+ // Bail out if the source is not a non primitive array.
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
__ MaybeUnpoisonHeapReference(temp1);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
__ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
__ MaybeUnpoisonHeapReference(temp1);
- __ cmpl(Address(temp1, super_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
}
- } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
+ // Compute the base source address in `temp1`.
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
DCHECK_EQ(element_size, 4);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
@@ -2892,35 +3091,136 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp2, Address(dest, element_size * constant + offset));
- } else {
- __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // If it is needed (in the case of the fast-path loop), the base
+ // destination address is computed later, as `temp2` is used for
+ // intermediate computations.
- if (length.IsConstant()) {
- int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp3, Address(temp1, element_size * constant));
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ if (length.IsStackSlot()) {
+ // Location `length` is again pointing at a stack slot, as
+        // register `temp3` (which contained the length parameter
+        // earlier) has been overwritten; restore it now.
+ DCHECK(length.Equals(length_arg));
+ __ movl(temp3, Address(ESP, length.GetStackIndex()));
+ length = Location::RegisterLocation(temp3);
+ }
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // for (size_t i = 0; i != length; ++i) {
+ // dest_array[dest_pos + i] =
+ // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
+ // }
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ movl(temp2, Address(src, monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp2, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
+
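// Editor's note: an illustrative sketch, not part of this patch. kCarrySet
// works because SHR copies the last bit shifted out into the carry flag, so
// shifting by kReadBarrierStateShift + 1 tests exactly the low bit of the
// read-barrier state. In plain C++ the same predicate is a masked compare;
// `read_barrier_state_shift` stands in for LockWord::kReadBarrierStateShift.
#include <cstdint>
inline bool IsGraySketch(uint32_t lock_word, uint32_t read_barrier_state_shift) {
  // gray_ptr_ == 1, white_ptr_ == 0, black_ptr_ == 2 (see the static_asserts above),
  // so only the low bit of the state field needs to be inspected.
  return ((lock_word >> read_barrier_state_shift) & 1u) != 0u;
}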
+ // Fast-path copy.
+
+ // Set the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
} else {
- __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
- }
-
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ pushl(Address(temp1, 0));
- __ cfi().AdjustCFAOffset(4);
- __ popl(Address(temp2, 0));
- __ cfi().AdjustCFAOffset(-4);
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -2929,7 +3229,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 891aaf5ff9..311e1cd6eb 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -64,6 +64,65 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+ __ Bind(GetEntryLabel());
+ NearLabel loop;
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ // TODO: Inline the mark bit check before calling the runtime?
+ // TMP = ReadBarrier::Mark(TMP);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
+ // This runtime call does not require a stack map.
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(CpuRegister(TMP));
+ __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
+ __ addl(src_curr_addr, Immediate(element_size));
+ __ addl(dst_curr_addr, Immediate(element_size));
+ __ cmpl(src_curr_addr, src_stop_addr);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
+};
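// Editor's note: an illustrative sketch, not part of this patch. Unlike the
// index-based x86 slow path earlier in this patch, this variant walks raw
// element pointers; ignoring heap poisoning it is equivalent to the loop
// below, with `mark` again standing in for the ReadBarrierMarkRegX entry point.
template <typename T, typename MarkFn>
void PointerReadBarrierCopySketch(T* dst_curr, const T* src_curr,
                                  const T* src_stop, MarkFn mark) {
  // The emitted loop is do/while shaped: the main path only enters this slow
  // path when the range is already known to be non-empty.
  do {
    *dst_curr++ = mark(*src_curr++);
  } while (src_curr != src_stop);
}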
+
+#undef __
+
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -526,7 +585,7 @@ static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
// We have to fall back to a call to the intrinsic.
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall);
+ LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
locations->SetOut(Location::FpuRegisterLocation(XMM0));
@@ -583,12 +642,13 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
return;
}
// We have to fall back to a call to the intrinsic.
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall);
+ LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
locations->SetOut(Location::RegisterLocation(RAX));
@@ -597,10 +657,7 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
}
void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
- // See intrinsics.h.
- if (kRoundIsPlusPointFive) {
- CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
- }
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
@@ -610,47 +667,41 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
return;
}
- // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- NearLabel done, nan;
+ XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ NearLabel skip_incr, done;
X86_64Assembler* assembler = GetAssembler();
- // Load 0.5 into inPlusPointFive.
- __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));
-
- // Add in the input.
- __ addss(inPlusPointFive, in);
-
- // And truncate to an integer.
- __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
- // Load maxInt into out.
- codegen_->Load64BitValue(out, kPrimIntMax);
-
- // if inPlusPointFive >= maxInt goto done
- __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
- __ j(kAboveEqual, &done);
-
- // if input == NaN goto nan
- __ j(kUnordered, &nan);
-
- // output = float-to-int-truncate(input)
- __ cvttss2si(out, inPlusPointFive);
- __ jmp(&done);
- __ Bind(&nan);
-
- // output = 0
- __ xorl(out, out);
+ // Since no direct x86 rounding instruction matches the required semantics,
+ // this intrinsic is implemented as follows:
+ // result = floor(in);
+ // if (in - result >= 0.5f)
+ // result = result + 1.0f;
+ __ movss(t2, in);
+ __ roundss(t1, in, Immediate(1));
+ __ subss(t2, t1);
+ __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
+ __ Bind(&skip_incr);
+
+ // Final conversion to an integer. Unfortunately this also does not have a
+ // direct x86 instruction, since NaN should map to 0 and large positive
+ // values need to be clipped to the extreme value.
+ codegen_->Load32BitValue(out, kPrimIntMax);
+ __ cvtsi2ss(t2, out);
+ __ comiss(t1, t2);
+ __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
+ __ movl(out, Immediate(0)); // does not change flags
+ __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
+ __ cvttss2si(out, t1);
__ Bind(&done);
}
void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
- // See intrinsics.h.
- if (kRoundIsPlusPointFive) {
- CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
- }
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
@@ -660,46 +711,43 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
return;
}
- // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- NearLabel done, nan;
+ XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ NearLabel skip_incr, done;
X86_64Assembler* assembler = GetAssembler();
- // Load 0.5 into inPlusPointFive.
- __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));
-
- // Add in the input.
- __ addsd(inPlusPointFive, in);
-
- // And truncate to an integer.
- __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
-
- // Load maxLong into out.
+ // Since no direct x86 rounding instruction matches the required semantics,
+ // this intrinsic is implemented as follows:
+ // result = floor(in);
+ // if (in - result >= 0.5)
+ // result = result + 1.0f;
+ __ movsd(t2, in);
+ __ roundsd(t1, in, Immediate(1));
+ __ subsd(t2, t1);
+ __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
+ __ j(kBelow, &skip_incr);
+ __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
+ __ Bind(&skip_incr);
+
+ // Final conversion to an integer. Unfortunately this also does not have a
+ // direct x86 instruction, since NaN should map to 0 and large positive
+ // values need to be clipped to the extreme value.
codegen_->Load64BitValue(out, kPrimLongMax);
-
- // if inPlusPointFive >= maxLong goto done
- __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
- __ j(kAboveEqual, &done);
-
- // if input == NaN goto nan
- __ j(kUnordered, &nan);
-
- // output = double-to-long-truncate(input)
- __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
- __ jmp(&done);
- __ Bind(&nan);
-
- // output = 0
- __ xorl(out, out);
+ __ cvtsi2sd(t2, out, /* is64bit */ true);
+ __ comisd(t1, t2);
+ __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
+ __ movl(out, Immediate(0)); // does not change flags, implicit zero extension to 64-bit
+ __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
+ __ cvttsd2si(out, t1, /* is64bit */ true);
__ Bind(&done);
}
static void CreateFPToFPCallLocations(ArenaAllocator* arena,
HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -720,7 +768,7 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
DCHECK(invoke->IsInvokeStaticOrDirect());
X86_64Assembler* assembler = codegen->GetAssembler();
- __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true));
+ __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64PointerSize>(entry), true));
codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}
@@ -839,7 +887,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
@@ -1064,9 +1112,9 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1074,9 +1122,9 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1085,18 +1133,23 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
Location src_pos = locations->InAt(1);
CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
- CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
- CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+ Location temp1_loc = locations->GetTemp(0);
+ CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
+ Location temp2_loc = locations->GetTemp(1);
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+ Location temp3_loc = locations->GetTemp(2);
+ CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
+ Location TMP_loc = Location::RegisterLocation(TMP);
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1112,7 +1165,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1120,7 +1173,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1130,10 +1183,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -1142,13 +1195,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1157,7 +1210,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -1165,7 +1218,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1174,7 +1227,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1183,38 +1236,80 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ movl(temp1, Address(dest, class_offset));
- __ movl(temp2, Address(src, class_offset));
+
bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+    // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // If heap poisoning is enabled, `temp1` and `temp2` have been
+    // unpoisoned by the previous calls to
+ // GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ movl(temp2, Address(src, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
}
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp2->component_type_
- __ movl(CpuRegister(TMP), Address(temp2, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp2, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
__ cmpl(temp1, temp2);
@@ -1222,34 +1317,56 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (optimizations.GetDestinationIsTypedObjectArray()) {
NearLabel do_copy;
__ j(kEqual, &do_copy);
- if (!did_unpoison) {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ } else {
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
__ MaybeUnpoisonHeapReference(temp1);
+ // No need to unpoison the reference loaded below, as
+ // we're comparing against null.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ movl(temp1, Address(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ testl(temp1, temp1);
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
__ Bind(&do_copy);
} else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ // No need to unpoison `TMP` now, as we're comparing against null.
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
// Compute base source address, base destination address, and end source address.
@@ -1277,19 +1394,86 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ movl(CpuRegister(TMP), Address(temp1, 0));
- __ movl(Address(temp2, 0), CpuRegister(TMP));
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ movl(CpuRegister(TMP), Address(src, monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86-64 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(CpuRegister(TMP), Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
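  // Worked example (sketch; the concrete shift value is an assumption for illustration):
  // with kReadBarrierStateShift == 28, a gray object has bit 28 set in its lock word, so
  // `shrl TMP, 29` moves that bit into CF and the branch just above is taken. For a white
  // (or black) object the shifted-out bit is 0, CF stays clear, and we fall through to the
  // fast-path copy below.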
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1298,12 +1482,12 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
CpuRegister(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1324,7 +1508,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo),
+ __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pStringCompareTo),
/* no_rip */ true));
__ Bind(slow_path->GetExitLabel());
}
@@ -1577,7 +1761,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1597,7 +1781,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes),
+ __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
+ pAllocStringFromBytes),
/* no_rip */ true));
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1606,7 +1791,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke
void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainOnly,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1624,7 +1809,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
//
// all include a null check on `data` before calling that method.
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars),
+ __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
+ pAllocStringFromChars),
/* no_rip */ true));
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1632,7 +1818,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke
void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1649,7 +1835,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invok
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString),
+ __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
+ pAllocStringFromString),
/* no_rip */ true));
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1875,7 +2062,7 @@ void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke)
void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
- GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(),
+ GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
/* no_rip */ true));
}
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 3f27c911be..5fdfb9b6ca 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -376,6 +376,10 @@ class Location : public ValueObject {
return PolicyField::Decode(GetPayload());
}
+ bool RequiresRegisterKind() const {
+ return GetPolicy() == kRequiresRegister || GetPolicy() == kRequiresFpuRegister;
+ }
+
uintptr_t GetEncoding() const {
return GetPayload();
}
@@ -480,8 +484,9 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
public:
enum CallKind {
kNoCall,
+ kCallOnMainAndSlowPath,
kCallOnSlowPath,
- kCall
+ kCallOnMainOnly
};
LocationSummary(HInstruction* instruction,
@@ -540,10 +545,29 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
Location Out() const { return output_; }
- bool CanCall() const { return call_kind_ != kNoCall; }
- bool WillCall() const { return call_kind_ == kCall; }
- bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
- bool NeedsSafepoint() const { return CanCall(); }
+ bool CanCall() const {
+ return call_kind_ != kNoCall;
+ }
+
+ bool WillCall() const {
+ return call_kind_ == kCallOnMainOnly || call_kind_ == kCallOnMainAndSlowPath;
+ }
+
+ bool CallsOnSlowPath() const {
+ return call_kind_ == kCallOnSlowPath || call_kind_ == kCallOnMainAndSlowPath;
+ }
+
+ bool OnlyCallsOnSlowPath() const {
+ return call_kind_ == kCallOnSlowPath;
+ }
+
+ bool CallsOnMainAndSlowPath() const {
+ return call_kind_ == kCallOnMainAndSlowPath;
+ }
+
+ bool NeedsSafepoint() const {
+ return CanCall();
+ }
void SetStackBit(uint32_t index) {
stack_mask_->SetBit(index);
@@ -629,8 +653,7 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
// Whether these are locations for an intrinsified call.
bool intrinsified_;
- ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
- ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
+ friend class RegisterAllocatorTest;
DISALLOW_COPY_AND_ASSIGN(LocationSummary);
};
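A note on the renamed call kinds: the sketch below (not part of the patch) shows how the predicates added above behave for an intrinsic that calls the runtime on its main path but also keeps a slow path, which is the pattern the x86-64 String intrinsics earlier in this change were switched to; `arena_` and `invoke` stand in for the usual builder members.

    LocationSummary* locations =
        new (arena_) LocationSummary(invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
    DCHECK(locations->WillCall());             // the main path calls into the runtime
    DCHECK(locations->CallsOnSlowPath());      // and a slow path may call as well
    DCHECK(!locations->OnlyCallsOnSlowPath());
    DCHECK(locations->NeedsSafepoint());       // every call kind other than kNoCall needs a safepoint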
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d557f42968..2808e1b5fc 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2632,4 +2632,23 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {
}
}
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind) {
+ switch (kind) {
+ case MemBarrierKind::kAnyStore:
+ return os << "AnyStore";
+ case MemBarrierKind::kLoadAny:
+ return os << "LoadAny";
+ case MemBarrierKind::kStoreStore:
+ return os << "StoreStore";
+ case MemBarrierKind::kAnyAny:
+ return os << "AnyAny";
+ case MemBarrierKind::kNTStoreStore:
+ return os << "NTStoreStore";
+
+ default:
+ LOG(FATAL) << "Unknown MemBarrierKind: " << static_cast<int>(kind);
+ UNREACHABLE();
+ }
+}
+
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0f0ef26ea9..dfa8276651 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -25,7 +25,6 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/stl_util.h"
-#include "dex/compiler_enums.h"
#include "dex_file.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "handle.h"
@@ -1289,7 +1288,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \
M(BitwiseNegatedRight, Instruction) \
- M(MultiplyAccumulate, Instruction)
+ M(MultiplyAccumulate, Instruction) \
+ M(IntermediateAddress, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_arm
@@ -1303,8 +1303,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \
- M(Arm64DataProcWithShifterOp, Instruction) \
- M(Arm64IntermediateAddress, Instruction)
+ M(Arm64DataProcWithShifterOp, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_mips
@@ -5626,9 +5625,12 @@ inline uint32_t HLoadClass::GetDexCacheElementOffset() const {
// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
- // The special input is used for PC-relative loads on some architectures.
+ // The special input is used for PC-relative loads on some architectures,
+ // including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
- GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
+ GetLoadKind() == LoadKind::kDexCachePcRelative ||
+ GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+ GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
DCHECK(special_input_.GetInstruction() == nullptr);
special_input_ = HUserRecord<HInstruction*>(special_input);
special_input->AddUseAt(this, 0);
@@ -5836,9 +5838,12 @@ inline uint32_t HLoadString::GetDexCacheElementOffset() const {
// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
- // The special input is used for PC-relative loads on some architectures.
+ // The special input is used for PC-relative loads on some architectures,
+ // including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
- GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
+ GetLoadKind() == LoadKind::kDexCachePcRelative ||
+ GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+ GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
// HLoadString::GetInputRecords() returns an empty array at this point,
// so use the GetInputRecords() from the base class to set the input record.
DCHECK(special_input_.GetInstruction() == nullptr);
@@ -6305,6 +6310,32 @@ class HCheckCast FINAL : public HTemplateInstruction<2> {
DISALLOW_COPY_AND_ASSIGN(HCheckCast);
};
+/**
+ * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
+ * @details We define the combined barrier types that are actually required
+ * by the Java Memory Model, rather than using exactly the terminology from
+ * the JSR-133 cookbook. These should, in many cases, be replaced by acquire/release
+ * primitives. Note that the JSR-133 cookbook generally does not deal with
+ * store atomicity issues, and the recipes there are not always entirely sufficient.
+ * The current recipe is as follows:
+ * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
+ * -# Use AnyAny barrier after volatile store. (StoreLoad is as expensive.)
+ * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
+ * -# Use StoreStore barrier after all stores but before return from any constructor whose
+ * class has final fields.
+ * -# Use NTStoreStore to order non-temporal stores with respect to all later
+ * store-to-memory instructions. Only generated together with non-temporal stores.
+ */
+enum MemBarrierKind {
+ kAnyStore,
+ kLoadAny,
+ kStoreStore,
+ kAnyAny,
+ kNTStoreStore,
+ kLastBarrierKind = kNTStoreStore
+};
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
+
class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
public:
explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc)
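As a quick illustration of the recipe documented above (a sketch only; `block`, `store`, `arena` and `dex_pc` are placeholder names, not code from this change), a volatile store would be bracketed with barrier nodes like this:

    // Release barrier before the volatile store, full barrier after it.
    block->InsertInstructionBefore(
        new (arena) HMemoryBarrier(MemBarrierKind::kAnyStore, dex_pc), store);
    block->InsertInstructionAfter(
        new (arena) HMemoryBarrier(MemBarrierKind::kAnyAny, dex_pc), store);
    // A volatile load would instead be followed by a single kLoadAny (acquire) barrier.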
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 06b073c3e2..3f88717c2a 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -94,32 +94,6 @@ class HArm64DataProcWithShifterOp FINAL : public HExpression<2> {
std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
-// This instruction computes an intermediate address pointing in the 'middle' of an object. The
-// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
-// never used across anything that can trigger GC.
-class HArm64IntermediateAddress FINAL : public HExpression<2> {
- public:
- HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
- : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
- SetRawInputAt(0, base_address);
- SetRawInputAt(1, offset);
- }
-
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
- }
- bool IsActualObject() const OVERRIDE { return false; }
-
- HInstruction* GetBaseAddress() const { return InputAt(0); }
- HInstruction* GetOffset() const { return InputAt(1); }
-
- DECLARE_INSTRUCTION(Arm64IntermediateAddress);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
-};
-
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index f2d5cf3253..8bd8667f84 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -113,6 +113,34 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation {
DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight);
};
+
+// This instruction computes an intermediate address pointing in the 'middle' of an object. The
+// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
+// never used across anything that can trigger GC.
+class HIntermediateAddress FINAL : public HExpression<2> {
+ public:
+ HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
+ : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
+ SetRawInputAt(0, base_address);
+ SetRawInputAt(1, offset);
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ return true;
+ }
+ bool IsActualObject() const OVERRIDE { return false; }
+
+ HInstruction* GetBaseAddress() const { return InputAt(0); }
+ HInstruction* GetOffset() const { return InputAt(1); }
+
+ DECLARE_INSTRUCTION(IntermediateAddress);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
+};
+
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
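For context on the instruction being moved into the shared header, here is a minimal sketch of how an architecture-specific simplifier typically introduces it (`array`, `array_get`, `data_offset` and `block` are illustrative names, not taken from this patch):

    // Turn the implicit `array + data_offset` computation of an array access into an
    // explicit intermediate address, so the backend can fold the access into a single
    // base-plus-scaled-index operand.
    HIntermediateAddress* address = new (arena) HIntermediateAddress(
        array, graph->GetIntConstant(data_offset), array_get->GetDexPc());
    block->InsertInstructionBefore(address, array_get);
    array_get->ReplaceInput(address, 0);
    // Because the result points into the middle of `array`, it must not be live across
    // anything that can trigger GC -- hence SideEffects::DependsOnGC() in the constructor.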
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 2f59d4cd5b..0819fb01ac 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -37,7 +37,10 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> {
virtual ~HOptimization() {}
- // Return the name of the pass.
+ // Return the name of the pass. Pass names for a single HOptimization should be of the form
+ // <optimization_name> or <optimization_name>$<pass_name>, where <optimization_name> is the common prefix.
+ // Example: 'instruction_simplifier', 'instruction_simplifier$after_bce',
+ // 'instruction_simplifier$before_codegen'.
const char* GetPassName() const { return pass_name_; }
// Perform the analysis itself.
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index a6d234d739..8c0231e1aa 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -157,13 +157,26 @@ class OptimizingCFITest : public CFITest {
TestImpl(isa, #isa, expected_asm, expected_cfi); \
}
+#ifdef ART_ENABLE_CODEGEN_arm
TEST_ISA(kThumb2)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
TEST_ISA(kArm64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
TEST_ISA(kX86)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
TEST_ISA(kX86_64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
TEST_ISA(kMips)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
TEST_ISA(kMips64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm
TEST_F(OptimizingCFITest, kThumb2Adjust) {
std::vector<uint8_t> expected_asm(
expected_asm_kThumb2_adjust,
@@ -184,7 +197,9 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) {
Finish();
Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi);
}
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
TEST_F(OptimizingCFITest, kMipsAdjust) {
// One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -212,7 +227,9 @@ TEST_F(OptimizingCFITest, kMipsAdjust) {
Finish();
Check(kMips, "kMips_adjust", expected_asm, expected_cfi);
}
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
TEST_F(OptimizingCFITest, kMips64Adjust) {
// One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -240,6 +257,7 @@ TEST_F(OptimizingCFITest, kMips64Adjust) {
Finish();
Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
}
+#endif
#endif // ART_TARGET_ANDROID
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d703b0f94f..a1da20bae4 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -37,6 +37,10 @@
#include "pc_relative_fixups_x86.h"
#endif
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
+#include "x86_memory_gen.h"
+#endif
+
#include "art_method-inl.h"
#include "base/arena_allocator.h"
#include "base/arena_containers.h"
@@ -77,7 +81,7 @@
#include "oat_quick_method_header.h"
#include "prepare_for_register_allocation.h"
#include "reference_type_propagation.h"
-#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "select_generator.h"
#include "sharpening.h"
#include "side_effects_analysis.h"
@@ -91,6 +95,8 @@ namespace art {
static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB;
+static constexpr const char* kPassNameSeparator = "$";
+
/**
* Used by the code generator, to allocate the code in a vector.
*/
@@ -174,6 +180,7 @@ class PassObserver : public ValueObject {
private:
void StartPass(const char* pass_name) {
+ VLOG(compiler) << "Starting pass: " << pass_name;
// Dump graph first, then start timer.
if (visualizer_enabled_) {
visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_);
@@ -262,7 +269,7 @@ class PassScope : public ValueObject {
class OptimizingCompiler FINAL : public Compiler {
public:
explicit OptimizingCompiler(CompilerDriver* driver);
- ~OptimizingCompiler();
+ ~OptimizingCompiler() OVERRIDE;
bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE;
@@ -277,8 +284,13 @@ class OptimizingCompiler FINAL : public Compiler {
CompiledMethod* JniCompile(uint32_t access_flags,
uint32_t method_idx,
- const DexFile& dex_file) const OVERRIDE {
- return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
+ const DexFile& dex_file,
+ JniOptimizationFlags optimization_flags) const OVERRIDE {
+ return ArtQuickJniCompileMethod(GetCompilerDriver(),
+ access_flags,
+ method_idx,
+ dex_file,
+ optimization_flags);
}
uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE
@@ -302,6 +314,18 @@ class OptimizingCompiler FINAL : public Compiler {
SHARED_REQUIRES(Locks::mutator_lock_);
private:
+ void RunOptimizations(HGraph* graph,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ PassObserver* pass_observer,
+ StackHandleScopeCollection* handles) const;
+
+ void RunOptimizations(HOptimization* optimizations[],
+ size_t length,
+ PassObserver* pass_observer) const;
+
+ private:
// Create a 'CompiledMethod' for an optimized graph.
CompiledMethod* Emit(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
@@ -329,6 +353,18 @@ class OptimizingCompiler FINAL : public Compiler {
ArtMethod* method,
bool osr) const;
+ void MaybeRunInliner(HGraph* graph,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ PassObserver* pass_observer,
+ StackHandleScopeCollection* handles) const;
+
+ void RunArchOptimizations(InstructionSet instruction_set,
+ HGraph* graph,
+ CodeGenerator* codegen,
+ PassObserver* pass_observer) const;
+
std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
std::unique_ptr<std::ostream> visualizer_output_;
@@ -392,22 +428,143 @@ static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
|| instruction_set == kX86_64;
}
-static void RunOptimizations(HOptimization* optimizations[],
- size_t length,
- PassObserver* pass_observer) {
+static HOptimization* BuildOptimization(
+ const std::string& opt_name,
+ ArenaAllocator* arena,
+ HGraph* graph,
+ OptimizingCompilerStats* stats,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ StackHandleScopeCollection* handles,
+ SideEffectsAnalysis* most_recent_side_effects,
+ HInductionVarAnalysis* most_recent_induction) {
+ if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
+ CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
+ return new (arena) BoundsCheckElimination(graph,
+ *most_recent_side_effects,
+ most_recent_induction);
+ } else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) {
+ CHECK(most_recent_side_effects != nullptr);
+ return new (arena) GVNOptimization(graph, *most_recent_side_effects);
+ } else if (opt_name == HConstantFolding::kConstantFoldingPassName) {
+ return new (arena) HConstantFolding(graph);
+ } else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) {
+ return new (arena) HDeadCodeElimination(graph, stats);
+ } else if (opt_name == HInliner::kInlinerPassName) {
+ size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
+ return new (arena) HInliner(graph, // outer_graph
+ graph, // outermost_graph
+ codegen,
+ dex_compilation_unit, // outer_compilation_unit
+ dex_compilation_unit, // outermost_compilation_unit
+ driver,
+ handles,
+ stats,
+ number_of_dex_registers,
+ /* depth */ 0);
+ } else if (opt_name == HSharpening::kSharpeningPassName) {
+ return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
+ } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
+ return new (arena) HSelectGenerator(graph, stats);
+ } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+ return new (arena) HInductionVarAnalysis(graph);
+ } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
+ return new (arena) InstructionSimplifier(graph, stats);
+ } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
+ return new (arena) IntrinsicsRecognizer(graph, driver, stats);
+ } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
+ CHECK(most_recent_side_effects != nullptr);
+ return new (arena) LICM(graph, *most_recent_side_effects, stats);
+ } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
+ CHECK(most_recent_side_effects != nullptr);
+ return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
+ } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+ return new (arena) SideEffectsAnalysis(graph);
+#ifdef ART_ENABLE_CODEGEN_arm
+ } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
+ return new (arena) arm::DexCacheArrayFixups(graph, stats);
+ } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
+ return new (arena) arm::InstructionSimplifierArm(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+ } else if (opt_name == arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName) {
+ return new (arena) arm64::InstructionSimplifierArm64(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+ } else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) {
+ return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
+ } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) {
+ return new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+ } else if (opt_name == x86::PcRelativeFixups::kPcRelativeFixupsX86PassName) {
+ return new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+ } else if (opt_name == x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName) {
+ return new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+#endif
+ }
+ return nullptr;
+}
+
+static ArenaVector<HOptimization*> BuildOptimizations(
+ const std::vector<std::string>& pass_names,
+ ArenaAllocator* arena,
+ HGraph* graph,
+ OptimizingCompilerStats* stats,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ StackHandleScopeCollection* handles) {
+ // A few HOptimization constructors require a SideEffectsAnalysis or HInductionVarAnalysis
+ // instance. This method assumes that each of them expects the nearest instance preceding it
+ // in the pass name list.
+ SideEffectsAnalysis* most_recent_side_effects = nullptr;
+ HInductionVarAnalysis* most_recent_induction = nullptr;
+ ArenaVector<HOptimization*> ret(arena->Adapter());
+ for (std::string pass_name : pass_names) {
+ size_t pos = pass_name.find(kPassNameSeparator); // Strip suffix to get base pass name.
+ std::string opt_name = pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
+
+ HOptimization* opt = BuildOptimization(
+ opt_name,
+ arena,
+ graph,
+ stats,
+ codegen,
+ driver,
+ dex_compilation_unit,
+ handles,
+ most_recent_side_effects,
+ most_recent_induction);
+ CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
+ ret.push_back(opt);
+
+ if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+ most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
+ } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+ most_recent_induction = down_cast<HInductionVarAnalysis*>(opt);
+ }
+ }
+ return ret;
+}
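// Sketch (not part of the patch): a pass list as BuildOptimizations() would consume it, e.g.
//   { "side_effects", "GVN", "induction_var_analysis", "BCE", "instruction_simplifier$after_bce" }.
// The "$..." suffix is stripped only for the lookup in BuildOptimization(); an unknown base
// name trips the CHECK above and aborts compilation. The exact strings are each pass's
// k*PassName constant, so the names shown here are indicative only.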
+
+void OptimizingCompiler::RunOptimizations(HOptimization* optimizations[],
+ size_t length,
+ PassObserver* pass_observer) const {
for (size_t i = 0; i < length; ++i) {
PassScope scope(optimizations[i]->GetPassName(), pass_observer);
optimizations[i]->Run();
}
}
-static void MaybeRunInliner(HGraph* graph,
- CodeGenerator* codegen,
- CompilerDriver* driver,
- OptimizingCompilerStats* stats,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer,
- StackHandleScopeCollection* handles) {
+void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ PassObserver* pass_observer,
+ StackHandleScopeCollection* handles) const {
+ OptimizingCompilerStats* stats = compilation_stats_.get();
const CompilerOptions& compiler_options = driver->GetCompilerOptions();
bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
&& (compiler_options.GetInlineMaxCodeUnits() > 0);
@@ -416,11 +573,11 @@ static void MaybeRunInliner(HGraph* graph,
}
size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
HInliner* inliner = new (graph->GetArena()) HInliner(
- graph,
- graph,
+ graph, // outer_graph
+ graph, // outermost_graph
codegen,
- dex_compilation_unit,
- dex_compilation_unit,
+ dex_compilation_unit, // outer_compilation_unit
+ dex_compilation_unit, // outermost_compilation_unit
driver,
handles,
stats,
@@ -431,11 +588,12 @@ static void MaybeRunInliner(HGraph* graph,
RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
}
-static void RunArchOptimizations(InstructionSet instruction_set,
- HGraph* graph,
- CodeGenerator* codegen,
- OptimizingCompilerStats* stats,
- PassObserver* pass_observer) {
+void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
+ HGraph* graph,
+ CodeGenerator* codegen,
+ PassObserver* pass_observer) const {
+ UNUSED(codegen); // To avoid a compilation error when compiling for svelte builds.
+ OptimizingCompilerStats* stats = compilation_stats_.get();
ArenaAllocator* arena = graph->GetArena();
switch (instruction_set) {
#ifdef ART_ENABLE_CODEGEN_arm
@@ -444,8 +602,12 @@ static void RunArchOptimizations(InstructionSet instruction_set,
arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
arm::InstructionSimplifierArm* simplifier =
new (arena) arm::InstructionSimplifierArm(graph, stats);
+ SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
+ GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
HOptimization* arm_optimizations[] = {
simplifier,
+ side_effects,
+ gvn,
fixups
};
RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
@@ -457,7 +619,7 @@ static void RunArchOptimizations(InstructionSet instruction_set,
arm64::InstructionSimplifierArm64* simplifier =
new (arena) arm64::InstructionSimplifierArm64(graph, stats);
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
- GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
+ GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
HOptimization* arm64_optimizations[] = {
simplifier,
side_effects,
@@ -472,7 +634,7 @@ static void RunArchOptimizations(InstructionSet instruction_set,
mips::PcRelativeFixups* pc_relative_fixups =
new (arena) mips::PcRelativeFixups(graph, codegen, stats);
mips::DexCacheArrayFixups* dex_cache_array_fixups =
- new (arena) mips::DexCacheArrayFixups(graph, stats);
+ new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
HOptimization* mips_optimizations[] = {
pc_relative_fixups,
dex_cache_array_fixups
@@ -485,13 +647,27 @@ static void RunArchOptimizations(InstructionSet instruction_set,
case kX86: {
x86::PcRelativeFixups* pc_relative_fixups =
new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+ x86::X86MemoryOperandGeneration* memory_gen =
+ new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
HOptimization* x86_optimizations[] = {
- pc_relative_fixups
+ pc_relative_fixups,
+ memory_gen
};
RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer);
break;
}
#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ case kX86_64: {
+ x86::X86MemoryOperandGeneration* memory_gen =
+ new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+ HOptimization* x86_64_optimizations[] = {
+ memory_gen
+ };
+ RunOptimizations(x86_64_optimizations, arraysize(x86_64_optimizations), pass_observer);
+ break;
+ }
+#endif
default:
break;
}
@@ -500,7 +676,8 @@ static void RunArchOptimizations(InstructionSet instruction_set,
NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects.
static void AllocateRegisters(HGraph* graph,
CodeGenerator* codegen,
- PassObserver* pass_observer) {
+ PassObserver* pass_observer,
+ RegisterAllocator::Strategy strategy) {
{
PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
pass_observer);
@@ -513,27 +690,42 @@ static void AllocateRegisters(HGraph* graph,
}
{
PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
- RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+ RegisterAllocator::Create(graph->GetArena(), codegen, liveness, strategy)->AllocateRegisters();
}
}
-static void RunOptimizations(HGraph* graph,
- CodeGenerator* codegen,
- CompilerDriver* driver,
- OptimizingCompilerStats* stats,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer,
- StackHandleScopeCollection* handles) {
+void OptimizingCompiler::RunOptimizations(HGraph* graph,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ PassObserver* pass_observer,
+ StackHandleScopeCollection* handles) const {
+ OptimizingCompilerStats* stats = compilation_stats_.get();
ArenaAllocator* arena = graph->GetArena();
+ if (driver->GetCompilerOptions().GetPassesToRun() != nullptr) {
+ ArenaVector<HOptimization*> optimizations = BuildOptimizations(
+ *driver->GetCompilerOptions().GetPassesToRun(),
+ arena,
+ graph,
+ stats,
+ codegen,
+ driver,
+ dex_compilation_unit,
+ handles);
+ RunOptimizations(&optimizations[0], optimizations.size(), pass_observer);
+ return;
+ }
+
HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
- graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
+ graph, stats, "dead_code_elimination$initial");
HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
- graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
+ graph, stats, "dead_code_elimination$final");
HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
- HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
- HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
+ HConstantFolding* fold2 = new (arena) HConstantFolding(
+ graph, "constant_folding$after_inlining");
+ HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
LICM* licm = new (arena) LICM(graph, *side_effects, stats);
@@ -542,9 +734,9 @@ static void RunOptimizations(HGraph* graph,
BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier_after_bce");
+ graph, stats, "instruction_simplifier$after_bce");
InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier_before_codegen");
+ graph, stats, "instruction_simplifier$before_codegen");
IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats);
HOptimization* optimizations1[] = {
@@ -556,7 +748,7 @@ static void RunOptimizations(HGraph* graph,
};
RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
- MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
+ MaybeRunInliner(graph, codegen, driver, dex_compilation_unit, pass_observer, handles);
HOptimization* optimizations2[] = {
// SelectGenerator depends on the InstructionSimplifier removing
@@ -579,8 +771,7 @@ static void RunOptimizations(HGraph* graph,
};
RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
- RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, stats, pass_observer);
- AllocateRegisters(graph, codegen, pass_observer);
+ RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer);
}
static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
@@ -791,11 +982,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
RunOptimizations(graph,
codegen.get(),
compiler_driver,
- compilation_stats_.get(),
dex_compilation_unit,
&pass_observer,
&handles);
+ RegisterAllocator::Strategy regalloc_strategy =
+ compiler_options.GetRegisterAllocationStrategy();
+ AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
+
codegen->Compile(code_allocator);
pass_observer.DumpDisassembly();
}
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 9cc6ea45d0..c8d1ce0bd5 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -65,6 +65,7 @@ enum MethodCompilationStat {
kInlinedInvokeVirtualOrInterface,
kImplicitNullCheckGenerated,
kExplicitNullCheckGenerated,
+ kSimplifyIf,
kLastStat
};
@@ -143,6 +144,7 @@ class OptimizingCompilerStats {
case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break;
case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
+ case kSimplifyIf: name = "SimplifyIf"; break;
case kLastStat:
LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index ba405cdb69..c6acc45581 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -37,6 +37,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
// entry block) and relieve some pressure on the register allocator
// while avoiding recalculation of the base in a loop.
base_->MoveBeforeFirstUserAndOutOfLoops();
+ // Computing the base for PC-relative literals will clobber RA with
+ // the NAL instruction on R2. Take note of this before generating
+ // the method entry.
+ codegen_->ClobberRA();
}
}
@@ -58,6 +62,36 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
DCHECK(base_ != nullptr);
}
+ void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+ HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
+ switch (load_kind) {
+ case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+ case HLoadClass::LoadKind::kBootImageAddress:
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+ // Add a base register for PC-relative literals on R2.
+ InitializePCRelativeBasePointer();
+ load_class->AddSpecialInput(base_);
+ break;
+ default:
+ break;
+ }
+ }
+
+ void VisitLoadString(HLoadString* load_string) OVERRIDE {
+ HLoadString::LoadKind load_kind = load_string->GetLoadKind();
+ switch (load_kind) {
+ case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ // Add a base register for PC-relative literals on R2.
+ InitializePCRelativeBasePointer();
+ load_string->AddSpecialInput(base_);
+ break;
+ default:
+ break;
+ }
+ }
+
void HandleInvoke(HInvoke* invoke) {
// If this is an invoke-static/-direct with PC-relative dex cache array
// addressing, we need the PC-relative address base.
@@ -77,7 +111,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
// method pointer from the invoke.
if (invoke_static_or_direct->HasCurrentMethodInput()) {
DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
- CHECK(!has_extra_input); // TODO: review this.
+ CHECK(!has_extra_input);
return;
}
@@ -116,7 +150,6 @@ void PcRelativeFixups::Run() {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
// Do nothing for R6 because it has PC-relative addressing.
- // TODO: review. Move this check into RunArchOptimizations()?
return;
}
if (graph_->HasIrreducibleLoops()) {
diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h
index 1e8b071bb3..5a7397bf9d 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.h
+++ b/compiler/optimizing/pc_relative_fixups_mips.h
@@ -32,6 +32,8 @@ class PcRelativeFixups : public HOptimization {
: HOptimization(graph, "pc_relative_fixups_mips", stats),
codegen_(codegen) {}
+ static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips";
+
void Run() OVERRIDE;
private:
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 921f3dfff6..ad0921d7e6 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -227,6 +227,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMinDoubleDouble:
case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathRoundFloat:
if (!base_added) {
DCHECK(invoke_static_or_direct != nullptr);
DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index 03de2fcece..72fa71ea94 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -29,9 +29,11 @@ namespace x86 {
class PcRelativeFixups : public HOptimization {
public:
PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
- : HOptimization(graph, "pc_relative_fixups_x86", stats),
+ : HOptimization(graph, kPcRelativeFixupsX86PassName, stats),
codegen_(codegen) {}
+ static constexpr const char* kPcRelativeFixupsX86PassName = "pc_relative_fixups_x86";
+
void Run() OVERRIDE;
private:
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 965d5ee4f9..e96ab1918c 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -16,6 +16,7 @@
#include "reference_type_propagation.h"
+#include "base/enums.h"
#include "class_linker-inl.h"
#include "mirror/class-inl.h"
#include "mirror/dex_cache.h"
@@ -775,7 +776,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitInvoke(HInvoke* instr) {
ClassLinker* cl = Runtime::Current()->GetClassLinker();
mirror::DexCache* dex_cache =
FindDexCacheWithHint(soa.Self(), instr->GetDexFile(), hint_dex_cache_);
- size_t pointer_size = cl->GetImagePointerSize();
+ PointerSize pointer_size = cl->GetImagePointerSize();
ArtMethod* method = dex_cache->GetResolvedMethod(instr->GetDexMethodIndex(), pointer_size);
mirror::Class* klass = (method == nullptr) ? nullptr : method->GetReturnType(false, pointer_size);
SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
new file mode 100644
index 0000000000..34502869e4
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -0,0 +1,653 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocation_resolver.h"
+
+#include "code_generator.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness)
+ : allocator_(allocator),
+ codegen_(codegen),
+ liveness_(liveness) {}
+
+void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
+ size_t max_safepoint_live_fp_regs,
+ size_t reserved_out_slots,
+ size_t int_spill_slots,
+ size_t long_spill_slots,
+ size_t float_spill_slots,
+ size_t double_spill_slots,
+ size_t catch_phi_spill_slots,
+ const ArenaVector<LiveInterval*>& temp_intervals) {
+ size_t spill_slots = int_spill_slots
+ + long_spill_slots
+ + float_spill_slots
+ + double_spill_slots
+ + catch_phi_spill_slots;
+
+ // Computes frame size and spill mask.
+ codegen_->InitializeCodeGeneration(spill_slots,
+ max_safepoint_live_core_regs,
+ max_safepoint_live_fp_regs,
+ reserved_out_slots, // Includes slot(s) for the art method.
+ codegen_->GetGraph()->GetLinearOrder());
+
+ // Resolve outputs, including stack locations.
+ // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
+ for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ LiveInterval* current = instruction->GetLiveInterval();
+ LocationSummary* locations = instruction->GetLocations();
+ Location location = locations->Out();
+ if (instruction->IsParameterValue()) {
+ // Now that we know the frame size, adjust the parameter's location.
+ if (location.IsStackSlot()) {
+ location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ current->SetSpillSlot(location.GetStackIndex());
+ locations->UpdateOut(location);
+ } else if (location.IsDoubleStackSlot()) {
+ location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ current->SetSpillSlot(location.GetStackIndex());
+ locations->UpdateOut(location);
+ } else if (current->HasSpillSlot()) {
+ current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
+ }
+ } else if (instruction->IsCurrentMethod()) {
+ // The current method is always at offset 0.
+ DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
+ } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+ DCHECK(current->HasSpillSlot());
+ size_t slot = current->GetSpillSlot()
+ + spill_slots
+ + reserved_out_slots
+ - catch_phi_spill_slots;
+ current->SetSpillSlot(slot * kVRegSize);
+ } else if (current->HasSpillSlot()) {
+ // Adjust the stack slot, now that we know the number of them for each type.
+ // The way this implementation lays out the stack is the following:
+ // [parameter slots ]
+ // [catch phi spill slots ]
+ // [double spill slots ]
+ // [long spill slots ]
+ // [float spill slots ]
+ // [int/ref values ]
+ // [maximum out values ] (number of arguments for calls)
+ // [art method ].
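+      // As an illustration with hypothetical counts (1 double, 2 long, 1 float,
+      // 3 int slots, 2 reserved out slots): a float interval at index 0 resolves
+      // to slot 0 + 3 (int) + 2 (out) = 5, i.e. byte offset 5 * kVRegSize, and a
+      // double at index 0 resolves to 0 + 2 (long) + 1 (float) + 3 (int) + 2 (out) = 8.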
+ size_t slot = current->GetSpillSlot();
+ switch (current->GetType()) {
+ case Primitive::kPrimDouble:
+ slot += long_spill_slots;
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimLong:
+ slot += float_spill_slots;
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimFloat:
+ slot += int_spill_slots;
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ slot += reserved_out_slots;
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << current->GetType();
+ }
+ current->SetSpillSlot(slot * kVRegSize);
+ }
+
+ Location source = current->ToLocation();
+
+ if (location.IsUnallocated()) {
+ if (location.GetPolicy() == Location::kSameAsFirstInput) {
+ if (locations->InAt(0).IsUnallocated()) {
+ locations->SetInAt(0, source);
+ } else {
+ DCHECK(locations->InAt(0).Equals(source));
+ }
+ }
+ locations->UpdateOut(source);
+ } else {
+ DCHECK(source.Equals(location));
+ }
+ }
+
+ // Connect siblings and resolve inputs.
+ for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ ConnectSiblings(instruction->GetLiveInterval(),
+ max_safepoint_live_core_regs + max_safepoint_live_fp_regs);
+ }
+
+ // Resolve non-linear control flow across branches. Order does not matter.
+ for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ if (block->IsCatchBlock() ||
+ (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+      // Instructions live at the top of catch blocks or irreducible loop headers
+      // have been forced to spill.
+ if (kIsDebugBuild) {
+ BitVector* live = liveness_.GetLiveInSet(*block);
+ for (uint32_t idx : live->Indexes()) {
+ LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+ LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart());
+ // `GetSiblingAt` returns the sibling that contains a position, but there could be
+ // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that
+ // position.
+ if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) {
+ DCHECK(!sibling->HasRegister());
+ }
+ }
+ }
+ } else {
+ BitVector* live = liveness_.GetLiveInSet(*block);
+ for (uint32_t idx : live->Indexes()) {
+ LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+ for (HBasicBlock* predecessor : block->GetPredecessors()) {
+ ConnectSplitSiblings(interval, predecessor, block);
+ }
+ }
+ }
+ }
+
+ // Resolve phi inputs. Order does not matter.
+ for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* current = it.Current();
+ if (current->IsCatchBlock()) {
+ // Catch phi values are set at runtime by the exception delivery mechanism.
+ } else {
+ for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ HInstruction* phi = inst_it.Current();
+ for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
+ HBasicBlock* predecessor = current->GetPredecessors()[i];
+ DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
+ HInstruction* input = phi->InputAt(i);
+ Location source = input->GetLiveInterval()->GetLocationAt(
+ predecessor->GetLifetimeEnd() - 1);
+ Location destination = phi->GetLiveInterval()->ToLocation();
+ InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
+ }
+ }
+ }
+ }
+
+ // Resolve temp locations.
+ for (LiveInterval* temp : temp_intervals) {
+ if (temp->IsHighInterval()) {
+ // High intervals can be skipped, they are already handled by the low interval.
+ continue;
+ }
+ HInstruction* at = liveness_.GetTempUser(temp);
+ size_t temp_index = liveness_.GetTempIndex(temp);
+ LocationSummary* locations = at->GetLocations();
+ switch (temp->GetType()) {
+ case Primitive::kPrimInt:
+ locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
+ break;
+
+ case Primitive::kPrimDouble:
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ Location location = Location::FpuRegisterPairLocation(
+ temp->GetRegister(), temp->GetHighInterval()->GetRegister());
+ locations->SetTempAt(temp_index, location);
+ } else {
+ locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
+ }
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type for temporary location "
+ << temp->GetType();
+ }
+ }
+}
+
+void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval,
+ size_t max_safepoint_live_regs) {
+ LiveInterval* current = interval;
+ if (current->HasSpillSlot()
+ && current->HasRegister()
+      // Currently, we unconditionally spill the current method in the code generators.
+ && !interval->GetDefinedBy()->IsCurrentMethod()) {
+ // We spill eagerly, so move must be at definition.
+ InsertMoveAfter(interval->GetDefinedBy(),
+ interval->ToLocation(),
+ interval->NeedsTwoSpillSlots()
+ ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
+ : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+ }
+ UsePosition* use = current->GetFirstUse();
+ UsePosition* env_use = current->GetFirstEnvironmentUse();
+
+ // Walk over all siblings, updating locations of use positions, and
+ // connecting them when they are adjacent.
+ do {
+ Location source = current->ToLocation();
+
+ // Walk over all uses covered by this interval, and update the location
+ // information.
+
+ LiveRange* range = current->GetFirstRange();
+ while (range != nullptr) {
+ while (use != nullptr && use->GetPosition() < range->GetStart()) {
+ DCHECK(use->IsSynthesized());
+ use = use->GetNext();
+ }
+ while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+ DCHECK(!use->GetIsEnvironment());
+ DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
+ if (!use->IsSynthesized()) {
+ LocationSummary* locations = use->GetUser()->GetLocations();
+ Location expected_location = locations->InAt(use->GetInputIndex());
+          // The expected (actual) location may be invalid if the input is unused. Currently
+          // this only happens for intrinsics.
+ if (expected_location.IsValid()) {
+ if (expected_location.IsUnallocated()) {
+ locations->SetInAt(use->GetInputIndex(), source);
+ } else if (!expected_location.IsConstant()) {
+ AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+ }
+ } else {
+ DCHECK(use->GetUser()->IsInvoke());
+ DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+ }
+ }
+ use = use->GetNext();
+ }
+
+ // Walk over the environment uses, and update their locations.
+ while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
+ env_use = env_use->GetNext();
+ }
+
+ while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
+ DCHECK(current->CoversSlow(env_use->GetPosition())
+ || (env_use->GetPosition() == range->GetEnd()));
+ HEnvironment* environment = env_use->GetEnvironment();
+ environment->SetLocationAt(env_use->GetInputIndex(), source);
+ env_use = env_use->GetNext();
+ }
+
+ range = range->GetNext();
+ }
+
+ // If the next interval starts just after this one, and has a register,
+ // insert a move.
+ LiveInterval* next_sibling = current->GetNextSibling();
+ if (next_sibling != nullptr
+ && next_sibling->HasRegister()
+ && current->GetEnd() == next_sibling->GetStart()) {
+ Location destination = next_sibling->ToLocation();
+ InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
+ }
+
+ for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
+ safepoint_position != nullptr;
+ safepoint_position = safepoint_position->GetNext()) {
+ DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
+
+ LocationSummary* locations = safepoint_position->GetLocations();
+ if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+ DCHECK(interval->GetDefinedBy()->IsActualObject())
+ << interval->GetDefinedBy()->DebugName()
+ << "@" << safepoint_position->GetInstruction()->DebugName();
+ locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
+ }
+
+ switch (source.GetKind()) {
+ case Location::kRegister: {
+ locations->AddLiveRegister(source);
+ if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
+ DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+ max_safepoint_live_regs);
+ }
+ if (current->GetType() == Primitive::kPrimNot) {
+ DCHECK(interval->GetDefinedBy()->IsActualObject())
+ << interval->GetDefinedBy()->DebugName()
+ << "@" << safepoint_position->GetInstruction()->DebugName();
+ locations->SetRegisterBit(source.reg());
+ }
+ break;
+ }
+ case Location::kFpuRegister: {
+ locations->AddLiveRegister(source);
+ break;
+ }
+
+ case Location::kRegisterPair:
+ case Location::kFpuRegisterPair: {
+ locations->AddLiveRegister(source.ToLow());
+ locations->AddLiveRegister(source.ToHigh());
+ break;
+ }
+ case Location::kStackSlot: // Fall-through
+ case Location::kDoubleStackSlot: // Fall-through
+ case Location::kConstant: {
+ // Nothing to do.
+ break;
+ }
+ default: {
+ LOG(FATAL) << "Unexpected location for object";
+ }
+ }
+ }
+ current = next_sibling;
+ } while (current != nullptr);
+
+ if (kIsDebugBuild) {
+ // Following uses can only be synthesized uses.
+ while (use != nullptr) {
+ DCHECK(use->IsSynthesized());
+ use = use->GetNext();
+ }
+ }
+}
+
+static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
+ HInstruction* instruction) {
+ return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
+ (instruction->IsConstant() || instruction->IsCurrentMethod());
+}
+
+void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
+ HBasicBlock* from,
+ HBasicBlock* to) const {
+ if (interval->GetNextSibling() == nullptr) {
+ // Nothing to connect. The whole range was allocated to the same location.
+ return;
+ }
+
+ // Find the intervals that cover `from` and `to`.
+ size_t destination_position = to->GetLifetimeStart();
+ size_t source_position = from->GetLifetimeEnd() - 1;
+ LiveInterval* destination = interval->GetSiblingAt(destination_position);
+ LiveInterval* source = interval->GetSiblingAt(source_position);
+
+ if (destination == source) {
+ // Interval was not split.
+ return;
+ }
+
+ LiveInterval* parent = interval->GetParent();
+ HInstruction* defined_by = parent->GetDefinedBy();
+ if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+ (destination == nullptr || !destination->CoversSlow(destination_position))) {
+    // Our live_in fixed point calculation has found that the instruction is live
+    // in the `to` block because it will eventually enter an irreducible loop. Our
+    // live interval computation, however, does not compute a fixed point, and
+    // therefore will not have a location for that instruction in `to`.
+ // Because the instruction is a constant or the ArtMethod, we don't need to
+ // do anything: it will be materialized in the irreducible loop.
+ DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
+ << defined_by->DebugName() << ":" << defined_by->GetId()
+ << " " << from->GetBlockId() << " -> " << to->GetBlockId();
+ return;
+ }
+
+ if (!destination->HasRegister()) {
+    // Values are eagerly spilled. The spill slot already contains the appropriate value.
+ return;
+ }
+
+ Location location_source;
+ // `GetSiblingAt` returns the interval whose start and end cover `position`,
+ // but does not check whether the interval is inactive at that position.
+  // The only situation where the interval can be inactive at that position is for
+  // constants and the ArtMethod in graphs with irreducible loops.
+ if (codegen_->GetGraph()->HasIrreducibleLoops() &&
+ (source == nullptr || !source->CoversSlow(source_position))) {
+ DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+ if (defined_by->IsConstant()) {
+ location_source = defined_by->GetLocations()->Out();
+ } else {
+ DCHECK(defined_by->IsCurrentMethod());
+ location_source = parent->NeedsTwoSpillSlots()
+ ? Location::DoubleStackSlot(parent->GetSpillSlot())
+ : Location::StackSlot(parent->GetSpillSlot());
+ }
+ } else {
+ DCHECK(source != nullptr);
+ DCHECK(source->CoversSlow(source_position));
+ DCHECK(destination->CoversSlow(destination_position));
+ location_source = source->ToLocation();
+ }
+
+  // If `from` has only one successor, we can put the moves at its exit. Otherwise
+  // we need to put the moves at the entry of `to`.
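+  // Because critical edges have been split, at least one of the two always holds:
+  // either `from` has a single normal successor or `to` has a single predecessor.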
+ if (from->GetNormalSuccessors().size() == 1) {
+ InsertParallelMoveAtExitOf(from,
+ defined_by,
+ location_source,
+ destination->ToLocation());
+ } else {
+ DCHECK_EQ(to->GetPredecessors().size(), 1u);
+ InsertParallelMoveAtEntryOf(to,
+ defined_by,
+ location_source,
+ destination->ToLocation());
+ }
+}
+
+static bool IsValidDestination(Location destination) {
+ return destination.IsRegister()
+ || destination.IsRegisterPair()
+ || destination.IsFpuRegister()
+ || destination.IsFpuRegisterPair()
+ || destination.IsStackSlot()
+ || destination.IsDoubleStackSlot();
+}
+
+void RegisterAllocationResolver::AddMove(HParallelMove* move,
+ Location source,
+ Location destination,
+ HInstruction* instruction,
+ Primitive::Type type) const {
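+  // Illustrative note: on a target that splits long moves, a 64-bit move such as
+  // (r0,r1) -> (sp+8,sp+12) is recorded as the two 32-bit moves r0 -> sp+8 and
+  // r1 -> sp+12, so the parallel move resolver can schedule each half independently.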
+ if (type == Primitive::kPrimLong
+ && codegen_->ShouldSplitLongMoves()
+ // The parallel move resolver knows how to deal with long constants.
+ && !source.IsConstant()) {
+ move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
+ move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
+ } else {
+ move->AddMove(source, destination, type, instruction);
+ }
+}
+
+void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input,
+ HInstruction* user,
+ Location source,
+ Location destination) const {
+ if (source.Equals(destination)) return;
+
+ DCHECK(!user->IsPhi());
+
+ HInstruction* previous = user->GetPrevious();
+ HParallelMove* move = nullptr;
+ if (previous == nullptr
+ || !previous->IsParallelMove()
+ || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(user->GetLifetimePosition());
+ user->GetBlock()->InsertInstructionBefore(move, user);
+ } else {
+ move = previous->AsParallelMove();
+ }
+ DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
+ AddMove(move, source, destination, nullptr, input->GetType());
+}
+
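+// Lifetime positions are assumed to follow the liveness analysis encoding: even
+// positions correspond to the start (input side) of an instruction and odd
+// positions to its end (output side). For example, an instruction at position 10
+// consumes its inputs at 10, and its result becomes available at 11.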
+static bool IsInstructionStart(size_t position) {
+ return (position & 1) == 0;
+}
+
+static bool IsInstructionEnd(size_t position) {
+ return (position & 1) == 1;
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAt(size_t position,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const {
+ DCHECK(IsValidDestination(destination)) << destination;
+ if (source.Equals(destination)) return;
+
+ HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
+ HParallelMove* move;
+ if (at == nullptr) {
+ if (IsInstructionStart(position)) {
+      // Block boundary, don't do anything: the connection of split siblings will handle it.
+ return;
+ } else {
+ // Move must happen before the first instruction of the block.
+ at = liveness_.GetInstructionFromPosition((position + 1) / 2);
+ // Note that parallel moves may have already been inserted, so we explicitly
+      // ask for the first instruction of the block: `GetInstructionFromPosition` does
+      // not return the `HParallelMove` instructions.
+ at = at->GetBlock()->GetFirstInstruction();
+
+ if (at->GetLifetimePosition() < position) {
+ // We may insert moves for split siblings and phi spills at the beginning of the block.
+ // Since this is a different lifetime position, we need to go to the next instruction.
+ DCHECK(at->IsParallelMove());
+ at = at->GetNext();
+ }
+
+ if (at->GetLifetimePosition() != position) {
+ DCHECK_GT(at->GetLifetimePosition(), position);
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ at->GetBlock()->InsertInstructionBefore(move, at);
+ } else {
+ DCHECK(at->IsParallelMove());
+ move = at->AsParallelMove();
+ }
+ }
+ } else if (IsInstructionEnd(position)) {
+ // Move must happen after the instruction.
+ DCHECK(!at->IsControlFlow());
+ move = at->GetNext()->AsParallelMove();
+    // This is a parallel move for connecting siblings in the same block. We need to
+    // differentiate it from moves for connecting blocks, and from input moves.
+ if (move == nullptr || move->GetLifetimePosition() > position) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
+ }
+ } else {
+ // Move must happen before the instruction.
+ HInstruction* previous = at->GetPrevious();
+ if (previous == nullptr
+ || !previous->IsParallelMove()
+ || previous->GetLifetimePosition() != position) {
+ // If the previous is a parallel move, then its position must be lower
+ // than the given `position`: it was added just after the non-parallel
+ // move instruction that precedes `instruction`.
+ DCHECK(previous == nullptr
+ || !previous->IsParallelMove()
+ || previous->GetLifetimePosition() < position);
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ at->GetBlock()->InsertInstructionBefore(move, at);
+ } else {
+ move = previous->AsParallelMove();
+ }
+ }
+ DCHECK_EQ(move->GetLifetimePosition(), position);
+ AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const {
+ DCHECK(IsValidDestination(destination)) << destination;
+ if (source.Equals(destination)) return;
+
+ DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
+ HInstruction* last = block->GetLastInstruction();
+ // We insert moves at exit for phi predecessors and connecting blocks.
+ // A block ending with an if or a packed switch cannot branch to a block
+  // with phis because we do not allow critical edges. Nor can it connect
+  // a split interval between two blocks: the move has to happen in the successor.
+ DCHECK(!last->IsIf() && !last->IsPackedSwitch());
+ HInstruction* previous = last->GetPrevious();
+ HParallelMove* move;
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from output moves.
+ size_t position = last->GetLifetimePosition();
+ if (previous == nullptr || !previous->IsParallelMove()
+ || previous->AsParallelMove()->GetLifetimePosition() != position) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ block->InsertInstructionBefore(move, last);
+ } else {
+ move = previous->AsParallelMove();
+ }
+ AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const {
+ DCHECK(IsValidDestination(destination)) << destination;
+ if (source.Equals(destination)) return;
+
+ HInstruction* first = block->GetFirstInstruction();
+ HParallelMove* move = first->AsParallelMove();
+ size_t position = block->GetLifetimeStart();
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from input moves.
+ if (move == nullptr || move->GetLifetimePosition() != position) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ block->InsertInstructionBefore(move, first);
+ }
+ AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction,
+ Location source,
+ Location destination) const {
+ DCHECK(IsValidDestination(destination)) << destination;
+ if (source.Equals(destination)) return;
+
+ if (instruction->IsPhi()) {
+ InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
+ return;
+ }
+
+ size_t position = instruction->GetLifetimePosition() + 1;
+ HParallelMove* move = instruction->GetNext()->AsParallelMove();
+  // This is a parallel move for moving the output of an instruction. We need
+  // to differentiate it from input moves, from moves for connecting siblings in
+  // the same block, and from moves for connecting blocks.
+ if (move == nullptr || move->GetLifetimePosition() != position) {
+ move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
+ instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
+ }
+ AddMove(move, source, destination, instruction, instruction->GetType());
+}
+
+} // namespace art
diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h
new file mode 100644
index 0000000000..6ceb9bc955
--- /dev/null
+++ b/compiler/optimizing/register_allocation_resolver.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
+
+#include "base/arena_containers.h"
+#include "base/value_object.h"
+#include "primitive.h"
+
+namespace art {
+
+class ArenaAllocator;
+class CodeGenerator;
+class HBasicBlock;
+class HInstruction;
+class HParallelMove;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * Reconciles the locations assigned to live intervals with the location
+ * summary of each instruction, and inserts moves to resolve split intervals,
+ * nonlinear control flow, and phi inputs.
+ */
+class RegisterAllocationResolver : ValueObject {
+ public:
+ RegisterAllocationResolver(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness);
+
+ void Resolve(size_t max_safepoint_live_core_regs,
+ size_t max_safepoint_live_fp_regs,
+ size_t reserved_out_slots, // Includes slot(s) for the art method.
+ size_t int_spill_slots,
+ size_t long_spill_slots,
+ size_t float_spill_slots,
+ size_t double_spill_slots,
+ size_t catch_phi_spill_slots,
+ const ArenaVector<LiveInterval*>& temp_intervals);
+
+ private:
+ // Connect adjacent siblings within blocks, and resolve inputs along the way.
+ // Uses max_safepoint_live_regs to check that we did not underestimate the
+ // number of live registers at safepoints.
+ void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs);
+
+ // Connect siblings between block entries and exits.
+ void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
+
+ // Helper methods for inserting parallel moves in the graph.
+ void InsertParallelMoveAtExitOf(HBasicBlock* block,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const;
+ void InsertParallelMoveAtEntryOf(HBasicBlock* block,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const;
+ void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
+ void AddInputMoveFor(HInstruction* input,
+ HInstruction* user,
+ Location source,
+ Location destination) const;
+ void InsertParallelMoveAt(size_t position,
+ HInstruction* instruction,
+ Location source,
+ Location destination) const;
+ void AddMove(HParallelMove* move,
+ Location source,
+ Location destination,
+ HInstruction* instruction,
+ Primitive::Type type) const;
+
+ ArenaAllocator* const allocator_;
+ CodeGenerator* const codegen_;
+ const SsaLivenessAnalysis& liveness_;
+
+ DISALLOW_COPY_AND_ASSIGN(RegisterAllocationResolver);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 9d99668484..5b768d5d67 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,65 +21,33 @@
#include "base/bit_vector-inl.h"
#include "code_generator.h"
+#include "register_allocator_graph_color.h"
+#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
-namespace art {
-
-static constexpr size_t kMaxLifetimePosition = -1;
-static constexpr size_t kDefaultNumberOfSpillSlots = 4;
-// For simplicity, we implement register pairs as (reg, reg + 1).
-// Note that this is a requirement for double registers on ARM, since we
-// allocate SRegister.
-static int GetHighForLowRegister(int reg) { return reg + 1; }
-static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
-static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
- return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
-}
+namespace art {
RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
CodeGenerator* codegen,
const SsaLivenessAnalysis& liveness)
- : allocator_(allocator),
- codegen_(codegen),
- liveness_(liveness),
- unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- unhandled_(nullptr),
- handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- catch_phi_spill_slots_(0),
- safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- processing_core_registers_(false),
- number_of_registers_(-1),
- registers_array_(nullptr),
- blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
- blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
- reserved_out_slots_(0),
- maximum_number_of_live_core_registers_(0),
- maximum_number_of_live_fp_registers_(0) {
- temp_intervals_.reserve(4);
- int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
- long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
- float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
- double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+ : allocator_(allocator),
+ codegen_(codegen),
+ liveness_(liveness) {}
- codegen->SetupBlockedRegisters();
- physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
- physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
- // Always reserve for the current method and the graph's max out registers.
- // TODO: compute it instead.
- // ArtMethod* takes 2 vregs for 64 bits.
- reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize +
- codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
+RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis,
+ Strategy strategy) {
+ switch (strategy) {
+ case kRegisterAllocatorLinearScan:
+ return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis);
+ case kRegisterAllocatorGraphColor:
+ return new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis);
+ default:
+ LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
+ UNREACHABLE();
+ }
}
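+
+// Typical use (a sketch only; the actual call site lives in the optimizing
+// compiler driver, and the local names here are illustrative):
+//
+//   RegisterAllocator* register_allocator =
+//       RegisterAllocator::Create(allocator, codegen, liveness, strategy);
+//   register_allocator->AllocateRegisters();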
bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
@@ -93,328 +61,6 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UN
|| instruction_set == kX86_64;
}
-static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
- if (interval == nullptr) return false;
- bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
- && (interval->GetType() != Primitive::kPrimFloat);
- return processing_core_registers == is_core_register;
-}
-
-void RegisterAllocator::AllocateRegisters() {
- AllocateRegistersInternal();
- Resolve();
-
- if (kIsDebugBuild) {
- processing_core_registers_ = true;
- ValidateInternal(true);
- processing_core_registers_ = false;
- ValidateInternal(true);
- // Check that the linear order is still correct with regards to lifetime positions.
- // Since only parallel moves have been inserted during the register allocation,
- // these checks are mostly for making sure these moves have been added correctly.
- size_t current_liveness = 0;
- for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
- for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
- HInstruction* instruction = inst_it.Current();
- DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
- current_liveness = instruction->GetLifetimePosition();
- }
- for (HInstructionIterator inst_it(block->GetInstructions());
- !inst_it.Done();
- inst_it.Advance()) {
- HInstruction* instruction = inst_it.Current();
- DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
- current_liveness = instruction->GetLifetimePosition();
- }
- }
- }
-}
-
-void RegisterAllocator::BlockRegister(Location location, size_t start, size_t end) {
- int reg = location.reg();
- DCHECK(location.IsRegister() || location.IsFpuRegister());
- LiveInterval* interval = location.IsRegister()
- ? physical_core_register_intervals_[reg]
- : physical_fp_register_intervals_[reg];
- Primitive::Type type = location.IsRegister()
- ? Primitive::kPrimInt
- : Primitive::kPrimFloat;
- if (interval == nullptr) {
- interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
- if (location.IsRegister()) {
- physical_core_register_intervals_[reg] = interval;
- } else {
- physical_fp_register_intervals_[reg] = interval;
- }
- }
- DCHECK(interval->GetRegister() == reg);
- interval->AddRange(start, end);
-}
-
-void RegisterAllocator::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
- for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
- BlockRegister(Location::RegisterLocation(i), start, end);
- }
- }
- for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
- BlockRegister(Location::FpuRegisterLocation(i), start, end);
- }
- }
-}
-
-void RegisterAllocator::AllocateRegistersInternal() {
- // Iterate post-order, to ensure the list is sorted, and the last added interval
- // is the one with the lowest start position.
- for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
- for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
- back_it.Advance()) {
- ProcessInstruction(back_it.Current());
- }
- for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
- ProcessInstruction(inst_it.Current());
- }
-
- if (block->IsCatchBlock() ||
- (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
- // By blocking all registers at the top of each catch block or irreducible loop, we force
- // intervals belonging to the live-in set of the catch/header block to be spilled.
- // TODO(ngeoffray): Phis in this block could be allocated in register.
- size_t position = block->GetLifetimeStart();
- BlockRegisters(position, position + 1);
- }
- }
-
- number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
- registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
- kArenaAllocRegisterAllocator);
- processing_core_registers_ = true;
- unhandled_ = &unhandled_core_intervals_;
- for (LiveInterval* fixed : physical_core_register_intervals_) {
- if (fixed != nullptr) {
- // Fixed interval is added to inactive_ instead of unhandled_.
- // It's also the only type of inactive interval whose start position
- // can be after the current interval during linear scan.
- // Fixed interval is never split and never moves to unhandled_.
- inactive_.push_back(fixed);
- }
- }
- LinearScan();
-
- inactive_.clear();
- active_.clear();
- handled_.clear();
-
- number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
- registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
- kArenaAllocRegisterAllocator);
- processing_core_registers_ = false;
- unhandled_ = &unhandled_fp_intervals_;
- for (LiveInterval* fixed : physical_fp_register_intervals_) {
- if (fixed != nullptr) {
- // Fixed interval is added to inactive_ instead of unhandled_.
- // It's also the only type of inactive interval whose start position
- // can be after the current interval during linear scan.
- // Fixed interval is never split and never moves to unhandled_.
- inactive_.push_back(fixed);
- }
- }
- LinearScan();
-}
-
-void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- size_t position = instruction->GetLifetimePosition();
-
- if (locations == nullptr) return;
-
- // Create synthesized intervals for temporaries.
- for (size_t i = 0; i < locations->GetTempCount(); ++i) {
- Location temp = locations->GetTemp(i);
- if (temp.IsRegister() || temp.IsFpuRegister()) {
- BlockRegister(temp, position, position + 1);
- // Ensure that an explicit temporary register is marked as being allocated.
- codegen_->AddAllocatedRegister(temp);
- } else {
- DCHECK(temp.IsUnallocated());
- switch (temp.GetPolicy()) {
- case Location::kRequiresRegister: {
- LiveInterval* interval =
- LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
- temp_intervals_.push_back(interval);
- interval->AddTempUse(instruction, i);
- unhandled_core_intervals_.push_back(interval);
- break;
- }
-
- case Location::kRequiresFpuRegister: {
- LiveInterval* interval =
- LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
- temp_intervals_.push_back(interval);
- interval->AddTempUse(instruction, i);
- if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
- interval->AddHighInterval(/* is_temp */ true);
- LiveInterval* high = interval->GetHighInterval();
- temp_intervals_.push_back(high);
- unhandled_fp_intervals_.push_back(high);
- }
- unhandled_fp_intervals_.push_back(interval);
- break;
- }
-
- default:
- LOG(FATAL) << "Unexpected policy for temporary location "
- << temp.GetPolicy();
- }
- }
- }
-
- bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
- && (instruction->GetType() != Primitive::kPrimFloat);
-
- if (locations->NeedsSafepoint()) {
- if (codegen_->IsLeafMethod()) {
- // TODO: We do this here because we do not want the suspend check to artificially
- // create live registers. We should find another place, but this is currently the
- // simplest.
- DCHECK(instruction->IsSuspendCheckEntry());
- instruction->GetBlock()->RemoveInstruction(instruction);
- return;
- }
- safepoints_.push_back(instruction);
- if (locations->OnlyCallsOnSlowPath()) {
- // We add a synthesized range at this position to record the live registers
- // at this position. Ideally, we could just update the safepoints when locations
- // are updated, but we currently need to know the full stack size before updating
- // locations (because of parameters and the fact that we don't have a frame pointer).
- // And knowing the full stack size requires to know the maximum number of live
- // registers at calls in slow paths.
- // By adding the following interval in the algorithm, we can compute this
- // maximum before updating locations.
- LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
- interval->AddRange(position, position + 1);
- AddSorted(&unhandled_core_intervals_, interval);
- AddSorted(&unhandled_fp_intervals_, interval);
- }
- }
-
- if (locations->WillCall()) {
- BlockRegisters(position, position + 1, /* caller_save_only */ true);
- }
-
- for (size_t i = 0; i < locations->GetInputCount(); ++i) {
- Location input = locations->InAt(i);
- if (input.IsRegister() || input.IsFpuRegister()) {
- BlockRegister(input, position, position + 1);
- } else if (input.IsPair()) {
- BlockRegister(input.ToLow(), position, position + 1);
- BlockRegister(input.ToHigh(), position, position + 1);
- }
- }
-
- LiveInterval* current = instruction->GetLiveInterval();
- if (current == nullptr) return;
-
- ArenaVector<LiveInterval*>& unhandled = core_register
- ? unhandled_core_intervals_
- : unhandled_fp_intervals_;
-
- DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
-
- if (codegen_->NeedsTwoRegisters(current->GetType())) {
- current->AddHighInterval();
- }
-
- for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
- HInstruction* safepoint = safepoints_[safepoint_index - 1u];
- size_t safepoint_position = safepoint->GetLifetimePosition();
-
- // Test that safepoints are ordered in the optimal way.
- DCHECK(safepoint_index == safepoints_.size() ||
- safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
-
- if (safepoint_position == current->GetStart()) {
- // The safepoint is for this instruction, so the location of the instruction
- // does not need to be saved.
- DCHECK_EQ(safepoint_index, safepoints_.size());
- DCHECK_EQ(safepoint, instruction);
- continue;
- } else if (current->IsDeadAt(safepoint_position)) {
- break;
- } else if (!current->Covers(safepoint_position)) {
- // Hole in the interval.
- continue;
- }
- current->AddSafepoint(safepoint);
- }
- current->ResetSearchCache();
-
- // Some instructions define their output in fixed register/stack slot. We need
- // to ensure we know these locations before doing register allocation. For a
- // given register, we create an interval that covers these locations. The register
- // will be unavailable at these locations when trying to allocate one for an
- // interval.
- //
- // The backwards walking ensures the ranges are ordered on increasing start positions.
- Location output = locations->Out();
- if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
- Location first = locations->InAt(0);
- if (first.IsRegister() || first.IsFpuRegister()) {
- current->SetFrom(position + 1);
- current->SetRegister(first.reg());
- } else if (first.IsPair()) {
- current->SetFrom(position + 1);
- current->SetRegister(first.low());
- LiveInterval* high = current->GetHighInterval();
- high->SetRegister(first.high());
- high->SetFrom(position + 1);
- }
- } else if (output.IsRegister() || output.IsFpuRegister()) {
- // Shift the interval's start by one to account for the blocked register.
- current->SetFrom(position + 1);
- current->SetRegister(output.reg());
- BlockRegister(output, position, position + 1);
- } else if (output.IsPair()) {
- current->SetFrom(position + 1);
- current->SetRegister(output.low());
- LiveInterval* high = current->GetHighInterval();
- high->SetRegister(output.high());
- high->SetFrom(position + 1);
- BlockRegister(output.ToLow(), position, position + 1);
- BlockRegister(output.ToHigh(), position, position + 1);
- } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
- current->SetSpillSlot(output.GetStackIndex());
- } else {
- DCHECK(output.IsUnallocated() || output.IsConstant());
- }
-
- if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
- AllocateSpillSlotForCatchPhi(instruction->AsPhi());
- }
-
- // If needed, add interval to the list of unhandled intervals.
- if (current->HasSpillSlot() || instruction->IsConstant()) {
- // Split just before first register use.
- size_t first_register_use = current->FirstRegisterUse();
- if (first_register_use != kNoLifetime) {
- LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
- // Don't add directly to `unhandled`, it needs to be sorted and the start
- // of this new interval might be after intervals already in the list.
- AddSorted(&unhandled, split);
- } else {
- // Nothing to do, we won't allocate a register for this value.
- }
- } else {
- // Don't add directly to `unhandled`, temp or safepoint intervals
- // for this instruction may have been added, and those can be
- // processed first.
- AddSorted(&unhandled, current);
- }
-}
-
class AllRangesIterator : public ValueObject {
public:
explicit AllRangesIterator(LiveInterval* interval)
@@ -442,36 +88,6 @@ class AllRangesIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
};
-bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
- // To simplify unit testing, we eagerly create the array of intervals, and
- // call the helper method.
- ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
- for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
- HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
- intervals.push_back(instruction->GetLiveInterval());
- }
- }
-
- const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
- ? &physical_core_register_intervals_
- : &physical_fp_register_intervals_;
- for (LiveInterval* fixed : *physical_register_intervals) {
- if (fixed != nullptr) {
- intervals.push_back(fixed);
- }
- }
-
- for (LiveInterval* temp : temp_intervals_) {
- if (ShouldProcess(processing_core_registers_, temp)) {
- intervals.push_back(temp);
- }
- }
-
- return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
- allocator_, processing_core_registers_, log_fatal_on_failure);
-}
-
bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
size_t number_of_spill_slots,
size_t number_of_out_slots,
@@ -550,6 +166,19 @@ bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& inte
} else {
codegen.DumpFloatingPointRegister(message, current->GetRegister());
}
+ for (LiveInterval* interval : intervals) {
+ if (interval->HasRegister()
+ && interval->GetRegister() == current->GetRegister()
+ && interval->CoversSlow(j)) {
+ message << std::endl;
+ if (interval->GetDefinedBy() != nullptr) {
+ message << interval->GetDefinedBy()->GetKind() << " ";
+ } else {
+ message << "physical ";
+ }
+ interval->Dump(message);
+ }
+ }
LOG(FATAL) << message.str();
} else {
return false;
@@ -564,638 +193,30 @@ bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& inte
return true;
}
-void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
- interval->Dump(stream);
- stream << ": ";
- if (interval->HasRegister()) {
- if (interval->IsFloatingPoint()) {
- codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
- } else {
- codegen_->DumpCoreRegister(stream, interval->GetRegister());
- }
- } else {
- stream << "spilled";
- }
- stream << std::endl;
-}
-
-void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const {
- stream << "inactive: " << std::endl;
- for (LiveInterval* inactive_interval : inactive_) {
- DumpInterval(stream, inactive_interval);
- }
- stream << "active: " << std::endl;
- for (LiveInterval* active_interval : active_) {
- DumpInterval(stream, active_interval);
- }
- stream << "unhandled: " << std::endl;
- auto unhandled = (unhandled_ != nullptr) ?
- unhandled_ : &unhandled_core_intervals_;
- for (LiveInterval* unhandled_interval : *unhandled) {
- DumpInterval(stream, unhandled_interval);
- }
- stream << "handled: " << std::endl;
- for (LiveInterval* handled_interval : handled_) {
- DumpInterval(stream, handled_interval);
- }
-}
-
-// By the book implementation of a linear scan register allocator.
-void RegisterAllocator::LinearScan() {
- while (!unhandled_->empty()) {
- // (1) Remove interval with the lowest start position from unhandled.
- LiveInterval* current = unhandled_->back();
- unhandled_->pop_back();
-
- // Make sure the interval is an expected state.
- DCHECK(!current->IsFixed() && !current->HasSpillSlot());
- // Make sure we are going in the right order.
- DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
- // Make sure a low interval is always with a high.
- DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
- // Make sure a high interval is always with a low.
- DCHECK(current->IsLowInterval() ||
- unhandled_->empty() ||
- !unhandled_->back()->IsHighInterval());
-
- size_t position = current->GetStart();
-
- // Remember the inactive_ size here since the ones moved to inactive_ from
- // active_ below shouldn't need to be re-checked.
- size_t inactive_intervals_to_handle = inactive_.size();
-
- // (2) Remove currently active intervals that are dead at this position.
- // Move active intervals that have a lifetime hole at this position
- // to inactive.
- auto active_kept_end = std::remove_if(
- active_.begin(),
- active_.end(),
- [this, position](LiveInterval* interval) {
- if (interval->IsDeadAt(position)) {
- handled_.push_back(interval);
- return true;
- } else if (!interval->Covers(position)) {
- inactive_.push_back(interval);
- return true;
- } else {
- return false; // Keep this interval.
- }
- });
- active_.erase(active_kept_end, active_.end());
-
- // (3) Remove currently inactive intervals that are dead at this position.
- // Move inactive intervals that cover this position to active.
- auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
- auto inactive_kept_end = std::remove_if(
- inactive_.begin(),
- inactive_to_handle_end,
- [this, position](LiveInterval* interval) {
- DCHECK(interval->GetStart() < position || interval->IsFixed());
- if (interval->IsDeadAt(position)) {
- handled_.push_back(interval);
- return true;
- } else if (interval->Covers(position)) {
- active_.push_back(interval);
- return true;
- } else {
- return false; // Keep this interval.
- }
- });
- inactive_.erase(inactive_kept_end, inactive_to_handle_end);
-
- if (current->IsSlowPathSafepoint()) {
- // Synthesized interval to record the maximum number of live registers
- // at safepoints. No need to allocate a register for it.
- if (processing_core_registers_) {
- maximum_number_of_live_core_registers_ =
- std::max(maximum_number_of_live_core_registers_, active_.size());
- } else {
- maximum_number_of_live_fp_registers_ =
- std::max(maximum_number_of_live_fp_registers_, active_.size());
- }
- DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
- continue;
- }
-
- if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
- DCHECK(!current->HasRegister());
- // Allocating the low part was unsucessful. The splitted interval for the high part
- // will be handled next (it is in the `unhandled_` list).
- continue;
- }
-
- // (4) Try to find an available register.
- bool success = TryAllocateFreeReg(current);
-
- // (5) If no register could be found, we need to spill.
- if (!success) {
- success = AllocateBlockedReg(current);
- }
-
- // (6) If the interval had a register allocated, add it to the list of active
- // intervals.
- if (success) {
- codegen_->AddAllocatedRegister(processing_core_registers_
- ? Location::RegisterLocation(current->GetRegister())
- : Location::FpuRegisterLocation(current->GetRegister()));
- active_.push_back(current);
- if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
- current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
- }
- }
- }
-}
-
-static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
- DCHECK(!interval->IsHighInterval());
- // Note that the same instruction may occur multiple times in the input list,
- // so `free_until` may have changed already.
- // Since `position` is not the current scan position, we need to use CoversSlow.
- if (interval->IsDeadAt(position)) {
- // Set the register to be free. Note that inactive intervals might later
- // update this.
- free_until[interval->GetRegister()] = kMaxLifetimePosition;
- if (interval->HasHighInterval()) {
- DCHECK(interval->GetHighInterval()->IsDeadAt(position));
- free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
- }
- } else if (!interval->CoversSlow(position)) {
- // The interval becomes inactive at `defined_by`. We make its register
- // available only until the next use strictly after `defined_by`.
- free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
+ DCHECK_GE(position, interval->GetStart());
+ DCHECK(!interval->IsDeadAt(position));
+ if (position == interval->GetStart()) {
+ // Spill slot will be allocated when handling `interval` again.
+ interval->ClearRegister();
if (interval->HasHighInterval()) {
- DCHECK(!interval->GetHighInterval()->CoversSlow(position));
- free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
- }
- }
-}
-
-// Find a free register. If multiple are found, pick the register that
-// is free the longest.
-bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
- size_t* free_until = registers_array_;
-
- // First set all registers to be free.
- for (size_t i = 0; i < number_of_registers_; ++i) {
- free_until[i] = kMaxLifetimePosition;
- }
-
- // For each active interval, set its register to not free.
- for (LiveInterval* interval : active_) {
- DCHECK(interval->HasRegister());
- free_until[interval->GetRegister()] = 0;
- }
-
- // An interval that starts an instruction (that is, it is not split), may
- // re-use the registers used by the inputs of that instruciton, based on the
- // location summary.
- HInstruction* defined_by = current->GetDefinedBy();
- if (defined_by != nullptr && !current->IsSplit()) {
- LocationSummary* locations = defined_by->GetLocations();
- if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
- HInputsRef inputs = defined_by->GetInputs();
- for (size_t i = 0; i < inputs.size(); ++i) {
- // Take the last interval of the input. It is the location of that interval
- // that will be used at `defined_by`.
- LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
- // Note that interval may have not been processed yet.
- // TODO: Handle non-split intervals last in the work list.
- if (locations->InAt(i).IsValid()
- && interval->HasRegister()
- && interval->SameRegisterKind(*current)) {
- // The input must be live until the end of `defined_by`, to comply to
- // the linear scan algorithm. So we use `defined_by`'s end lifetime
- // position to check whether the input is dead or is inactive after
- // `defined_by`.
- DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
- size_t position = defined_by->GetLifetimePosition() + 1;
- FreeIfNotCoverAt(interval, position, free_until);
- }
- }
- }
- }
-
- // For each inactive interval, set its register to be free until
- // the next intersection with `current`.
- for (LiveInterval* inactive : inactive_) {
- // Temp/Slow-path-safepoint interval has no holes.
- DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
- if (!current->IsSplit() && !inactive->IsFixed()) {
- // Neither current nor inactive are fixed.
- // Thanks to SSA, a non-split interval starting in a hole of an
- // inactive interval should never intersect with that inactive interval.
- // Only if it's not fixed though, because fixed intervals don't come from SSA.
- DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
- continue;
- }
-
- DCHECK(inactive->HasRegister());
- if (free_until[inactive->GetRegister()] == 0) {
- // Already used by some active interval. No need to intersect.
- continue;
- }
- size_t next_intersection = inactive->FirstIntersectionWith(current);
- if (next_intersection != kNoLifetime) {
- free_until[inactive->GetRegister()] =
- std::min(free_until[inactive->GetRegister()], next_intersection);
- }
- }
-
- int reg = kNoRegister;
- if (current->HasRegister()) {
- // Some instructions have a fixed register output.
- reg = current->GetRegister();
- if (free_until[reg] == 0) {
- DCHECK(current->IsHighInterval());
- // AllocateBlockedReg will spill the holder of the register.
- return false;
- }
- } else {
- DCHECK(!current->IsHighInterval());
- int hint = current->FindFirstRegisterHint(free_until, liveness_);
- if ((hint != kNoRegister)
- // For simplicity, if the hint we are getting for a pair cannot be used,
- // we are just going to allocate a new pair.
- && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
- DCHECK(!IsBlocked(hint));
- reg = hint;
- } else if (current->IsLowInterval()) {
- reg = FindAvailableRegisterPair(free_until, current->GetStart());
- } else {
- reg = FindAvailableRegister(free_until, current);
- }
- }
-
- DCHECK_NE(reg, kNoRegister);
- // If we could not find a register, we need to spill.
- if (free_until[reg] == 0) {
- return false;
- }
-
- if (current->IsLowInterval()) {
- // If the high register of this interval is not available, we need to spill.
- int high_reg = current->GetHighInterval()->GetRegister();
- if (high_reg == kNoRegister) {
- high_reg = GetHighForLowRegister(reg);
- }
- if (free_until[high_reg] == 0) {
- return false;
- }
- }
-
- current->SetRegister(reg);
- if (!current->IsDeadAt(free_until[reg])) {
- // If the register is only available for a subset of live ranges
- // covered by `current`, split `current` before the position where
- // the register is not available anymore.
- LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
- DCHECK(split != nullptr);
- AddSorted(unhandled_, split);
- }
- return true;
-}
-
-bool RegisterAllocator::IsBlocked(int reg) const {
- return processing_core_registers_
- ? blocked_core_registers_[reg]
- : blocked_fp_registers_[reg];
-}
-
-int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
- int reg = kNoRegister;
- // Pick the register pair that is used the last.
- for (size_t i = 0; i < number_of_registers_; ++i) {
- if (IsBlocked(i)) continue;
- if (!IsLowRegister(i)) continue;
- int high_register = GetHighForLowRegister(i);
- if (IsBlocked(high_register)) continue;
- int existing_high_register = GetHighForLowRegister(reg);
- if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
- && next_use[high_register] >= next_use[existing_high_register])) {
- reg = i;
- if (next_use[i] == kMaxLifetimePosition
- && next_use[high_register] == kMaxLifetimePosition) {
- break;
- }
- } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
- // If one of the current register is known to be unavailable, just unconditionally
- // try a new one.
- reg = i;
- }
- }
- return reg;
-}
-
-bool RegisterAllocator::IsCallerSaveRegister(int reg) const {
- return processing_core_registers_
- ? !codegen_->IsCoreCalleeSaveRegister(reg)
- : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
-}
-
-int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
- // We special case intervals that do not span a safepoint to try to find a caller-save
- // register if one is available. We iterate from 0 to the number of registers,
- // so if there are caller-save registers available at the end, we continue the iteration.
- bool prefers_caller_save = !current->HasWillCallSafepoint();
- int reg = kNoRegister;
- for (size_t i = 0; i < number_of_registers_; ++i) {
- if (IsBlocked(i)) {
- // Register cannot be used. Continue.
- continue;
- }
-
- // Best case: we found a register fully available.
- if (next_use[i] == kMaxLifetimePosition) {
- if (prefers_caller_save && !IsCallerSaveRegister(i)) {
- // We can get shorter encodings on some platforms by using
- // small register numbers. So only update the candidate if the previous
- // one was not available for the whole method.
- if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
- reg = i;
- }
- // Continue the iteration in the hope of finding a caller save register.
- continue;
- } else {
- reg = i;
- // We know the register is good enough. Return it.
- break;
- }
- }
-
- // If we had no register before, take this one as a reference.
- if (reg == kNoRegister) {
- reg = i;
- continue;
- }
-
- // Pick the register that is used the last.
- if (next_use[i] > next_use[reg]) {
- reg = i;
- continue;
- }
- }
- return reg;
-}
-
-// Remove interval and its other half if any. Return iterator to the following element.
-static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
- ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
- DCHECK(intervals->begin() <= pos && pos < intervals->end());
- LiveInterval* interval = *pos;
- if (interval->IsLowInterval()) {
- DCHECK(pos + 1 < intervals->end());
- DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
- return intervals->erase(pos, pos + 2);
- } else if (interval->IsHighInterval()) {
- DCHECK(intervals->begin() < pos);
- DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
- return intervals->erase(pos - 1, pos + 1);
- } else {
- return intervals->erase(pos);
- }
-}
-
-bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
- size_t first_register_use,
- size_t* next_use) {
- for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
- LiveInterval* active = *it;
- DCHECK(active->HasRegister());
- if (active->IsFixed()) continue;
- if (active->IsHighInterval()) continue;
- if (first_register_use > next_use[active->GetRegister()]) continue;
-
- // Split the first interval found that is either:
- // 1) A non-pair interval.
- // 2) A pair interval whose high is not low + 1.
- // 3) A pair interval whose low is not even.
- if (!active->IsLowInterval() ||
- IsLowOfUnalignedPairInterval(active) ||
- !IsLowRegister(active->GetRegister())) {
- LiveInterval* split = Split(active, position);
- if (split != active) {
- handled_.push_back(active);
- }
- RemoveIntervalAndPotentialOtherHalf(&active_, it);
- AddSorted(unhandled_, split);
- return true;
- }
- }
- return false;
-}
-
-// Find the register that is used the last, and spill the interval
-// that holds it. If the first use of `current` is after the next use of
-// that register, we spill `current` instead.
-bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
- size_t first_register_use = current->FirstRegisterUse();
- if (current->HasRegister()) {
- DCHECK(current->IsHighInterval());
- // The low interval has allocated the register for the high interval. In
- // case the low interval had to split both intervals, we may end up in a
- // situation where the high interval does not have a register use anymore.
- // We must still proceed in order to split currently active and inactive
- // uses of the high interval's register, and put the high interval in the
- // active set.
- DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
- } else if (first_register_use == kNoLifetime) {
- AllocateSpillSlotFor(current);
- return false;
- }
-
- // First set all registers as not being used.
- size_t* next_use = registers_array_;
- for (size_t i = 0; i < number_of_registers_; ++i) {
- next_use[i] = kMaxLifetimePosition;
- }
-
- // For each active interval, find the next use of its register after the
- // start of current.
- for (LiveInterval* active : active_) {
- DCHECK(active->HasRegister());
- if (active->IsFixed()) {
- next_use[active->GetRegister()] = current->GetStart();
- } else {
- size_t use = active->FirstRegisterUseAfter(current->GetStart());
- if (use != kNoLifetime) {
- next_use[active->GetRegister()] = use;
- }
- }
- }
-
- // For each inactive interval, find the next use of its register after the
- // start of current.
- for (LiveInterval* inactive : inactive_) {
- // Temp/Slow-path-safepoint interval has no holes.
- DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
- if (!current->IsSplit() && !inactive->IsFixed()) {
- // Neither current nor inactive are fixed.
- // Thanks to SSA, a non-split interval starting in a hole of an
- // inactive interval should never intersect with that inactive interval.
- // Only if it's not fixed though, because fixed intervals don't come from SSA.
- DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
- continue;
- }
- DCHECK(inactive->HasRegister());
- size_t next_intersection = inactive->FirstIntersectionWith(current);
- if (next_intersection != kNoLifetime) {
- if (inactive->IsFixed()) {
- next_use[inactive->GetRegister()] =
- std::min(next_intersection, next_use[inactive->GetRegister()]);
- } else {
- size_t use = inactive->FirstUseAfter(current->GetStart());
- if (use != kNoLifetime) {
- next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
- }
- }
- }
- }
-
- int reg = kNoRegister;
- bool should_spill = false;
- if (current->HasRegister()) {
- DCHECK(current->IsHighInterval());
- reg = current->GetRegister();
- // When allocating the low part, we made sure the high register was available.
- DCHECK_LT(first_register_use, next_use[reg]);
- } else if (current->IsLowInterval()) {
- reg = FindAvailableRegisterPair(next_use, first_register_use);
- // We should spill if either register of the pair is not available.
- should_spill = (first_register_use >= next_use[reg])
- || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
- } else {
- DCHECK(!current->IsHighInterval());
- reg = FindAvailableRegister(next_use, current);
- should_spill = (first_register_use >= next_use[reg]);
- }
-
- DCHECK_NE(reg, kNoRegister);
- if (should_spill) {
- DCHECK(!current->IsHighInterval());
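- // `current` starts at or after the position just before its first register use,
- // so there is no room to split it again before that use.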
- bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
- if (is_allocation_at_use_site) {
- if (!current->IsLowInterval()) {
- DumpInterval(std::cerr, current);
- DumpAllIntervals(std::cerr);
- // This situation has the potential to infinite loop, so we make it a non-debug CHECK.
- HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
- CHECK(false) << "There are not enough registers available for "
- << current->GetParent()->GetDefinedBy()->DebugName() << " "
- << current->GetParent()->GetDefinedBy()->GetId()
- << " at " << first_register_use - 1 << " "
- << (at == nullptr ? "" : at->DebugName());
- }
-
- // If we're allocating a register for `current` because the instruction at
- // that position requires it, but we think we should spill, then there are
- // non-pair intervals or unaligned pair intervals blocking the allocation.
- // We split the first interval found, and put ourselves first in the
- // `unhandled_` list.
- bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
- first_register_use,
- next_use);
- DCHECK(success);
- LiveInterval* existing = unhandled_->back();
- DCHECK(existing->IsHighInterval());
- DCHECK_EQ(existing->GetLowInterval(), current);
- unhandled_->push_back(current);
- } else {
- // If the first use of that instruction is after the last use of the found
- // register, we split this interval just before its first register use.
- AllocateSpillSlotFor(current);
- LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
- DCHECK(current != split);
- AddSorted(unhandled_, split);
+ interval->GetHighInterval()->ClearRegister();
+ } else if (interval->HasLowInterval()) {
+ interval->GetLowInterval()->ClearRegister();
}
- return false;
+ return interval;
} else {
- // Use this register and spill the active and inactive intervals that
- // have that register.
- current->SetRegister(reg);
-
- for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
- LiveInterval* active = *it;
- if (active->GetRegister() == reg) {
- DCHECK(!active->IsFixed());
- LiveInterval* split = Split(active, current->GetStart());
- if (split != active) {
- handled_.push_back(active);
- }
- RemoveIntervalAndPotentialOtherHalf(&active_, it);
- AddSorted(unhandled_, split);
- break;
- }
- }
-
- // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
- for (auto it = inactive_.begin(); it != inactive_.end(); ) {
- LiveInterval* inactive = *it;
- bool erased = false;
- if (inactive->GetRegister() == reg) {
- if (!current->IsSplit() && !inactive->IsFixed()) {
- // Neither current nor inactive are fixed.
- // Thanks to SSA, a non-split interval starting in a hole of an
- // inactive interval should never intersect with that inactive interval.
- // Only if it's not fixed though, because fixed intervals don't come from SSA.
- DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
- } else {
- size_t next_intersection = inactive->FirstIntersectionWith(current);
- if (next_intersection != kNoLifetime) {
- if (inactive->IsFixed()) {
- LiveInterval* split = Split(current, next_intersection);
- DCHECK_NE(split, current);
- AddSorted(unhandled_, split);
- } else {
- // Split at the start of `current`, which will lead to splitting
- // at the end of the lifetime hole of `inactive`.
- LiveInterval* split = Split(inactive, current->GetStart());
- // If it's inactive, it must start before the current interval.
- DCHECK_NE(split, inactive);
- it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
- erased = true;
- handled_.push_back(inactive);
- AddSorted(unhandled_, split);
- }
- }
- }
- }
- // If we have erased the element, `it` already points to the next element.
- // Otherwise we need to move to the next element.
- if (!erased) {
- ++it;
- }
- }
-
- return true;
- }
-}
-
-void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
- DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
- size_t insert_at = 0;
- for (size_t i = array->size(); i > 0; --i) {
- LiveInterval* current = (*array)[i - 1u];
- // High intervals must be processed right after their low equivalent.
- if (current->StartsAfter(interval) && !current->IsHighInterval()) {
- insert_at = i;
- break;
- } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
- // Ensure the slow path interval is the last to be processed at its location: we want the
- // interval to know all live registers at this location.
- DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
- insert_at = i;
- break;
+ LiveInterval* new_interval = interval->SplitAt(position);
+ if (interval->HasHighInterval()) {
+ LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
+ new_interval->SetHighInterval(high);
+ high->SetLowInterval(new_interval);
+ } else if (interval->HasLowInterval()) {
+ LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
+ new_interval->SetLowInterval(low);
+ low->SetHighInterval(new_interval);
}
- }
-
- // Insert the high interval before the low, to ensure the low is processed before.
- auto insert_pos = array->begin() + insert_at;
- if (interval->HasHighInterval()) {
- array->insert(insert_pos, { interval->GetHighInterval(), interval });
- } else if (interval->HasLowInterval()) {
- array->insert(insert_pos, { interval, interval->GetLowInterval() });
- } else {
- array->insert(insert_pos, interval);
+ return new_interval;
}
}
@@ -1258,748 +279,4 @@ LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t fro
return Split(interval, block_to->GetLifetimeStart());
}
-LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
- DCHECK_GE(position, interval->GetStart());
- DCHECK(!interval->IsDeadAt(position));
- if (position == interval->GetStart()) {
- // Spill slot will be allocated when handling `interval` again.
- interval->ClearRegister();
- if (interval->HasHighInterval()) {
- interval->GetHighInterval()->ClearRegister();
- } else if (interval->HasLowInterval()) {
- interval->GetLowInterval()->ClearRegister();
- }
- return interval;
- } else {
- LiveInterval* new_interval = interval->SplitAt(position);
- if (interval->HasHighInterval()) {
- LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
- new_interval->SetHighInterval(high);
- high->SetLowInterval(new_interval);
- } else if (interval->HasLowInterval()) {
- LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
- new_interval->SetLowInterval(low);
- low->SetHighInterval(new_interval);
- }
- return new_interval;
- }
-}
-
-void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
- if (interval->IsHighInterval()) {
- // The low interval already took care of allocating the spill slot.
- DCHECK(!interval->GetLowInterval()->HasRegister());
- DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
- return;
- }
-
- LiveInterval* parent = interval->GetParent();
-
- // An instruction gets a spill slot for its entire lifetime. If the parent
- // of this interval already has a spill slot, there is nothing to do.
- if (parent->HasSpillSlot()) {
- return;
- }
-
- HInstruction* defined_by = parent->GetDefinedBy();
- DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
-
- if (defined_by->IsParameterValue()) {
- // Parameters have their own stack slot.
- parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
- return;
- }
-
- if (defined_by->IsCurrentMethod()) {
- parent->SetSpillSlot(0);
- return;
- }
-
- if (defined_by->IsConstant()) {
- // Constants don't need a spill slot.
- return;
- }
-
- ArenaVector<size_t>* spill_slots = nullptr;
- switch (interval->GetType()) {
- case Primitive::kPrimDouble:
- spill_slots = &double_spill_slots_;
- break;
- case Primitive::kPrimLong:
- spill_slots = &long_spill_slots_;
- break;
- case Primitive::kPrimFloat:
- spill_slots = &float_spill_slots_;
- break;
- case Primitive::kPrimNot:
- case Primitive::kPrimInt:
- case Primitive::kPrimChar:
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimShort:
- spill_slots = &int_spill_slots_;
- break;
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
- }
-
- // Find an available spill slot.
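- // A slot can be reused once the interval that last occupied it has ended by the
- // time `parent` starts; the next slot is checked as well so that the position
- // also works for intervals needing two slots.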
- size_t slot = 0;
- for (size_t e = spill_slots->size(); slot < e; ++slot) {
- if ((*spill_slots)[slot] <= parent->GetStart()
- && (slot == (e - 1) || (*spill_slots)[slot + 1] <= parent->GetStart())) {
- break;
- }
- }
-
- size_t end = interval->GetLastSibling()->GetEnd();
- if (parent->NeedsTwoSpillSlots()) {
- if (slot + 2u > spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->resize(slot + 2u, end);
- }
- (*spill_slots)[slot] = end;
- (*spill_slots)[slot + 1] = end;
- } else {
- if (slot == spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->push_back(end);
- } else {
- (*spill_slots)[slot] = end;
- }
- }
-
- // Note that the exact spill slot location will be computed when we resolve,
- // that is, when we know the number of spill slots for each type.
- parent->SetSpillSlot(slot);
-}
-
-static bool IsValidDestination(Location destination) {
- return destination.IsRegister()
- || destination.IsRegisterPair()
- || destination.IsFpuRegister()
- || destination.IsFpuRegisterPair()
- || destination.IsStackSlot()
- || destination.IsDoubleStackSlot();
-}
-
-void RegisterAllocator::AllocateSpillSlotForCatchPhi(HPhi* phi) {
- LiveInterval* interval = phi->GetLiveInterval();
-
- HInstruction* previous_phi = phi->GetPrevious();
- DCHECK(previous_phi == nullptr ||
- previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
- << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
-
- if (phi->IsVRegEquivalentOf(previous_phi)) {
- // This is an equivalent of the previous phi. We need to assign the same
- // catch phi slot.
- DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
- interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
- } else {
- // Allocate a new spill slot for this catch phi.
- // TODO: Reuse spill slots when intervals of phis from different catch
- // blocks do not overlap.
- interval->SetSpillSlot(catch_phi_spill_slots_);
- catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
- }
-}
-
-void RegisterAllocator::AddMove(HParallelMove* move,
- Location source,
- Location destination,
- HInstruction* instruction,
- Primitive::Type type) const {
- if (type == Primitive::kPrimLong
- && codegen_->ShouldSplitLongMoves()
- // The parallel move resolver knows how to deal with long constants.
- && !source.IsConstant()) {
- move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
- move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
- } else {
- move->AddMove(source, destination, type, instruction);
- }
-}
-
-void RegisterAllocator::AddInputMoveFor(HInstruction* input,
- HInstruction* user,
- Location source,
- Location destination) const {
- if (source.Equals(destination)) return;
-
- DCHECK(!user->IsPhi());
-
- HInstruction* previous = user->GetPrevious();
- HParallelMove* move = nullptr;
- if (previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(user->GetLifetimePosition());
- user->GetBlock()->InsertInstructionBefore(move, user);
- } else {
- move = previous->AsParallelMove();
- }
- DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
- AddMove(move, source, destination, nullptr, input->GetType());
-}
-
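-// Lifetime positions are even at the start of an instruction and odd at its end.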
-static bool IsInstructionStart(size_t position) {
- return (position & 1) == 0;
-}
-
-static bool IsInstructionEnd(size_t position) {
- return (position & 1) == 1;
-}
-
-void RegisterAllocator::InsertParallelMoveAt(size_t position,
- HInstruction* instruction,
- Location source,
- Location destination) const {
- DCHECK(IsValidDestination(destination)) << destination;
- if (source.Equals(destination)) return;
-
- HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
- HParallelMove* move;
- if (at == nullptr) {
- if (IsInstructionStart(position)) {
- // Block boundary, don't do anything: the connection of split siblings will handle it.
- return;
- } else {
- // Move must happen before the first instruction of the block.
- at = liveness_.GetInstructionFromPosition((position + 1) / 2);
- // Note that parallel moves may have already been inserted, so we explicitly
- // ask for the first instruction of the block: `GetInstructionFromPosition` does
- // not contain the `HParallelMove` instructions.
- at = at->GetBlock()->GetFirstInstruction();
-
- if (at->GetLifetimePosition() < position) {
- // We may insert moves for split siblings and phi spills at the beginning of the block.
- // Since this is a different lifetime position, we need to go to the next instruction.
- DCHECK(at->IsParallelMove());
- at = at->GetNext();
- }
-
- if (at->GetLifetimePosition() != position) {
- DCHECK_GT(at->GetLifetimePosition(), position);
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- at->GetBlock()->InsertInstructionBefore(move, at);
- } else {
- DCHECK(at->IsParallelMove());
- move = at->AsParallelMove();
- }
- }
- } else if (IsInstructionEnd(position)) {
- // Move must happen after the instruction.
- DCHECK(!at->IsControlFlow());
- move = at->GetNext()->AsParallelMove();
- // This is a parallel move for connecting siblings in the same block. We need to
- // differentiate it from moves for connecting blocks, and from input moves.
- if (move == nullptr || move->GetLifetimePosition() > position) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
- }
- } else {
- // Move must happen before the instruction.
- HInstruction* previous = at->GetPrevious();
- if (previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() != position) {
- // If the previous is a parallel move, then its position must be lower
- // than the given `position`: it was added just after the non-parallel
- // move instruction that precedes `instruction`.
- DCHECK(previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() < position);
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- at->GetBlock()->InsertInstructionBefore(move, at);
- } else {
- move = previous->AsParallelMove();
- }
- }
- DCHECK_EQ(move->GetLifetimePosition(), position);
- AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
- HInstruction* instruction,
- Location source,
- Location destination) const {
- DCHECK(IsValidDestination(destination)) << destination;
- if (source.Equals(destination)) return;
-
- DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
- HInstruction* last = block->GetLastInstruction();
- // We insert moves at exit for phi predecessors and connecting blocks.
- // A block ending with an if or a packed switch cannot branch to a block
- // with phis because we do not allow critical edges. It also cannot connect
- // a split interval between two blocks: the move has to happen in the successor.
- DCHECK(!last->IsIf() && !last->IsPackedSwitch());
- HInstruction* previous = last->GetPrevious();
- HParallelMove* move;
- // This is a parallel move for connecting blocks. We need to differentiate
- // it from moves for connecting siblings in the same block, and from output moves.
- size_t position = last->GetLifetimePosition();
- if (previous == nullptr || !previous->IsParallelMove()
- || previous->AsParallelMove()->GetLifetimePosition() != position) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- block->InsertInstructionBefore(move, last);
- } else {
- move = previous->AsParallelMove();
- }
- AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
- HInstruction* instruction,
- Location source,
- Location destination) const {
- DCHECK(IsValidDestination(destination)) << destination;
- if (source.Equals(destination)) return;
-
- HInstruction* first = block->GetFirstInstruction();
- HParallelMove* move = first->AsParallelMove();
- size_t position = block->GetLifetimeStart();
- // This is a parallel move for connecting blocks. We need to differentiate
- // it from moves for connecting siblings in the same block, and from input moves.
- if (move == nullptr || move->GetLifetimePosition() != position) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- block->InsertInstructionBefore(move, first);
- }
- AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
- Location source,
- Location destination) const {
- DCHECK(IsValidDestination(destination)) << destination;
- if (source.Equals(destination)) return;
-
- if (instruction->IsPhi()) {
- InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination);
- return;
- }
-
- size_t position = instruction->GetLifetimePosition() + 1;
- HParallelMove* move = instruction->GetNext()->AsParallelMove();
- // This is a parallel move for moving the output of an instruction. We need
- // to differentiate it from input moves, from moves for connecting siblings in
- // the same block, and from moves for connecting blocks.
- if (move == nullptr || move->GetLifetimePosition() != position) {
- move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(position);
- instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
- }
- AddMove(move, source, destination, instruction, instruction->GetType());
-}
-
-void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
- LiveInterval* current = interval;
- if (current->HasSpillSlot()
- && current->HasRegister()
- // Currently, we unconditionally spill the current method in the code generators.
- && !interval->GetDefinedBy()->IsCurrentMethod()) {
- // We spill eagerly, so move must be at definition.
- InsertMoveAfter(interval->GetDefinedBy(),
- interval->ToLocation(),
- interval->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
- : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
- }
- UsePosition* use = current->GetFirstUse();
- UsePosition* env_use = current->GetFirstEnvironmentUse();
-
- // Walk over all siblings, updating locations of use positions, and
- // connecting them when they are adjacent.
- do {
- Location source = current->ToLocation();
-
- // Walk over all uses covered by this interval, and update the location
- // information.
-
- LiveRange* range = current->GetFirstRange();
- while (range != nullptr) {
- while (use != nullptr && use->GetPosition() < range->GetStart()) {
- DCHECK(use->IsSynthesized());
- use = use->GetNext();
- }
- while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
- DCHECK(!use->GetIsEnvironment());
- DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
- if (!use->IsSynthesized()) {
- LocationSummary* locations = use->GetUser()->GetLocations();
- Location expected_location = locations->InAt(use->GetInputIndex());
- // The expected (actual) location may be invalid in case the input is unused. Currently
- // this only happens for intrinsics.
- if (expected_location.IsValid()) {
- if (expected_location.IsUnallocated()) {
- locations->SetInAt(use->GetInputIndex(), source);
- } else if (!expected_location.IsConstant()) {
- AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
- }
- } else {
- DCHECK(use->GetUser()->IsInvoke());
- DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
- }
- }
- use = use->GetNext();
- }
-
- // Walk over the environment uses, and update their locations.
- while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
- env_use = env_use->GetNext();
- }
-
- while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
- DCHECK(current->CoversSlow(env_use->GetPosition())
- || (env_use->GetPosition() == range->GetEnd()));
- HEnvironment* environment = env_use->GetEnvironment();
- environment->SetLocationAt(env_use->GetInputIndex(), source);
- env_use = env_use->GetNext();
- }
-
- range = range->GetNext();
- }
-
- // If the next interval starts just after this one, and has a register,
- // insert a move.
- LiveInterval* next_sibling = current->GetNextSibling();
- if (next_sibling != nullptr
- && next_sibling->HasRegister()
- && current->GetEnd() == next_sibling->GetStart()) {
- Location destination = next_sibling->ToLocation();
- InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
- }
-
- for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
- safepoint_position != nullptr;
- safepoint_position = safepoint_position->GetNext()) {
- DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
-
- LocationSummary* locations = safepoint_position->GetLocations();
- if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
- DCHECK(interval->GetDefinedBy()->IsActualObject())
- << interval->GetDefinedBy()->DebugName()
- << "@" << safepoint_position->GetInstruction()->DebugName();
- locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
- }
-
- switch (source.GetKind()) {
- case Location::kRegister: {
- locations->AddLiveRegister(source);
- if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
- DCHECK_LE(locations->GetNumberOfLiveRegisters(),
- maximum_number_of_live_core_registers_ +
- maximum_number_of_live_fp_registers_);
- }
- if (current->GetType() == Primitive::kPrimNot) {
- DCHECK(interval->GetDefinedBy()->IsActualObject())
- << interval->GetDefinedBy()->DebugName()
- << "@" << safepoint_position->GetInstruction()->DebugName();
- locations->SetRegisterBit(source.reg());
- }
- break;
- }
- case Location::kFpuRegister: {
- locations->AddLiveRegister(source);
- break;
- }
-
- case Location::kRegisterPair:
- case Location::kFpuRegisterPair: {
- locations->AddLiveRegister(source.ToLow());
- locations->AddLiveRegister(source.ToHigh());
- break;
- }
- case Location::kStackSlot: // Fall-through
- case Location::kDoubleStackSlot: // Fall-through
- case Location::kConstant: {
- // Nothing to do.
- break;
- }
- default: {
- LOG(FATAL) << "Unexpected location for object";
- }
- }
- }
- current = next_sibling;
- } while (current != nullptr);
-
- if (kIsDebugBuild) {
- // Following uses can only be synthesized uses.
- while (use != nullptr) {
- DCHECK(use->IsSynthesized());
- use = use->GetNext();
- }
- }
-}
-
-static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
- HInstruction* instruction) {
- return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
- (instruction->IsConstant() || instruction->IsCurrentMethod());
-}
-
-void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
- HBasicBlock* from,
- HBasicBlock* to) const {
- if (interval->GetNextSibling() == nullptr) {
- // Nothing to connect. The whole range was allocated to the same location.
- return;
- }
-
- // Find the intervals that cover `from` and `to`.
- size_t destination_position = to->GetLifetimeStart();
- size_t source_position = from->GetLifetimeEnd() - 1;
- LiveInterval* destination = interval->GetSiblingAt(destination_position);
- LiveInterval* source = interval->GetSiblingAt(source_position);
-
- if (destination == source) {
- // Interval was not split.
- return;
- }
-
- LiveInterval* parent = interval->GetParent();
- HInstruction* defined_by = parent->GetDefinedBy();
- if (codegen_->GetGraph()->HasIrreducibleLoops() &&
- (destination == nullptr || !destination->CoversSlow(destination_position))) {
- // Our live_in fixed point calculation has found that the instruction is live
- // in the `to` block because it will eventually enter an irreducible loop. Our
- // live interval computation however does not compute a fixed point, and
- // therefore will not have a location for that instruction for `to`.
- // Because the instruction is a constant or the ArtMethod, we don't need to
- // do anything: it will be materialized in the irreducible loop.
- DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
- << defined_by->DebugName() << ":" << defined_by->GetId()
- << " " << from->GetBlockId() << " -> " << to->GetBlockId();
- return;
- }
-
- if (!destination->HasRegister()) {
- // Values are eagerly spilled. Spill slot already contains appropriate value.
- return;
- }
-
- Location location_source;
- // `GetSiblingAt` returns the interval whose start and end cover `position`,
- // but does not check whether the interval is inactive at that position.
- // The only situation where the interval is inactive at that position is in the
- // presence of irreducible loops for constants and ArtMethod.
- if (codegen_->GetGraph()->HasIrreducibleLoops() &&
- (source == nullptr || !source->CoversSlow(source_position))) {
- DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
- if (defined_by->IsConstant()) {
- location_source = defined_by->GetLocations()->Out();
- } else {
- DCHECK(defined_by->IsCurrentMethod());
- location_source = parent->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(parent->GetSpillSlot())
- : Location::StackSlot(parent->GetSpillSlot());
- }
- } else {
- DCHECK(source != nullptr);
- DCHECK(source->CoversSlow(source_position));
- DCHECK(destination->CoversSlow(destination_position));
- location_source = source->ToLocation();
- }
-
- // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
- // we need to put the moves at the entry of `to`.
- if (from->GetNormalSuccessors().size() == 1) {
- InsertParallelMoveAtExitOf(from,
- defined_by,
- location_source,
- destination->ToLocation());
- } else {
- DCHECK_EQ(to->GetPredecessors().size(), 1u);
- InsertParallelMoveAtEntryOf(to,
- defined_by,
- location_source,
- destination->ToLocation());
- }
-}
-
-void RegisterAllocator::Resolve() {
- codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(),
- maximum_number_of_live_core_registers_,
- maximum_number_of_live_fp_registers_,
- reserved_out_slots_,
- codegen_->GetGraph()->GetLinearOrder());
-
- // Adjust the Out Location of instructions.
- // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
- for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
- HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- LiveInterval* current = instruction->GetLiveInterval();
- LocationSummary* locations = instruction->GetLocations();
- Location location = locations->Out();
- if (instruction->IsParameterValue()) {
- // Now that we know the frame size, adjust the parameter's location.
- if (location.IsStackSlot()) {
- location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
- current->SetSpillSlot(location.GetStackIndex());
- locations->UpdateOut(location);
- } else if (location.IsDoubleStackSlot()) {
- location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
- current->SetSpillSlot(location.GetStackIndex());
- locations->UpdateOut(location);
- } else if (current->HasSpillSlot()) {
- current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
- }
- } else if (instruction->IsCurrentMethod()) {
- // The current method is always at offset 0.
- DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
- } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
- DCHECK(current->HasSpillSlot());
- size_t slot = current->GetSpillSlot()
- + GetNumberOfSpillSlots()
- + reserved_out_slots_
- - catch_phi_spill_slots_;
- current->SetSpillSlot(slot * kVRegSize);
- } else if (current->HasSpillSlot()) {
- // Adjust the stack slot, now that we know the number of them for each type.
- // The way this implementation lays out the stack is the following:
- // [parameter slots ]
- // [catch phi spill slots ]
- // [double spill slots ]
- // [long spill slots ]
- // [float spill slots ]
- // [int/ref values ]
- // [maximum out values ] (number of arguments for calls)
- // [art method ].
- size_t slot = current->GetSpillSlot();
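- // Fall through the cases below so that `slot` accumulates the sizes of the
- // categories laid out below this type in the stack layout above.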
- switch (current->GetType()) {
- case Primitive::kPrimDouble:
- slot += long_spill_slots_.size();
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimLong:
- slot += float_spill_slots_.size();
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimFloat:
- slot += int_spill_slots_.size();
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimNot:
- case Primitive::kPrimInt:
- case Primitive::kPrimChar:
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimShort:
- slot += reserved_out_slots_;
- break;
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unexpected type for interval " << current->GetType();
- }
- current->SetSpillSlot(slot * kVRegSize);
- }
-
- Location source = current->ToLocation();
-
- if (location.IsUnallocated()) {
- if (location.GetPolicy() == Location::kSameAsFirstInput) {
- if (locations->InAt(0).IsUnallocated()) {
- locations->SetInAt(0, source);
- } else {
- DCHECK(locations->InAt(0).Equals(source));
- }
- }
- locations->UpdateOut(source);
- } else {
- DCHECK(source.Equals(location));
- }
- }
-
- // Connect siblings.
- for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
- HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- ConnectSiblings(instruction->GetLiveInterval());
- }
-
- // Resolve non-linear control flow across branches. Order does not matter.
- for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
- if (block->IsCatchBlock() ||
- (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
- // Instructions live at the top of catch blocks or irreducible loop headers
- // were forced to spill.
- if (kIsDebugBuild) {
- BitVector* live = liveness_.GetLiveInSet(*block);
- for (uint32_t idx : live->Indexes()) {
- LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
- LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart());
- // `GetSiblingAt` returns the sibling that contains a position, but there could be
- // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that
- // position.
- if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) {
- DCHECK(!sibling->HasRegister());
- }
- }
- }
- } else {
- BitVector* live = liveness_.GetLiveInSet(*block);
- for (uint32_t idx : live->Indexes()) {
- LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
- for (HBasicBlock* predecessor : block->GetPredecessors()) {
- ConnectSplitSiblings(interval, predecessor, block);
- }
- }
- }
- }
-
- // Resolve phi inputs. Order does not matter.
- for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
- if (current->IsCatchBlock()) {
- // Catch phi values are set at runtime by the exception delivery mechanism.
- } else {
- for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
- HInstruction* phi = inst_it.Current();
- for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
- HBasicBlock* predecessor = current->GetPredecessors()[i];
- DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
- HInstruction* input = phi->InputAt(i);
- Location source = input->GetLiveInterval()->GetLocationAt(
- predecessor->GetLifetimeEnd() - 1);
- Location destination = phi->GetLiveInterval()->ToLocation();
- InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
- }
- }
- }
- }
-
- // Assign temp locations.
- for (LiveInterval* temp : temp_intervals_) {
- if (temp->IsHighInterval()) {
- // High intervals can be skipped; they are already handled by the low interval.
- continue;
- }
- HInstruction* at = liveness_.GetTempUser(temp);
- size_t temp_index = liveness_.GetTempIndex(temp);
- LocationSummary* locations = at->GetLocations();
- switch (temp->GetType()) {
- case Primitive::kPrimInt:
- locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister()));
- break;
-
- case Primitive::kPrimDouble:
- if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
- Location location = Location::FpuRegisterPairLocation(
- temp->GetRegister(), temp->GetHighInterval()->GetRegister());
- locations->SetTempAt(temp_index, location);
- } else {
- locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister()));
- }
- break;
-
- default:
- LOG(FATAL) << "Unexpected type for temporary location "
- << temp->GetType();
- }
- }
-}
-
} // namespace art
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 58600b789b..7e1fff8e2b 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
#include "arch/instruction_set.h"
#include "base/arena_containers.h"
+#include "base/arena_object.h"
#include "base/macros.h"
#include "primitive.h"
@@ -29,36 +30,41 @@ class HBasicBlock;
class HGraph;
class HInstruction;
class HParallelMove;
-class HPhi;
class LiveInterval;
class Location;
class SsaLivenessAnalysis;
/**
- * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ * Base class for any register allocator.
*/
-class RegisterAllocator {
+class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> {
public:
- RegisterAllocator(ArenaAllocator* allocator,
- CodeGenerator* codegen,
- const SsaLivenessAnalysis& analysis);
+ enum Strategy {
+ kRegisterAllocatorLinearScan,
+ kRegisterAllocatorGraphColor
+ };
+
+ static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan;
+
+ static RegisterAllocator* Create(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis,
+ Strategy strategy = kRegisterAllocatorDefault);
+
+ virtual ~RegisterAllocator() = default;
// Main entry point for the register allocator. Given the liveness analysis,
// allocates registers to live intervals.
- void AllocateRegisters();
+ virtual void AllocateRegisters() = 0;
// Validate that the register allocator did not allocate the same register to
- // intervals that intersect each other. Returns false if it did not.
- bool Validate(bool log_fatal_on_failure) {
- processing_core_registers_ = true;
- if (!ValidateInternal(log_fatal_on_failure)) {
- return false;
- }
- processing_core_registers_ = false;
- return ValidateInternal(log_fatal_on_failure);
- }
-
- // Helper method for validation. Used by unit testing.
+ // intervals that intersect each other. Returns false if it failed.
+ virtual bool Validate(bool log_fatal_on_failure) = 0;
+
+ static bool CanAllocateRegistersFor(const HGraph& graph,
+ InstructionSet instruction_set);
+
+ // Verifies that live intervals do not conflict. Used by unit testing.
static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
size_t number_of_spill_slots,
size_t number_of_out_slots,
@@ -67,178 +73,25 @@ class RegisterAllocator {
bool processing_core_registers,
bool log_fatal_on_failure);
- static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
-
- size_t GetNumberOfSpillSlots() const {
- return int_spill_slots_.size()
- + long_spill_slots_.size()
- + float_spill_slots_.size()
- + double_spill_slots_.size()
- + catch_phi_spill_slots_;
- }
-
static constexpr const char* kRegisterAllocatorPassName = "register";
- private:
- // Main methods of the allocator.
- void LinearScan();
- bool TryAllocateFreeReg(LiveInterval* interval);
- bool AllocateBlockedReg(LiveInterval* interval);
- void Resolve();
-
- // Add `interval` in the given sorted list.
- static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+ protected:
+ RegisterAllocator(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis);
// Split `interval` at the position `position`. The new interval starts at `position`.
- LiveInterval* Split(LiveInterval* interval, size_t position);
+ // If `position` is at the start of `interval`, returns `interval` with its
+ // register location(s) cleared.
+ static LiveInterval* Split(LiveInterval* interval, size_t position);
// Split `interval` at a position between `from` and `to`. The method will try
// to find an optimal split position.
LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to);
- // Returns whether `reg` is blocked by the code generator.
- bool IsBlocked(int reg) const;
-
- // Update the interval for the register in `location` to cover [start, end).
- void BlockRegister(Location location, size_t start, size_t end);
- void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
-
- // Allocate a spill slot for the given interval. Should be called in linear
- // order of interval starting positions.
- void AllocateSpillSlotFor(LiveInterval* interval);
-
- // Allocate a spill slot for the given catch phi. Will allocate the same slot
- // for phis which share the same vreg. Must be called in reverse linear order
- // of lifetime positions and ascending vreg numbers for correctness.
- void AllocateSpillSlotForCatchPhi(HPhi* phi);
-
- // Connect adjacent siblings within blocks.
- void ConnectSiblings(LiveInterval* interval);
-
- // Connect siblings between block entries and exits.
- void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
-
- // Helper methods to insert parallel moves in the graph.
- void InsertParallelMoveAtExitOf(HBasicBlock* block,
- HInstruction* instruction,
- Location source,
- Location destination) const;
- void InsertParallelMoveAtEntryOf(HBasicBlock* block,
- HInstruction* instruction,
- Location source,
- Location destination) const;
- void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
- void AddInputMoveFor(HInstruction* input,
- HInstruction* user,
- Location source,
- Location destination) const;
- void InsertParallelMoveAt(size_t position,
- HInstruction* instruction,
- Location source,
- Location destination) const;
-
- void AddMove(HParallelMove* move,
- Location source,
- Location destination,
- HInstruction* instruction,
- Primitive::Type type) const;
-
- // Helper methods.
- void AllocateRegistersInternal();
- void ProcessInstruction(HInstruction* instruction);
- bool ValidateInternal(bool log_fatal_on_failure) const;
- void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
- void DumpAllIntervals(std::ostream& stream) const;
- int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
- int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
- bool IsCallerSaveRegister(int reg) const;
-
- // Try splitting an active non-pair or unaligned pair interval at the given `position`.
- // Returns whether it was successful at finding such an interval.
- bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
- size_t first_register_use,
- size_t* next_use);
-
ArenaAllocator* const allocator_;
CodeGenerator* const codegen_;
const SsaLivenessAnalysis& liveness_;
-
- // List of intervals for core registers that must be processed, ordered by start
- // position. Last entry is the interval that has the lowest start position.
- // This list is initially populated before doing the linear scan.
- ArenaVector<LiveInterval*> unhandled_core_intervals_;
-
- // List of intervals for floating-point registers. Same comments as above.
- ArenaVector<LiveInterval*> unhandled_fp_intervals_;
-
- // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
- // or `unhandled_fp_intervals_`.
- ArenaVector<LiveInterval*>* unhandled_;
-
- // List of intervals that have been processed.
- ArenaVector<LiveInterval*> handled_;
-
- // List of intervals that are currently active when processing a new live interval.
- // That is, they have a live range that spans the start of the new interval.
- ArenaVector<LiveInterval*> active_;
-
- // List of intervals that are currently inactive when processing a new live interval.
- // That is, they have a lifetime hole that spans the start of the new interval.
- ArenaVector<LiveInterval*> inactive_;
-
- // Fixed intervals for physical registers. Such intervals cover the positions
- // where an instruction requires a specific register.
- ArenaVector<LiveInterval*> physical_core_register_intervals_;
- ArenaVector<LiveInterval*> physical_fp_register_intervals_;
-
- // Intervals for temporaries. Such intervals cover the positions
- // where an instruction requires a temporary.
- ArenaVector<LiveInterval*> temp_intervals_;
-
- // The spill slots allocated for live intervals. We ensure spill slots
- // are typed to avoid (1) doing moves and swaps between two different kinds
- // of registers, and (2) swapping between a single stack slot and a double
- // stack slot. This simplifies the parallel move resolver.
- ArenaVector<size_t> int_spill_slots_;
- ArenaVector<size_t> long_spill_slots_;
- ArenaVector<size_t> float_spill_slots_;
- ArenaVector<size_t> double_spill_slots_;
-
- // Spill slots allocated to catch phis. This category is special-cased because
- // (1) slots are allocated prior to linear scan and in reverse linear order,
- // (2) equivalent phis need to share slots despite having different types.
- size_t catch_phi_spill_slots_;
-
- // Instructions that need a safepoint.
- ArenaVector<HInstruction*> safepoints_;
-
- // True if processing core registers. False if processing floating
- // point registers.
- bool processing_core_registers_;
-
- // Number of registers for the current register kind (core or floating point).
- size_t number_of_registers_;
-
- // Temporary array, allocated ahead of time for simplicity.
- size_t* registers_array_;
-
- // Blocked registers, as decided by the code generator.
- bool* const blocked_core_registers_;
- bool* const blocked_fp_registers_;
-
- // Slots reserved for out arguments.
- size_t reserved_out_slots_;
-
- // The maximum live core registers at safepoints.
- size_t maximum_number_of_live_core_registers_;
-
- // The maximum live FP registers at safepoints.
- size_t maximum_number_of_live_fp_registers_;
-
- ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
- ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
-
- DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
};
} // namespace art
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
new file mode 100644
index 0000000000..a21595fe03
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -0,0 +1,2105 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_graph_color.h"
+
+#include "code_generator.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+#include "thread-inl.h"
+
+namespace art {
+
+// Highest number of registers that we support for any platform. This can be used for std::bitset,
+// for example, which needs to know its size at compile time.
+static constexpr size_t kMaxNumRegs = 32;
+
+// The maximum number of graph coloring attempts before triggering a DCHECK.
+// This is meant to catch changes to the graph coloring algorithm that undermine its forward
+// progress guarantees. Forward progress for the algorithm means splitting live intervals on
+// every graph coloring attempt so that eventually the interference graph will be sparse enough
+// to color. The main threat to forward progress is trying to split short intervals which cannot be
+// split further; this could cause infinite looping because the interference graph would never
+// change. This is avoided by prioritizing short intervals before long ones, so that long
+// intervals are split when coloring fails.
+static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
+
+// We always want to avoid spilling inside loops.
+static constexpr size_t kLoopSpillWeightMultiplier = 10;
+
+// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
+static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
+
+// We avoid moves in blocks that dominate the exit block, since these blocks will
+// be executed on every path through the method.
+static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
+
+enum class CoalesceKind {
+ kAdjacentSibling, // Prevents moves at interval split points.
+ kFixedOutputSibling, // Prevents moves from a fixed output location.
+ kFixedInput, // Prevents moves into a fixed input location.
+ kNonlinearControlFlow, // Prevents moves between blocks.
+ kPhi, // Prevents phi resolution moves.
+ kFirstInput, // Prevents a single input move.
+ kAnyInput, // May lead to better instruction selection / smaller encodings.
+};
+
+std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
+ return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
+}
+
+static size_t LoopDepthAt(HBasicBlock* block) {
+ HLoopInformation* loop_info = block->GetLoopInformation();
+ size_t depth = 0;
+ while (loop_info != nullptr) {
+ ++depth;
+ loop_info = loop_info->GetPreHeader()->GetLoopInformation();
+ }
+ return depth;
+}
+
+// Return the runtime cost of inserting a move instruction at the specified location.
+static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
+ HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
+ DCHECK(block != nullptr);
+ size_t cost = 1;
+ if (block->IsSingleJump()) {
+ cost *= kSingleJumpBlockWeightMultiplier;
+ }
+ if (block->Dominates(block->GetGraph()->GetExitBlock())) {
+ cost *= kDominatesExitBlockWeightMultiplier;
+ }
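+ // Scale the cost exponentially with loop nesting depth: moves inside loops execute repeatedly.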
+ for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
+ cost *= kLoopSpillWeightMultiplier;
+ }
+ return cost;
+}
+
+// In general, we estimate coalesce priority by whether it will definitely avoid a move,
+// and by how likely it is to create an interference graph that's harder to color.
+static size_t ComputeCoalescePriority(CoalesceKind kind,
+ size_t position,
+ const SsaLivenessAnalysis& liveness) {
+ if (kind == CoalesceKind::kAnyInput) {
+ // This type of coalescing can affect instruction selection, but not moves, so we
+ // give it the lowest priority.
+ return 0;
+ } else {
+ return CostForMoveAt(position, liveness);
+ }
+}
+
+enum class CoalesceStage {
+ kWorklist, // Currently in the iterative coalescing worklist.
+ kActive, // Not in a worklist, but could be considered again during iterative coalescing.
+ kInactive, // No longer considered until last-chance coalescing.
+ kDefunct, // Either the two nodes interfere, or have already been coalesced.
+};
+
+std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
+ return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
+}
+
+// Represents a coalesce opportunity between two nodes.
+struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
+ CoalesceOpportunity(InterferenceNode* a,
+ InterferenceNode* b,
+ CoalesceKind kind,
+ size_t position,
+ const SsaLivenessAnalysis& liveness)
+ : node_a(a),
+ node_b(b),
+ stage(CoalesceStage::kWorklist),
+ priority(ComputeCoalescePriority(kind, position, liveness)) {}
+
+ // Compare two coalesce opportunities based on their priority.
+ // Return true if lhs has a lower priority than that of rhs.
+ static bool CmpPriority(const CoalesceOpportunity* lhs,
+ const CoalesceOpportunity* rhs) {
+ return lhs->priority < rhs->priority;
+ }
+
+ InterferenceNode* const node_a;
+ InterferenceNode* const node_b;
+
+ // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
+ // and whether it should still be considered.
+ CoalesceStage stage;
+
+ // The priority of this coalesce opportunity, based on heuristics.
+ const size_t priority;
+};
+
+enum class NodeStage {
+ kInitial, // Uninitialized.
+ kPrecolored, // Marks fixed nodes.
+ kSafepoint, // Marks safepoint nodes.
+ kPrunable, // Marks uncolored nodes in the interference graph.
+ kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers.
+ kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers.
+ kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers.
+ kPruned // Marks nodes already pruned from the interference graph.
+};
+
+std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
+ return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
+}
+
+// Returns the estimated cost of spilling a particular live interval.
+static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
+ if (interval->HasRegister()) {
+ // Intervals with a fixed register cannot be spilled.
+ return std::numeric_limits<float>::min();
+ }
+
+ size_t length = interval->GetLength();
+ if (length == 1) {
+ // Tiny intervals should have maximum priority, since they cannot be split any further.
+ return std::numeric_limits<float>::max();
+ }
+
+ size_t use_weight = 0;
+ if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
+ // Cost for spilling at a register definition point.
+ use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
+ }
+
+ UsePosition* use = interval->GetFirstUse();
+ while (use != nullptr && use->GetPosition() <= interval->GetStart()) {
+ // Skip uses before the start of this live interval.
+ use = use->GetNext();
+ }
+
+ while (use != nullptr && use->GetPosition() <= interval->GetEnd()) {
+ if (use->GetUser() != nullptr && use->RequiresRegister()) {
+ // Cost for spilling at a register use point.
+ use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness);
+ }
+ use = use->GetNext();
+ }
+
+ // We divide by the length of the interval because we want to prioritize
+ // short intervals; we do not benefit much if we split them further.
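+  // Illustrative example (actual costs depend on the block weights above): an interval of
+  // length 20 with two register uses in straight-line code gets a small weight (2 / 20 = 0.1 if
+  // each use costs 1), while a length-4 interval with one register use inside a loop gets a much
+  // larger weight and is therefore more likely to keep its register.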
+ return static_cast<float>(use_weight) / static_cast<float>(length);
+}
+
+// Interference nodes make up the interference graph, which is the primary data structure in
+// graph coloring register allocation. Each node represents a single live interval, and contains
+// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
+// pre-colored nodes never contain outgoing edges (only incoming ones).
+//
+// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
+// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
+//
+// Note that a pair interval is represented by a single node in the interference graph, which
+// essentially requires two colors. One consequence of this is that the degree of a node is not
+// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
+// number of colors with which a node could interfere. We model this by giving edges different
+// weights (1 or 2) to control how much it increases the degree of adjacent nodes.
+// For example, the edge between two single nodes will have weight 1. On the other hand,
+// the edge between a single node and a pair node will have weight 2. This is because the pair
+// node could block up to two colors for the single node, and because the single node could
+// block an entire two-register aligned slot for the pair node.
+// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
+// and thus whether it is safe to prune it from the interference graph early on.
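+// For illustration: with 4 registers available, a single-register node adjacent to one pair node
+// and two single nodes has degree 2 + 1 + 1 = 4, so it is not guaranteed a color even though it
+// only has three neighbors.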
+class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
+ public:
+ InterferenceNode(ArenaAllocator* allocator,
+ LiveInterval* interval,
+ const SsaLivenessAnalysis& liveness)
+ : stage(NodeStage::kInitial),
+ interval_(interval),
+ adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
+ alias_(this),
+ spill_weight_(ComputeSpillWeight(interval, liveness)),
+ requires_color_(interval->RequiresRegister()),
+ needs_spill_slot_(false) {
+ DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
+ }
+
+ void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) {
+ DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
+ DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
+ DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
+ DCHECK_NE(stage, NodeStage::kPruned);
+ DCHECK_NE(other->stage, NodeStage::kPruned);
+ if (guaranteed_not_interfering_yet) {
+ DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other)
+ == adjacent_nodes_.end());
+ adjacent_nodes_.push_back(other);
+ out_degree_ += EdgeWeightWith(other);
+ } else {
+ auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+ if (it == adjacent_nodes_.end()) {
+ adjacent_nodes_.push_back(other);
+ out_degree_ += EdgeWeightWith(other);
+ }
+ }
+ }
+
+ void RemoveInterference(InterferenceNode* other) {
+ DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
+ DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
+ auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+ if (it != adjacent_nodes_.end()) {
+ adjacent_nodes_.erase(it);
+ out_degree_ -= EdgeWeightWith(other);
+ }
+ }
+
+ bool ContainsInterference(InterferenceNode* other) const {
+ DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
+ DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
+ auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+ return it != adjacent_nodes_.end();
+ }
+
+ LiveInterval* GetInterval() const {
+ return interval_;
+ }
+
+ const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const {
+ return adjacent_nodes_;
+ }
+
+ size_t GetOutDegree() const {
+ // Pre-colored nodes have infinite degree.
+ DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
+ return out_degree_;
+ }
+
+ void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) {
+ coalesce_opportunities_.push_back(opportunity);
+ }
+
+ void ClearCoalesceOpportunities() {
+ coalesce_opportunities_.clear();
+ }
+
+ bool IsMoveRelated() const {
+ for (CoalesceOpportunity* opportunity : coalesce_opportunities_) {
+ if (opportunity->stage == CoalesceStage::kWorklist ||
+ opportunity->stage == CoalesceStage::kActive) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Return whether this node already has a color.
+ // Used to find fixed nodes in the interference graph before coloring.
+ bool IsPrecolored() const {
+ return interval_->HasRegister();
+ }
+
+ bool IsPair() const {
+ return interval_->HasHighInterval();
+ }
+
+ void SetAlias(InterferenceNode* rep) {
+ DCHECK_NE(rep->stage, NodeStage::kPruned);
+ DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
+ alias_ = rep;
+ }
+
+ InterferenceNode* GetAlias() {
+ if (alias_ != this) {
+ // Recurse in order to flatten tree of alias pointers.
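+      // (For example, if a's alias is b and b's alias is c, then a->GetAlias() returns c and
+      // also rewrites a's alias to c, so later lookups are direct.)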
+ alias_ = alias_->GetAlias();
+ }
+ return alias_;
+ }
+
+ const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const {
+ return coalesce_opportunities_;
+ }
+
+ float GetSpillWeight() const {
+ return spill_weight_;
+ }
+
+ bool RequiresColor() const {
+ return requires_color_;
+ }
+
+ // We give extra weight to edges adjacent to pair nodes. See the general comment on the
+ // interference graph above.
+ size_t EdgeWeightWith(const InterferenceNode* other) const {
+ return (IsPair() || other->IsPair()) ? 2 : 1;
+ }
+
+ bool NeedsSpillSlot() const {
+ return needs_spill_slot_;
+ }
+
+ void SetNeedsSpillSlot() {
+ needs_spill_slot_ = true;
+ }
+
+ // The current stage of this node, indicating which worklist it belongs to.
+ NodeStage stage;
+
+ private:
+ // The live interval that this node represents.
+ LiveInterval* const interval_;
+
+ // All nodes interfering with this one.
+ // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
+ // set sizes that we encounter. Using a vector leads to much better performance.
+ ArenaVector<InterferenceNode*> adjacent_nodes_;
+
+ // Interference nodes that this node should be coalesced with to reduce moves.
+ ArenaVector<CoalesceOpportunity*> coalesce_opportunities_;
+
+ // The maximum number of colors with which this node could interfere. This could be more than
+ // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
+ // We use "out" degree because incoming edges come from nodes already pruned from the graph,
+ // and do not affect the coloring of this node.
+ // Pre-colored nodes are treated as having infinite degree.
+ size_t out_degree_;
+
+ // The node representing this node in the interference graph.
+ // Initially set to `this`, and only changed if this node is coalesced into another.
+ InterferenceNode* alias_;
+
+ // The cost of splitting and spilling this interval to the stack.
+ // Nodes with a higher spill weight should be prioritized when assigning registers.
+ // This is essentially based on use density and location; short intervals with many uses inside
+ // deeply nested loops have a high spill weight.
+ const float spill_weight_;
+
+ const bool requires_color_;
+
+ bool needs_spill_slot_;
+
+ DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
+};
+
+// The order in which we color nodes is important. To guarantee forward progress,
+// we prioritize intervals that require registers, and after that we prioritize
+// short intervals. That way, if we fail to color a node, it either won't require a
+// register, or it will be a long interval that can be split in order to make the
+// interference graph sparser.
+// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
+// (This metric is secondary to the forward progress requirements above.)
+// TODO: May also want to consider:
+// - Constants (since they can be rematerialized)
+// - Allocated spill slots
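+// For illustration: a node that requires a color always outranks one that does not; between two
+// nodes that both require a color, the one with the larger spill weight (say 2.5 vs. 0.1) wins.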
+static bool HasGreaterNodePriority(const InterferenceNode* lhs,
+ const InterferenceNode* rhs) {
+ // (1) Prioritize the node that requires a color.
+ if (lhs->RequiresColor() != rhs->RequiresColor()) {
+ return lhs->RequiresColor();
+ }
+
+ // (2) Prioritize the interval that has a higher spill weight.
+ return lhs->GetSpillWeight() > rhs->GetSpillWeight();
+}
+
+// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
+// and provides methods for each phase of the attempt.
+class ColoringIteration {
+ public:
+ ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
+ ArenaAllocator* allocator,
+ bool processing_core_regs,
+ size_t num_regs)
+ : register_allocator_(register_allocator),
+ allocator_(allocator),
+ processing_core_regs_(processing_core_regs),
+ num_regs_(num_regs),
+ interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
+ coalesce_worklist_(CoalesceOpportunity::CmpPriority,
+ allocator->Adapter(kArenaAllocRegisterAllocator)) {}
+
+ // Use the intervals collected from instructions to construct an
+ // interference graph mapping intervals to adjacency lists.
+ // Also, collect synthesized safepoint nodes, used to keep
+ // track of live intervals across safepoints.
+ // TODO: Should build safepoints elsewhere.
+ void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
+ const ArenaVector<InterferenceNode*>& physical_nodes,
+ ArenaVector<InterferenceNode*>* safepoints);
+
+ // Add coalesce opportunities to interference nodes.
+ void FindCoalesceOpportunities();
+
+ // Prune nodes from the interference graph to be colored later. Build
+  // a stack (pruned_nodes_) containing these nodes in an order determined
+ // by various heuristics.
+ void PruneInterferenceGraph();
+
+  // Process pruned_nodes_ to color the interference graph, spilling when
+ // necessary. Returns true if successful. Else, some intervals have been
+ // split, and the interference graph should be rebuilt for another attempt.
+ bool ColorInterferenceGraph();
+
+ // Return prunable nodes.
+ // The register allocator will need to access prunable nodes after coloring
+ // in order to tell the code generator which registers have been assigned.
+ const ArenaVector<InterferenceNode*>& GetPrunableNodes() const {
+ return prunable_nodes_;
+ }
+
+ private:
+ // Create a coalesce opportunity between two nodes.
+ void CreateCoalesceOpportunity(InterferenceNode* a,
+ InterferenceNode* b,
+ CoalesceKind kind,
+ size_t position);
+
+ // Add an edge in the interference graph, if valid.
+ // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
+ // when possible.
+ void AddPotentialInterference(InterferenceNode* from,
+ InterferenceNode* to,
+ bool guaranteed_not_interfering_yet,
+ bool both_directions = true);
+
+ // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
+ // may be pruned from the interference graph.
+ void FreezeMoves(InterferenceNode* node);
+
+ // Prune a node from the interference graph, updating worklists if necessary.
+ void PruneNode(InterferenceNode* node);
+
+ // Add coalesce opportunities associated with this node to the coalesce worklist.
+ void EnableCoalesceOpportunities(InterferenceNode* node);
+
+  // If needed, move `node` from the freeze worklist to the simplify worklist.
+ void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
+
+ // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
+ bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+ // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
+ bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+ void Coalesce(CoalesceOpportunity* opportunity);
+
+ // Merge `from` into `into` in the interference graph.
+ void Combine(InterferenceNode* from, InterferenceNode* into);
+
+ // A reference to the register allocator instance,
+ // needed to split intervals and assign spill slots.
+ RegisterAllocatorGraphColor* register_allocator_;
+
+ // An arena allocator used for a single graph coloring attempt.
+ ArenaAllocator* allocator_;
+
+ const bool processing_core_regs_;
+
+ const size_t num_regs_;
+
+ // A map from live intervals to interference nodes.
+ ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
+
+ // Uncolored nodes that should be pruned from the interference graph.
+ ArenaVector<InterferenceNode*> prunable_nodes_;
+
+  // A stack of nodes pruned from the interference graph, waiting to be colored.
+ ArenaStdStack<InterferenceNode*> pruned_nodes_;
+
+  // A queue containing low degree, non-move-related nodes that can be pruned immediately.
+ ArenaDeque<InterferenceNode*> simplify_worklist_;
+
+ // A queue containing low degree, move-related nodes.
+ ArenaDeque<InterferenceNode*> freeze_worklist_;
+
+ // A queue containing high degree nodes.
+ // If we have to prune from the spill worklist, we cannot guarantee
+ // the pruned node a color, so we order the worklist by priority.
+ ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
+
+ // A queue containing coalesce opportunities.
+ // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
+ // inside of loops) are more important than others.
+ ArenaPriorityQueue<CoalesceOpportunity*,
+ decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
+
+ DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
+};
+
+static bool IsCoreInterval(LiveInterval* interval) {
+ return !Primitive::IsFloatingPointType(interval->GetType());
+}
+
+static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
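+  // For example, on a 64-bit target this is 8 / 4 = 2 vreg slots (assuming 4-byte vregs),
+  // reserved for the ArtMethod* in the frame.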
+ return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
+}
+
+RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness,
+ bool iterative_move_coalescing)
+ : RegisterAllocator(allocator, codegen, liveness),
+ iterative_move_coalescing_(iterative_move_coalescing),
+ core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ num_int_spill_slots_(0),
+ num_double_spill_slots_(0),
+ num_float_spill_slots_(0),
+ num_long_spill_slots_(0),
+ catch_phi_spill_slot_counter_(0),
+ reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
+ reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()),
+ number_of_globally_blocked_core_regs_(0),
+ number_of_globally_blocked_fp_regs_(0),
+ max_safepoint_live_core_regs_(0),
+ max_safepoint_live_fp_regs_(0) {
+ // Before we ask for blocked registers, set them up in the code generator.
+ codegen->SetupBlockedRegisters();
+
+ // Initialize physical core register live intervals and blocked registers.
+ // This includes globally blocked registers, such as the stack pointer.
+ physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
+ for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+ LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt);
+ physical_core_nodes_[i] =
+ new (allocator_) InterferenceNode(allocator_, interval, liveness);
+ physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
+ core_intervals_.push_back(interval);
+ if (codegen_->IsBlockedCoreRegister(i)) {
+ ++number_of_globally_blocked_core_regs_;
+ interval->AddRange(0, liveness.GetMaxLifetimePosition());
+ }
+ }
+ // Initialize physical floating point register live intervals and blocked registers.
+ physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
+ for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+ LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat);
+ physical_fp_nodes_[i] =
+ new (allocator_) InterferenceNode(allocator_, interval, liveness);
+ physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
+ fp_intervals_.push_back(interval);
+ if (codegen_->IsBlockedFloatingPointRegister(i)) {
+ ++number_of_globally_blocked_fp_regs_;
+ interval->AddRange(0, liveness.GetMaxLifetimePosition());
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::AllocateRegisters() {
+ // (1) Collect and prepare live intervals.
+ ProcessInstructions();
+
+ for (bool processing_core_regs : {true, false}) {
+ ArenaVector<LiveInterval*>& intervals = processing_core_regs
+ ? core_intervals_
+ : fp_intervals_;
+ size_t num_registers = processing_core_regs
+ ? codegen_->GetNumberOfCoreRegisters()
+ : codegen_->GetNumberOfFloatingPointRegisters();
+
+ size_t attempt = 0;
+ while (true) {
+ ++attempt;
+ DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
+ << "Exceeded debug max graph coloring register allocation attempts. "
+ << "This could indicate that the register allocator is not making forward progress, "
+ << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
+ << "should be prioritized over long ones, because they cannot be split further.)";
+
+ // Many data structures are cleared between graph coloring attempts, so we reduce
+ // total memory usage by using a new arena allocator for each attempt.
+ ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool());
+ ColoringIteration iteration(this,
+ &coloring_attempt_allocator,
+ processing_core_regs,
+ num_registers);
+
+ // (2) Build the interference graph. Also gather safepoints.
+ ArenaVector<InterferenceNode*> safepoints(
+ coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
+ ? physical_core_nodes_
+ : physical_fp_nodes_;
+ iteration.BuildInterferenceGraph(intervals, physical_nodes, &safepoints);
+
+ // (3) Add coalesce opportunities.
+ // If we have tried coloring the graph a suspiciously high number of times, give
+ // up on move coalescing, just in case the coalescing heuristics are not conservative.
+ // (This situation will be caught if DCHECKs are turned on.)
+ if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
+ iteration.FindCoalesceOpportunities();
+ }
+
+ // (4) Prune all uncolored nodes from interference graph.
+ iteration.PruneInterferenceGraph();
+
+ // (5) Color pruned nodes based on interferences.
+ bool successful = iteration.ColorInterferenceGraph();
+
+ // We manually clear coalesce opportunities for physical nodes,
+ // since they persist across coloring attempts.
+ for (InterferenceNode* node : physical_core_nodes_) {
+ node->ClearCoalesceOpportunities();
+ }
+ for (InterferenceNode* node : physical_fp_nodes_) {
+ node->ClearCoalesceOpportunities();
+ }
+
+ if (successful) {
+ // Assign spill slots.
+ AllocateSpillSlots(iteration.GetPrunableNodes());
+
+ // Compute the maximum number of live registers across safepoints.
+ // Notice that we do not count globally blocked registers, such as the stack pointer.
+ if (safepoints.size() > 0) {
+ size_t max_safepoint_live_regs = ComputeMaxSafepointLiveRegisters(safepoints);
+ if (processing_core_regs) {
+ max_safepoint_live_core_regs_ =
+ max_safepoint_live_regs - number_of_globally_blocked_core_regs_;
+ } else {
+          max_safepoint_live_fp_regs_ =
+ max_safepoint_live_regs - number_of_globally_blocked_fp_regs_;
+ }
+ }
+
+ // Tell the code generator which registers were allocated.
+ // We only look at prunable_nodes because we already told the code generator about
+ // fixed intervals while processing instructions. We also ignore the fixed intervals
+ // placed at the top of catch blocks.
+ for (InterferenceNode* node : iteration.GetPrunableNodes()) {
+ LiveInterval* interval = node->GetInterval();
+ if (interval->HasRegister()) {
+ Location low_reg = processing_core_regs
+ ? Location::RegisterLocation(interval->GetRegister())
+ : Location::FpuRegisterLocation(interval->GetRegister());
+ codegen_->AddAllocatedRegister(low_reg);
+ if (interval->HasHighInterval()) {
+ LiveInterval* high = interval->GetHighInterval();
+ DCHECK(high->HasRegister());
+ Location high_reg = processing_core_regs
+ ? Location::RegisterLocation(high->GetRegister())
+ : Location::FpuRegisterLocation(high->GetRegister());
+ codegen_->AddAllocatedRegister(high_reg);
+ }
+ } else {
+ DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
+ }
+ }
+
+ break;
+ }
+ } // while unsuccessful
+ } // for processing_core_instructions
+
+ // (6) Resolve locations and deconstruct SSA form.
+ RegisterAllocationResolver(allocator_, codegen_, liveness_)
+ .Resolve(max_safepoint_live_core_regs_,
+ max_safepoint_live_fp_regs_,
+ reserved_art_method_slots_ + reserved_out_slots_,
+ num_int_spill_slots_,
+ num_long_spill_slots_,
+ num_float_spill_slots_,
+ num_double_spill_slots_,
+ catch_phi_spill_slot_counter_,
+ temp_intervals_);
+
+ if (kIsDebugBuild) {
+ Validate(/*log_fatal_on_failure*/ true);
+ }
+}
+
+bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
+ for (bool processing_core_regs : {true, false}) {
+ ArenaVector<LiveInterval*> intervals(
+ allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+ for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ LiveInterval* interval = instruction->GetLiveInterval();
+ if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
+ intervals.push_back(instruction->GetLiveInterval());
+ }
+ }
+
+ ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
+ ? physical_core_nodes_
+ : physical_fp_nodes_;
+ for (InterferenceNode* fixed : physical_nodes) {
+ LiveInterval* interval = fixed->GetInterval();
+ if (interval->GetFirstRange() != nullptr) {
+ // Ideally we would check fixed ranges as well, but currently there are times when
+ // two fixed intervals for the same register will overlap. For example, a fixed input
+        // and a fixed output may sometimes share the same register, in which case there will be two
+ // fixed intervals for the same place.
+ }
+ }
+
+ for (LiveInterval* temp : temp_intervals_) {
+ if (IsCoreInterval(temp) == processing_core_regs) {
+ intervals.push_back(temp);
+ }
+ }
+
+ size_t spill_slots = num_int_spill_slots_
+ + num_long_spill_slots_
+ + num_float_spill_slots_
+ + num_double_spill_slots_
+ + catch_phi_spill_slot_counter_;
+ bool ok = ValidateIntervals(intervals,
+ spill_slots,
+ reserved_art_method_slots_ + reserved_out_slots_,
+ *codegen_,
+ allocator_,
+ processing_core_regs,
+ log_fatal_on_failure);
+ if (!ok) {
+ return false;
+ }
+ } // for processing_core_regs
+
+ return true;
+}
+
+void RegisterAllocatorGraphColor::ProcessInstructions() {
+ for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+
+ // Note that we currently depend on this ordering, since some helper
+ // code is designed for linear scan register allocation.
+ for (HBackwardInstructionIterator instr_it(block->GetInstructions());
+ !instr_it.Done();
+ instr_it.Advance()) {
+ ProcessInstruction(instr_it.Current());
+ }
+
+ for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ ProcessInstruction(phi_it.Current());
+ }
+
+ if (block->IsCatchBlock()
+ || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+ // By blocking all registers at the top of each catch block or irreducible loop, we force
+ // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in a register.
+ size_t position = block->GetLifetimeStart();
+ BlockRegisters(position, position + 1);
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ if (locations == nullptr) {
+ return;
+ }
+ if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
+ // We do this here because we do not want the suspend check to artificially
+ // create live registers.
+ DCHECK(instruction->IsSuspendCheckEntry());
+ DCHECK_EQ(locations->GetTempCount(), 0u);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ CheckForTempLiveIntervals(instruction);
+ CheckForSafepoint(instruction);
+ if (instruction->GetLocations()->WillCall()) {
+ // If a call will happen, create fixed intervals for caller-save registers.
+ // TODO: Note that it may be beneficial to later split intervals at this point,
+ // so that we allow last-minute moves from a caller-save register
+ // to a callee-save register.
+ BlockRegisters(instruction->GetLifetimePosition(),
+ instruction->GetLifetimePosition() + 1,
+ /*caller_save_only*/ true);
+ }
+ CheckForFixedInputs(instruction);
+
+ LiveInterval* interval = instruction->GetLiveInterval();
+ if (interval == nullptr) {
+ // Instructions lacking a valid output location do not have a live interval.
+ DCHECK(!locations->Out().IsValid());
+ return;
+ }
+
+ // Low intervals act as representatives for their corresponding high interval.
+ DCHECK(!interval->IsHighInterval());
+ if (codegen_->NeedsTwoRegisters(interval->GetType())) {
+ interval->AddHighInterval();
+ }
+ AddSafepointsFor(instruction);
+ CheckForFixedOutput(instruction);
+ AllocateSpillSlotForCatchPhi(instruction);
+
+ ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
+ ? core_intervals_
+ : fp_intervals_;
+ if (interval->HasSpillSlot() || instruction->IsConstant()) {
+ // Note that if an interval already has a spill slot, then its value currently resides
+ // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
+ // register use. This is also true for constants, which can be materialized at any point.
+ size_t first_register_use = interval->FirstRegisterUse();
+ if (first_register_use != kNoLifetime) {
+ LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
+ intervals.push_back(split);
+ } else {
+ // We won't allocate a register for this value.
+ }
+ } else {
+ intervals.push_back(interval);
+ }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
+ // We simply block physical registers where necessary.
+ // TODO: Ideally we would coalesce the physical register with the register
+ // allocated to the input value, but this can be tricky if, e.g., there
+ // could be multiple physical register uses of the same value at the
+ // same instruction. Furthermore, there's currently no distinction between
+ // fixed inputs to a call (which will be clobbered) and other fixed inputs (which
+ // may not be clobbered).
+ LocationSummary* locations = instruction->GetLocations();
+ size_t position = instruction->GetLifetimePosition();
+ for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+ Location input = locations->InAt(i);
+ if (input.IsRegister() || input.IsFpuRegister()) {
+ BlockRegister(input, position, position + 1);
+ codegen_->AddAllocatedRegister(input);
+ } else if (input.IsPair()) {
+ BlockRegister(input.ToLow(), position, position + 1);
+ BlockRegister(input.ToHigh(), position, position + 1);
+ codegen_->AddAllocatedRegister(input.ToLow());
+ codegen_->AddAllocatedRegister(input.ToHigh());
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
+ // If an instruction has a fixed output location, we give the live interval a register and then
+ // proactively split it just after the definition point to avoid creating too many interferences
+ // with a fixed node.
+ LiveInterval* interval = instruction->GetLiveInterval();
+ Location out = interval->GetDefinedBy()->GetLocations()->Out();
+ size_t position = instruction->GetLifetimePosition();
+ DCHECK_GE(interval->GetEnd() - position, 2u);
+
+ if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+ out = instruction->GetLocations()->InAt(0);
+ }
+
+ if (out.IsRegister() || out.IsFpuRegister()) {
+ interval->SetRegister(out.reg());
+ codegen_->AddAllocatedRegister(out);
+ Split(interval, position + 1);
+ } else if (out.IsPair()) {
+ interval->SetRegister(out.low());
+ interval->GetHighInterval()->SetRegister(out.high());
+ codegen_->AddAllocatedRegister(out.ToLow());
+ codegen_->AddAllocatedRegister(out.ToHigh());
+ Split(interval, position + 1);
+ } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
+ interval->SetSpillSlot(out.GetStackIndex());
+ } else {
+ DCHECK(out.IsUnallocated() || out.IsConstant());
+ }
+}
+
+void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
+ LiveInterval* interval = instruction->GetLiveInterval();
+ for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+ HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+ size_t safepoint_position = safepoint->GetLifetimePosition();
+
+    // Check that safepoints_ are ordered by decreasing lifetime position (so we can break early).
+ DCHECK(safepoint_index == safepoints_.size() ||
+ safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+ if (safepoint_position == interval->GetStart()) {
+ // The safepoint is for this instruction, so the location of the instruction
+ // does not need to be saved.
+ DCHECK_EQ(safepoint_index, safepoints_.size());
+ DCHECK_EQ(safepoint, instruction);
+ continue;
+ } else if (interval->IsDeadAt(safepoint_position)) {
+ break;
+ } else if (!interval->Covers(safepoint_position)) {
+ // Hole in the interval.
+ continue;
+ }
+ interval->AddSafepoint(safepoint);
+ }
+}
+
+void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ size_t position = instruction->GetLifetimePosition();
+ for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+ Location temp = locations->GetTemp(i);
+ if (temp.IsRegister() || temp.IsFpuRegister()) {
+ BlockRegister(temp, position, position + 1);
+ codegen_->AddAllocatedRegister(temp);
+ } else {
+ DCHECK(temp.IsUnallocated());
+ switch (temp.GetPolicy()) {
+ case Location::kRequiresRegister: {
+ LiveInterval* interval =
+ LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+ interval->AddTempUse(instruction, i);
+ core_intervals_.push_back(interval);
+ temp_intervals_.push_back(interval);
+ break;
+ }
+
+ case Location::kRequiresFpuRegister: {
+ LiveInterval* interval =
+ LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+ interval->AddTempUse(instruction, i);
+ fp_intervals_.push_back(interval);
+ temp_intervals_.push_back(interval);
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ interval->AddHighInterval(/*is_temp*/ true);
+ temp_intervals_.push_back(interval->GetHighInterval());
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected policy for temporary location "
+ << temp.GetPolicy();
+ }
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ size_t position = instruction->GetLifetimePosition();
+
+ if (locations->NeedsSafepoint()) {
+ safepoints_.push_back(instruction);
+ if (locations->OnlyCallsOnSlowPath()) {
+ // We add a synthesized range at this position to record the live registers
+ // at this position. Ideally, we could just update the safepoints when locations
+ // are updated, but we currently need to know the full stack size before updating
+ // locations (because of parameters and the fact that we don't have a frame pointer).
+      // And knowing the full stack size requires knowing the maximum number of live
+ // registers at calls in slow paths.
+ // By adding the following interval in the algorithm, we can compute this
+ // maximum before updating locations.
+ LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+ interval->AddRange(position, position + 1);
+ core_intervals_.push_back(interval);
+ fp_intervals_.push_back(interval);
+ }
+ }
+}
+
+LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
+ if (interval->GetStart() < position && position < interval->GetEnd()) {
+ return Split(interval, position);
+ } else {
+ return interval;
+ }
+}
+
+void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
+ DCHECK(!interval->IsHighInterval());
+
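+  // For illustration: around each use that requires a register, the interval is split just
+  // before the use (at p - 1) and again at the use (or just after, at p + 1, when the use falls
+  // at the very end of a block), so only a small piece around the use needs a register even if
+  // the rest of the original interval ends up spilled.
+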
+ // Split just after a register definition.
+ if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
+ interval = TrySplit(interval, interval->GetStart() + 1);
+ }
+
+ UsePosition* use = interval->GetFirstUse();
+ while (use != nullptr && use->GetPosition() < interval->GetStart()) {
+ use = use->GetNext();
+ }
+
+ // Split around register uses.
+ size_t end = interval->GetEnd();
+ while (use != nullptr && use->GetPosition() <= end) {
+ if (use->RequiresRegister()) {
+ size_t position = use->GetPosition();
+ interval = TrySplit(interval, position - 1);
+ if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
+ // If we are at the very end of a basic block, we cannot split right
+ // at the use. Split just after instead.
+ interval = TrySplit(interval, position + 1);
+ } else {
+ interval = TrySplit(interval, position);
+ }
+ }
+ use = use->GetNext();
+ }
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
+ if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+ HPhi* phi = instruction->AsPhi();
+ LiveInterval* interval = phi->GetLiveInterval();
+
+ HInstruction* previous_phi = phi->GetPrevious();
+ DCHECK(previous_phi == nullptr ||
+ previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+ << "Phis expected to be sorted by vreg number, "
+ << "so that equivalent phis are adjacent.";
+
+ if (phi->IsVRegEquivalentOf(previous_phi)) {
+ // Assign the same spill slot.
+ DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+ interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+ } else {
+ interval->SetSpillSlot(catch_phi_spill_slot_counter_);
+ catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::BlockRegister(Location location,
+ size_t start,
+ size_t end) {
+ DCHECK(location.IsRegister() || location.IsFpuRegister());
+ int reg = location.reg();
+ LiveInterval* interval = location.IsRegister()
+ ? physical_core_nodes_[reg]->GetInterval()
+ : physical_fp_nodes_[reg]->GetInterval();
+ DCHECK(interval->GetRegister() == reg);
+ bool blocked_by_codegen = location.IsRegister()
+ ? codegen_->IsBlockedCoreRegister(reg)
+ : codegen_->IsBlockedFloatingPointRegister(reg);
+ if (blocked_by_codegen) {
+ // We've already blocked this register for the entire method. (And adding a
+ // range inside another range violates the preconditions of AddRange).
+ } else {
+ interval->AddRange(start, end);
+ }
+}
+
+void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+ for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+ if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+ BlockRegister(Location::RegisterLocation(i), start, end);
+ }
+ }
+ for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+ if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+ BlockRegister(Location::FpuRegisterLocation(i), start, end);
+ }
+ }
+}
+
+void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
+ InterferenceNode* to,
+ bool guaranteed_not_interfering_yet,
+ bool both_directions) {
+ if (from->IsPrecolored()) {
+ // We save space by ignoring outgoing edges from fixed nodes.
+ } else if (to->GetInterval()->IsSlowPathSafepoint()) {
+ // Safepoint intervals are only there to count max live registers,
+ // so no need to give them incoming interference edges.
+ // This is also necessary for correctness, because we don't want nodes
+ // to remove themselves from safepoint adjacency sets when they're pruned.
+ } else if (to->IsPrecolored()) {
+ // It is important that only a single node represents a given fixed register in the
+ // interference graph. We retrieve that node here.
+ const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint()
+ ? register_allocator_->physical_fp_nodes_
+ : register_allocator_->physical_core_nodes_;
+ InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
+ from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false);
+ DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
+ DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
+
+ // If a node interferes with a fixed pair node, the weight of the edge may
+ // be inaccurate after using the alias of the pair node, because the alias of the pair node
+  // is a single (non-pair) node.
+ // We could make special pair fixed nodes, but that ends up being too conservative because
+ // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
+ // three rather than two.
+ // Instead, we explicitly add an interference with the high node of the fixed pair node.
+  // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
+ // can be unaligned on x86 complicates things.
+ if (to->IsPair()) {
+ InterferenceNode* high_node =
+ physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
+ DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
+ high_node->GetInterval()->GetRegister());
+ from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false);
+ }
+ } else {
+ // Standard interference between two uncolored nodes.
+ from->AddInterference(to, guaranteed_not_interfering_yet);
+ }
+
+ if (both_directions) {
+ AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
+ }
+}
+
+// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
+// is allowed to have the same register as the input interval.
+// TODO: Ideally we should just produce correct intervals in liveness analysis.
+// We would need to refactor the current live interval layout to do so, which is
+// no small task.
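+// For illustration: if an instruction's output is marked as not overlapping its inputs, and the
+// input interval dies right after the definition point (with no sibling picking it up there),
+// the output may reuse the input's register and no interference edge is added below.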
+static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
+ LiveInterval* output_interval = out_node->GetInterval();
+ HInstruction* defined_by = output_interval->GetDefinedBy();
+ if (defined_by == nullptr) {
+ // This must not be a definition point.
+ return false;
+ }
+
+ LocationSummary* locations = defined_by->GetLocations();
+ if (locations->OutputCanOverlapWithInputs()) {
+ // This instruction does not allow the output to reuse a register from an input.
+ return false;
+ }
+
+ LiveInterval* input_interval = in_node->GetInterval();
+ LiveInterval* next_sibling = input_interval->GetNextSibling();
+ size_t def_position = defined_by->GetLifetimePosition();
+ size_t use_position = def_position + 1;
+ if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
+ // The next sibling starts at the use position, so reusing the input register in the output
+ // would clobber the input before it's moved into the sibling interval location.
+ return false;
+ }
+
+ if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
+ // The input interval is live after the use position.
+ return false;
+ }
+
+ HInputsRef inputs = defined_by->GetInputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
+ DCHECK(input_interval->SameRegisterKind(*output_interval));
+ return true;
+ }
+ }
+
+ // The input interval was not an input for this instruction.
+ return false;
+}
+
+void ColoringIteration::BuildInterferenceGraph(
+ const ArenaVector<LiveInterval*>& intervals,
+ const ArenaVector<InterferenceNode*>& physical_nodes,
+ ArenaVector<InterferenceNode*>* safepoints) {
+ DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
+ // Build the interference graph efficiently by ordering range endpoints
+ // by position and doing a linear sweep to find interferences. (That is, we
+ // jump from endpoint to endpoint, maintaining a set of intervals live at each
+ // point. If two nodes are ever in the live set at the same time, then they
+ // interfere with each other.)
+ //
+ // We order by both position and (secondarily) by whether the endpoint
+ // begins or ends a range; we want to process range endings before range
+ // beginnings at the same position because they should not conflict.
+ //
+ // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
+ // Tuple contents: (position, is_range_beginning, node).
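+  // Illustrative example (made-up positions): intervals A covering [2, 10) and B covering
+  // [6, 14) produce endpoints (2, begin, A), (6, begin, B), (10, end, A), (14, end, B); A and B
+  // are both in the live set at position 6, so an interference edge between them is recorded.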
+ ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
+ allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // We reserve plenty of space to avoid excessive copying.
+  range_endpoints.reserve(4 * intervals.size());
+
+ for (LiveInterval* parent : intervals) {
+ for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
+ LiveRange* range = sibling->GetFirstRange();
+ if (range != nullptr) {
+ InterferenceNode* node = new (allocator_) InterferenceNode(
+ allocator_, sibling, register_allocator_->liveness_);
+ interval_node_map_.Insert(std::make_pair(sibling, node));
+
+ if (sibling->HasRegister()) {
+ // Fixed nodes should alias the canonical node for the corresponding register.
+ node->stage = NodeStage::kPrecolored;
+ InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
+ node->SetAlias(physical_node);
+ DCHECK_EQ(node->GetInterval()->GetRegister(),
+ physical_node->GetInterval()->GetRegister());
+ } else if (sibling->IsSlowPathSafepoint()) {
+ // Safepoint intervals are synthesized to count max live registers.
+ // They will be processed separately after coloring.
+ node->stage = NodeStage::kSafepoint;
+ safepoints->push_back(node);
+ } else {
+ node->stage = NodeStage::kPrunable;
+ prunable_nodes_.push_back(node);
+ }
+
+ while (range != nullptr) {
+ range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
+ range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
+ range = range->GetNext();
+ }
+ }
+ }
+ }
+
+ // Sort the endpoints.
+ // We explicitly ignore the third entry of each tuple (the node pointer) in order
+ // to maintain determinism.
+ std::sort(range_endpoints.begin(), range_endpoints.end(),
+ [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
+ const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
+ return std::tie(std::get<0>(lhs), std::get<1>(lhs))
+ < std::tie(std::get<0>(rhs), std::get<1>(rhs));
+ });
+
+ // Nodes live at the current position in the linear sweep.
+ ArenaVector<InterferenceNode*> live(
+ allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
+ // live set. When we encounter the end of a range, we remove the corresponding node
+ // from the live set. Nodes interfere if they are in the live set at the same time.
+ for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
+ bool is_range_beginning;
+ InterferenceNode* node;
+ size_t position;
+ // Extract information from the tuple, including the node this tuple represents.
+ std::tie(position, is_range_beginning, node) = *it;
+
+ if (is_range_beginning) {
+ bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
+ for (InterferenceNode* conflicting : live) {
+ DCHECK_NE(node, conflicting);
+ if (CheckInputOutputCanOverlap(conflicting, node)) {
+ // We do not add an interference, because the instruction represented by `node` allows
+ // its output to share a register with an input, represented here by `conflicting`.
+ } else {
+ AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
+ }
+ }
+ DCHECK(std::find(live.begin(), live.end(), node) == live.end());
+ live.push_back(node);
+ } else {
+ // End of range.
+ auto live_it = std::find(live.begin(), live.end(), node);
+ DCHECK(live_it != live.end());
+ live.erase(live_it);
+ }
+ }
+ DCHECK(live.empty());
+}
+
+void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
+ InterferenceNode* b,
+ CoalesceKind kind,
+ size_t position) {
+ DCHECK_EQ(a->IsPair(), b->IsPair())
+ << "Nodes of different memory widths should never be coalesced";
+ CoalesceOpportunity* opportunity =
+ new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
+ a->AddCoalesceOpportunity(opportunity);
+ b->AddCoalesceOpportunity(opportunity);
+ coalesce_worklist_.push(opportunity);
+}
+
+// When looking for coalesce opportunities, we use the interval_node_map_ to find the node
+// corresponding to an interval. Note that not all intervals are in this map, notably the parents
+// of constants and stack arguments. (However, these intervals should not be involved in coalesce
+// opportunities anyway, because they're not going to be in registers.)
+void ColoringIteration::FindCoalesceOpportunities() {
+ DCHECK(coalesce_worklist_.empty());
+
+ for (InterferenceNode* node : prunable_nodes_) {
+ LiveInterval* interval = node->GetInterval();
+
+ // Coalesce siblings.
+ LiveInterval* next_sibling = interval->GetNextSibling();
+ if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
+ auto it = interval_node_map_.Find(next_sibling);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* sibling_node = it->second;
+ CreateCoalesceOpportunity(node,
+ sibling_node,
+ CoalesceKind::kAdjacentSibling,
+ interval->GetEnd());
+ }
+ }
+
+ // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
+ LiveInterval* parent = interval->GetParent();
+ if (parent->HasRegister()
+ && parent->GetNextSibling() == interval
+ && parent->GetEnd() == interval->GetStart()) {
+ auto it = interval_node_map_.Find(parent);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* parent_node = it->second;
+ CreateCoalesceOpportunity(node,
+ parent_node,
+ CoalesceKind::kFixedOutputSibling,
+ parent->GetEnd());
+ }
+ }
+
+ // Try to prevent moves across blocks.
+    // Note that this does not lead to many successful coalesce attempts, so it could be removed
+ // if found to add to compile time.
+ const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
+ if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
+ // If the start of this interval is at a block boundary, we look at the
+ // location of the interval in blocks preceding the block this interval
+ // starts at. This can avoid a move between the two blocks.
+ HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
+ for (HBasicBlock* predecessor : block->GetPredecessors()) {
+ size_t position = predecessor->GetLifetimeEnd() - 1;
+ LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
+ if (existing != nullptr) {
+ auto it = interval_node_map_.Find(existing);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* existing_node = it->second;
+ CreateCoalesceOpportunity(node,
+ existing_node,
+ CoalesceKind::kNonlinearControlFlow,
+ position);
+ }
+ }
+ }
+ }
+
+ // Coalesce phi inputs with the corresponding output.
+ HInstruction* defined_by = interval->GetDefinedBy();
+ if (defined_by != nullptr && defined_by->IsPhi()) {
+ const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors();
+ HInputsRef inputs = defined_by->GetInputs();
+
+ for (size_t i = 0, e = inputs.size(); i < e; ++i) {
+ // We want the sibling at the end of the appropriate predecessor block.
+ size_t position = predecessors[i]->GetLifetimeEnd() - 1;
+ LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
+
+ auto it = interval_node_map_.Find(input_interval);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* input_node = it->second;
+ CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
+ }
+ }
+ }
+
+ // Coalesce output with first input when policy is kSameAsFirstInput.
+ if (defined_by != nullptr) {
+ Location out = defined_by->GetLocations()->Out();
+ if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+ LiveInterval* input_interval
+ = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
+ // TODO: Could we consider lifetime holes here?
+ if (input_interval->GetEnd() == interval->GetStart()) {
+ auto it = interval_node_map_.Find(input_interval);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* input_node = it->second;
+ CreateCoalesceOpportunity(node,
+ input_node,
+ CoalesceKind::kFirstInput,
+ interval->GetStart());
+ }
+ }
+ }
+ }
+
+    // An interval that starts at an instruction's definition point (that is, it is not a split
+    // sibling) may reuse the registers used by the inputs of that instruction, based on the
+    // location summary.
+ if (defined_by != nullptr) {
+ DCHECK(!interval->IsSplit());
+ LocationSummary* locations = defined_by->GetLocations();
+ if (!locations->OutputCanOverlapWithInputs()) {
+ HInputsRef inputs = defined_by->GetInputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ size_t def_point = defined_by->GetLifetimePosition();
+ // TODO: Getting the sibling at the def_point might not be quite what we want
+ // for fixed inputs, since the use will be *at* the def_point rather than after.
+ LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
+ if (input_interval != nullptr &&
+ input_interval->HasHighInterval() == interval->HasHighInterval()) {
+ auto it = interval_node_map_.Find(input_interval);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* input_node = it->second;
+ CreateCoalesceOpportunity(node,
+ input_node,
+ CoalesceKind::kAnyInput,
+ interval->GetStart());
+ }
+ }
+ }
+ }
+ }
+
+ // Try to prevent moves into fixed input locations.
+ UsePosition* use = interval->GetFirstUse();
+ for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) {
+ // Skip past uses before the start of this interval.
+ }
+ for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) {
+ HInstruction* user = use->GetUser();
+ if (user == nullptr) {
+ // User may be null for certain intervals, such as temp intervals.
+ continue;
+ }
+ LocationSummary* locations = user->GetLocations();
+ Location input = locations->InAt(use->GetInputIndex());
+ if (input.IsRegister() || input.IsFpuRegister()) {
+ // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
+ // is currently not supported.
+ InterferenceNode* fixed_node = input.IsRegister()
+ ? register_allocator_->physical_core_nodes_[input.reg()]
+ : register_allocator_->physical_fp_nodes_[input.reg()];
+ CreateCoalesceOpportunity(node,
+ fixed_node,
+ CoalesceKind::kFixedInput,
+ user->GetLifetimePosition());
+ }
+ }
+ } // for node in prunable_nodes
+}
+
+static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
+ return node->GetOutDegree() < num_regs;
+}
+
+static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
+ return !IsLowDegreeNode(node, num_regs);
+}
+
+void ColoringIteration::PruneInterferenceGraph() {
+ DCHECK(pruned_nodes_.empty()
+ && simplify_worklist_.empty()
+ && freeze_worklist_.empty()
+ && spill_worklist_.empty());
+ // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
+ // and all others as high degree nodes. The distinction is important: low degree nodes are
+ // guaranteed a color, while high degree nodes are not.
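+  // For example, with num_regs_ == 16, a node of degree 15 can always be colored no matter how
+  // its neighbors are colored, whereas a node of degree 16 might find every color blocked.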
+
+ // Build worklists. Note that the coalesce worklist has already been
+ // filled by FindCoalesceOpportunities().
+ for (InterferenceNode* node : prunable_nodes_) {
+ DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
+ DCHECK(!node->GetInterval()->IsSlowPathSafepoint()) << "Safepoint nodes should never be pruned";
+ if (IsLowDegreeNode(node, num_regs_)) {
+ if (node->GetCoalesceOpportunities().empty()) {
+ // Simplify Worklist.
+ node->stage = NodeStage::kSimplifyWorklist;
+ simplify_worklist_.push_back(node);
+ } else {
+ // Freeze Worklist.
+ node->stage = NodeStage::kFreezeWorklist;
+ freeze_worklist_.push_back(node);
+ }
+ } else {
+ // Spill worklist.
+ node->stage = NodeStage::kSpillWorklist;
+ spill_worklist_.push(node);
+ }
+ }
+
+ // Prune graph.
+ // Note that we do not remove a node from its current worklist if it moves to another, so it may
+  // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
+ while (true) {
+ if (!simplify_worklist_.empty()) {
+ // Prune low-degree nodes.
+ // TODO: pop_back() should work as well, but it didn't; we get a
+ // failed check while pruning. We should look into this.
+ InterferenceNode* node = simplify_worklist_.front();
+ simplify_worklist_.pop_front();
+ DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
+ DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
+ DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
+ PruneNode(node);
+ } else if (!coalesce_worklist_.empty()) {
+ // Coalesce.
+ CoalesceOpportunity* opportunity = coalesce_worklist_.top();
+ coalesce_worklist_.pop();
+ if (opportunity->stage == CoalesceStage::kWorklist) {
+ Coalesce(opportunity);
+ }
+ } else if (!freeze_worklist_.empty()) {
+ // Freeze moves and prune a low-degree move-related node.
+ InterferenceNode* node = freeze_worklist_.front();
+ freeze_worklist_.pop_front();
+ if (node->stage == NodeStage::kFreezeWorklist) {
+ DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
+ DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
+ FreezeMoves(node);
+ PruneNode(node);
+ }
+ } else if (!spill_worklist_.empty()) {
+ // We spill the lowest-priority node, because pruning a node earlier
+ // gives it a higher chance of being spilled.
+ InterferenceNode* node = spill_worklist_.top();
+ spill_worklist_.pop();
+ if (node->stage == NodeStage::kSpillWorklist) {
+ DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
+ FreezeMoves(node);
+ PruneNode(node);
+ }
+ } else {
+ // Pruning complete.
+ break;
+ }
+ }
+ DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
+}
+
+void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
+ for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+ if (opportunity->stage == CoalesceStage::kActive) {
+ opportunity->stage = CoalesceStage::kWorklist;
+ coalesce_worklist_.push(opportunity);
+ }
+ }
+}
+
+void ColoringIteration::PruneNode(InterferenceNode* node) {
+ DCHECK_NE(node->stage, NodeStage::kPruned);
+ DCHECK(!node->IsPrecolored());
+ node->stage = NodeStage::kPruned;
+ pruned_nodes_.push(node);
+
+ for (InterferenceNode* adj : node->GetAdjacentNodes()) {
+ DCHECK(!adj->GetInterval()->IsSlowPathSafepoint())
+ << "Nodes should never interfere with synthesized safepoint nodes";
+ DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
+
+ if (adj->IsPrecolored()) {
+ // No effect on pre-colored nodes; they're never pruned.
+ } else {
+ // Remove the interference.
+ bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
+ DCHECK(adj->ContainsInterference(node))
+ << "Missing reflexive interference from non-fixed node";
+ adj->RemoveInterference(node);
+
+ // Handle transitions from high degree to low degree.
+ if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
+ EnableCoalesceOpportunities(adj);
+ for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
+ EnableCoalesceOpportunities(adj_adj);
+ }
+
+ DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
+ if (adj->IsMoveRelated()) {
+ adj->stage = NodeStage::kFreezeWorklist;
+ freeze_worklist_.push_back(adj);
+ } else {
+ adj->stage = NodeStage::kSimplifyWorklist;
+ simplify_worklist_.push_back(adj);
+ }
+ }
+ }
+ }
+}
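+
+// Illustration of the degree transition handled above (hypothetical helper, plain integers):
+// when pruning removes an interference from a neighbor, the neighbor only changes worklists,
+// and only then has its coalesce opportunities re-enabled, if it crosses from high degree to
+// low degree. For example, with num_regs = 4, a neighbor dropping from degree 4 to 3 becomes
+// colorable again.
+//
+//   static bool CrossesToLowDegree(size_t old_degree, size_t new_degree, size_t num_regs) {
+//     return old_degree >= num_regs && new_degree < num_regs;
+//   }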
+
+void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
+ if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
+ DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
+ node->stage = NodeStage::kSimplifyWorklist;
+ simplify_worklist_.push_back(node);
+ }
+}
+
+void ColoringIteration::FreezeMoves(InterferenceNode* node) {
+ for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+ if (opportunity->stage == CoalesceStage::kDefunct) {
+ // Constrained moves should remain constrained, since they will not be considered
+ // during last-chance coalescing.
+ } else {
+ opportunity->stage = CoalesceStage::kInactive;
+ }
+ InterferenceNode* other = opportunity->node_a->GetAlias() == node
+ ? opportunity->node_b->GetAlias()
+ : opportunity->node_a->GetAlias();
+ if (other != node && other->stage == NodeStage::kFreezeWorklist) {
+ DCHECK(IsLowDegreeNode(node, num_regs_));
+ CheckTransitionFromFreezeWorklist(other);
+ }
+ }
+}
+
+bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
+ InterferenceNode* into) {
+ if (!into->IsPrecolored()) {
+ // The uncolored heuristic will cover this case.
+ return false;
+ }
+ if (from->IsPair() || into->IsPair()) {
+ // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
+ // are currently represented as two single fixed nodes in the graph, and `into` is
+ // only one of them. (We may lose the implicit connections to the second one in a merge.)
+ return false;
+ }
+
+ // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
+ // Reasons an adjacent node `adj` can be "ok":
+ // (1) If `adj` is low degree, interference with `into` will not affect its existing
+ // colorable guarantee. (Notice that coalescing cannot increase its degree.)
+ // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
+ // (3) If there's already an interference with `into`, coalescing will not add interferences.
+ for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+ if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
+ // Ok.
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
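+
+// A condensed sketch of the pre-colored (George-style) test above, over a plain neighbor
+// summary; the struct, helper name, and use of std::vector are illustrative assumptions.
+// Merging `from` into a pre-colored `into` is safe when every neighbor of `from` is low
+// degree, pre-colored, or already adjacent to `into`, because then no neighbor gains an
+// interference constraint it did not already have.
+//
+//   struct Neighbor { size_t degree; bool precolored; bool adjacent_to_into; };
+//
+//   static bool SafeToMergeIntoPrecolored(const std::vector<Neighbor>& from_neighbors,
+//                                         size_t num_regs) {
+//     for (const Neighbor& adj : from_neighbors) {
+//       if (adj.degree < num_regs || adj.precolored || adj.adjacent_to_into) {
+//         continue;  // This neighbor cannot be hurt by the merge.
+//       }
+//       return false;
+//     }
+//     return true;
+//   }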
+
+bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
+ InterferenceNode* into) {
+ if (into->IsPrecolored()) {
+ // The pre-colored heuristic will handle this case.
+ return false;
+ }
+
+ // Arbitrary cap to improve compile time. Tests show that this has a negligible effect
+ // on generated code.
+ if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
+ return false;
+ }
+
+ // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
+ // of high degree. (Low degree neighbors can be ignored, because they will eventually be
+ // pruned from the interference graph in the simplify stage.)
+ size_t high_degree_interferences = 0;
+ for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+ if (IsHighDegreeNode(adj, num_regs_)) {
+ high_degree_interferences += from->EdgeWeightWith(adj);
+ }
+ }
+ for (InterferenceNode* adj : into->GetAdjacentNodes()) {
+ if (IsHighDegreeNode(adj, num_regs_)) {
+ if (from->ContainsInterference(adj)) {
+ // We've already counted this adjacent node.
+ // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
+ // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
+ // but remains conservative.)
+ if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
+ high_degree_interferences -= from->EdgeWeightWith(adj);
+ }
+ } else {
+ high_degree_interferences += into->EdgeWeightWith(adj);
+ }
+ }
+ }
+
+ return high_degree_interferences < num_regs_;
+}
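+
+// The textbook Briggs test that the function above extends, shown over plain std containers
+// (hypothetical helper, unit edge weights, and without the degree-decrease refinement used
+// above): the merged node is colorable if it has fewer than num_regs neighbors of high
+// degree, counting neighbors shared by `from` and `into` only once.
+//
+//   static bool BriggsSafe(const std::set<int>& from_adj,
+//                          const std::set<int>& into_adj,
+//                          const std::map<int, size_t>& degree,
+//                          size_t num_regs) {
+//     std::set<int> combined(from_adj);
+//     combined.insert(into_adj.begin(), into_adj.end());  // Shared neighbors counted once.
+//     size_t high = 0;
+//     for (int n : combined) {
+//       if (degree.at(n) >= num_regs) {
+//         ++high;
+//       }
+//     }
+//     return high < num_regs;
+//   }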
+
+void ColoringIteration::Combine(InterferenceNode* from,
+ InterferenceNode* into) {
+ from->SetAlias(into);
+
+ // Add interferences.
+ for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+ bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
+ AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
+ if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
+ // This is a (temporary) transition to a high degree node. Its degree will decrease again
+ // when we prune `from`, but it's best to be consistent about the current worklist.
+ adj->stage = NodeStage::kSpillWorklist;
+ spill_worklist_.push(adj);
+ }
+ }
+
+ // Add coalesce opportunities.
+ for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
+ if (opportunity->stage != CoalesceStage::kDefunct) {
+ into->AddCoalesceOpportunity(opportunity);
+ }
+ }
+ EnableCoalesceOpportunities(from);
+
+ // Prune and update worklists.
+ PruneNode(from);
+ if (IsLowDegreeNode(into, num_regs_)) {
+ // Coalesce(...) takes care of checking for a transition to the simplify worklist.
+ DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
+ } else if (into->stage == NodeStage::kFreezeWorklist) {
+ // This is a transition to a high degree node.
+ into->stage = NodeStage::kSpillWorklist;
+ spill_worklist_.push(into);
+ } else {
+ DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
+ }
+}
+
+void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
+ InterferenceNode* from = opportunity->node_a->GetAlias();
+ InterferenceNode* into = opportunity->node_b->GetAlias();
+ DCHECK_NE(from->stage, NodeStage::kPruned);
+ DCHECK_NE(into->stage, NodeStage::kPruned);
+
+ if (from->IsPrecolored()) {
+ // If we have one pre-colored node, make sure it's the `into` node.
+ std::swap(from, into);
+ }
+
+ if (from == into) {
+ // These nodes have already been coalesced.
+ opportunity->stage = CoalesceStage::kDefunct;
+ CheckTransitionFromFreezeWorklist(from);
+ } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
+ // These nodes interfere.
+ opportunity->stage = CoalesceStage::kDefunct;
+ CheckTransitionFromFreezeWorklist(from);
+ CheckTransitionFromFreezeWorklist(into);
+ } else if (PrecoloredHeuristic(from, into)
+ || UncoloredHeuristic(from, into)) {
+ // We can coalesce these nodes.
+ opportunity->stage = CoalesceStage::kDefunct;
+ Combine(from, into);
+ CheckTransitionFromFreezeWorklist(into);
+ } else {
+ // We cannot coalesce, but we may be able to later.
+ opportunity->stage = CoalesceStage::kActive;
+ }
+}
+
+// Build a mask with a bit set for each register assigned to some
+// interval in `intervals`.
+template <typename Container>
+static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
+ std::bitset<kMaxNumRegs> conflict_mask;
+ for (InterferenceNode* adjacent : intervals) {
+ LiveInterval* conflicting = adjacent->GetInterval();
+ if (conflicting->HasRegister()) {
+ conflict_mask.set(conflicting->GetRegister());
+ if (conflicting->HasHighInterval()) {
+ DCHECK(conflicting->GetHighInterval()->HasRegister());
+ conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
+ }
+ } else {
+ DCHECK(!conflicting->HasHighInterval()
+ || !conflicting->GetHighInterval()->HasRegister());
+ }
+ }
+ return conflict_mask;
+}
+
+bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
+ return processing_core_regs
+ ? !codegen_->IsCoreCalleeSaveRegister(reg)
+ : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+static bool RegisterIsAligned(size_t reg) {
+ return reg % 2 == 0;
+}
+
+static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
+ // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
+ // Note that CTZ is undefined if all bits are 0, so we special-case it.
+ return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
+}
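+
+// Worked example of the mask trick above: with conflict_mask = 0b0111 (registers 0, 1 and 2
+// taken), ~mask ends in ...1000, so CTZ(~mask) = 3 and register 3 is the first free one.
+// A standalone equivalent, assuming a GCC/Clang builtin and a fixed 32-bit mask
+// (illustration only):
+//
+//   static size_t FirstFreeRegister(const std::bitset<32>& conflict_mask) {
+//     if (conflict_mask.all()) {
+//       return conflict_mask.size();  // No free register.
+//     }
+//     return static_cast<size_t>(__builtin_ctzl(~conflict_mask.to_ulong()));
+//   }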
+
+bool ColoringIteration::ColorInterferenceGraph() {
+ DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
+ ArenaVector<LiveInterval*> colored_intervals(
+ allocator_->Adapter(kArenaAllocRegisterAllocator));
+ bool successful = true;
+
+ while (!pruned_nodes_.empty()) {
+ InterferenceNode* node = pruned_nodes_.top();
+ pruned_nodes_.pop();
+ LiveInterval* interval = node->GetInterval();
+ size_t reg = 0;
+
+ InterferenceNode* alias = node->GetAlias();
+ if (alias != node) {
+ // This node was coalesced with another.
+ LiveInterval* alias_interval = alias->GetInterval();
+ if (alias_interval->HasRegister()) {
+ reg = alias_interval->GetRegister();
+ DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
+ << "This node conflicts with the register it was coalesced with";
+ } else {
+ DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
+ << "Move coalescing was not conservative, causing a node to be coalesced "
+ << "with another node that could not be colored";
+ if (interval->RequiresRegister()) {
+ successful = false;
+ }
+ }
+ } else {
+ // Search for free register(s).
+ std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
+ if (interval->HasHighInterval()) {
+ // Note that the graph coloring allocator assumes that pair intervals are aligned here,
+ // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
+ // change the alignment requirements here, we will have to update the algorithm (e.g.,
+ // be more conservative about the weight of edges adjacent to pair nodes.)
+ while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
+ reg += 2;
+ }
+
+ // Try to use a caller-save register first.
+ for (size_t i = 0; i < num_regs_ - 1; i += 2) {
+ bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_);
+ bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
+ if (!conflict_mask[i] && !conflict_mask[i + 1]) {
+ if (low_caller_save && high_caller_save) {
+ reg = i;
+ break;
+ } else if (low_caller_save || high_caller_save) {
+ reg = i;
+ // Keep looking to try to get both parts in caller-save registers.
+ }
+ }
+ }
+ } else {
+ // Not a pair interval.
+ reg = FindFirstZeroInConflictMask(conflict_mask);
+
+ // Try to use caller-save registers first.
+ for (size_t i = 0; i < num_regs_; ++i) {
+ if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
+ reg = i;
+ break;
+ }
+ }
+ }
+
+ // Last-chance coalescing.
+ for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+ if (opportunity->stage == CoalesceStage::kDefunct) {
+ continue;
+ }
+ LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
+ ? opportunity->node_b->GetAlias()->GetInterval()
+ : opportunity->node_a->GetAlias()->GetInterval();
+ if (other_interval->HasRegister()) {
+ size_t coalesce_register = other_interval->GetRegister();
+ if (interval->HasHighInterval()) {
+ if (!conflict_mask[coalesce_register] &&
+ !conflict_mask[coalesce_register + 1] &&
+ RegisterIsAligned(coalesce_register)) {
+ reg = coalesce_register;
+ break;
+ }
+ } else if (!conflict_mask[coalesce_register]) {
+ reg = coalesce_register;
+ break;
+ }
+ }
+ }
+ }
+
+ if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
+ // Assign register.
+ DCHECK(!interval->HasRegister());
+ interval->SetRegister(reg);
+ colored_intervals.push_back(interval);
+ if (interval->HasHighInterval()) {
+ DCHECK(!interval->GetHighInterval()->HasRegister());
+ interval->GetHighInterval()->SetRegister(reg + 1);
+ colored_intervals.push_back(interval->GetHighInterval());
+ }
+ } else if (interval->RequiresRegister()) {
+ // The interference graph is too dense to color. Make it sparser by
+ // splitting this live interval.
+ successful = false;
+ register_allocator_->SplitAtRegisterUses(interval);
+ // We continue coloring, because there may be additional intervals that cannot
+ // be colored, and that we should split.
+ } else {
+ // Spill.
+ node->SetNeedsSpillSlot();
+ }
+ }
+
+ // If unsuccessful, reset all register assignments.
+ if (!successful) {
+ for (LiveInterval* interval : colored_intervals) {
+ interval->ClearRegister();
+ }
+ }
+
+ return successful;
+}
+
+size_t RegisterAllocatorGraphColor::ComputeMaxSafepointLiveRegisters(
+ const ArenaVector<InterferenceNode*>& safepoints) {
+ size_t max_safepoint_live_regs = 0;
+ for (InterferenceNode* safepoint : safepoints) {
+ DCHECK(safepoint->GetInterval()->IsSlowPathSafepoint());
+ std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(safepoint->GetAdjacentNodes());
+ size_t live_regs = conflict_mask.count();
+ max_safepoint_live_regs = std::max(max_safepoint_live_regs, live_regs);
+ }
+ return max_safepoint_live_regs;
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) {
+ // The register allocation resolver will organize the stack based on value type,
+ // so we assign stack slots for each value type separately.
+ ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // The set of parent intervals already handled.
+ ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // Find nodes that need spill slots.
+ for (InterferenceNode* node : nodes) {
+ if (!node->NeedsSpillSlot()) {
+ continue;
+ }
+
+ LiveInterval* parent = node->GetInterval()->GetParent();
+ if (seen.find(parent) != seen.end()) {
+ // We've already handled this interval.
+ // This can happen if multiple siblings of the same interval request a stack slot.
+ continue;
+ }
+ seen.insert(parent);
+
+ HInstruction* defined_by = parent->GetDefinedBy();
+ if (parent->HasSpillSlot()) {
+ // We already have a spill slot for this value that we can reuse.
+ } else if (defined_by->IsParameterValue()) {
+ // Parameters already have a stack slot.
+ parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+ } else if (defined_by->IsCurrentMethod()) {
+ // The current method is always at stack slot 0.
+ parent->SetSpillSlot(0);
+ } else if (defined_by->IsConstant()) {
+ // Constants don't need a spill slot.
+ } else {
+ // We need to find a spill slot for this interval. Place it in the correct
+ // worklist to be processed later.
+ switch (node->GetInterval()->GetType()) {
+ case Primitive::kPrimDouble:
+ double_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimLong:
+ long_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimFloat:
+ float_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ int_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
+ UNREACHABLE();
+ }
+ }
+ }
+
+ // Color spill slots for each value type.
+ ColorSpillSlots(&double_intervals, &num_double_spill_slots_);
+ ColorSpillSlots(&long_intervals, &num_long_spill_slots_);
+ ColorSpillSlots(&float_intervals, &num_float_spill_slots_);
+ ColorSpillSlots(&int_intervals, &num_int_spill_slots_);
+}
+
+void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
+ size_t* num_stack_slots_used) {
+ // We cannot use the original interference graph here because spill slots are assigned to
+ // all of the siblings of an interval, whereas an interference node represents only a single
+ // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
+ // by position, and assigning the lowest spill slot available when we encounter an interval
+ // beginning. We ignore lifetime holes for simplicity.
+ ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
+ allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
+ LiveInterval* parent_interval = *it;
+ DCHECK(parent_interval->IsParent());
+ DCHECK(!parent_interval->HasSpillSlot());
+ size_t start = parent_interval->GetStart();
+ size_t end = parent_interval->GetLastSibling()->GetEnd();
+ DCHECK_LT(start, end);
+ interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
+ interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
+ }
+
+ // Sort by position.
+ // We explicitly ignore the third entry of each tuple (the interval pointer) in order
+ // to maintain determinism.
+ std::sort(interval_endpoints.begin(), interval_endpoints.end(),
+ [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
+ const std::tuple<size_t, bool, LiveInterval*>& rhs) {
+ return std::tie(std::get<0>(lhs), std::get<1>(lhs))
+ < std::tie(std::get<0>(rhs), std::get<1>(rhs));
+ });
+
+ ArenaBitVector taken(allocator_, 0, true);
+ for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
+ // Extract information from the current tuple.
+ LiveInterval* parent_interval;
+ bool is_interval_beginning;
+ size_t position;
+ std::tie(position, is_interval_beginning, parent_interval) = *it;
+
+ bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+
+ if (is_interval_beginning) {
+ DCHECK(!parent_interval->HasSpillSlot());
+ DCHECK_EQ(position, parent_interval->GetStart());
+
+ // Find a free stack slot.
+ size_t slot = 0;
+ for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
+ // Skip taken slots.
+ }
+ parent_interval->SetSpillSlot(slot);
+
+ *num_stack_slots_used = std::max(*num_stack_slots_used,
+ needs_two_slots ? slot + 2 : slot + 1);
+ if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+ // The parallel move resolver requires that there be an even number of spill slots
+ // allocated for pair value types.
+ ++(*num_stack_slots_used);
+ }
+
+ taken.SetBit(slot);
+ if (needs_two_slots) {
+ taken.SetBit(slot + 1);
+ }
+ } else {
+ DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
+ DCHECK(parent_interval->HasSpillSlot());
+
+ // Free up the stack slot used by this interval.
+ size_t slot = parent_interval->GetSpillSlot();
+ DCHECK(taken.IsBitSet(slot));
+ DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
+ taken.ClearBit(slot);
+ if (needs_two_slots) {
+ taken.ClearBit(slot + 1);
+ }
+ }
+ }
+ DCHECK_EQ(taken.NumSetBits(), 0u);
+}
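+
+// Worked example of the endpoint sweep above, with single-slot intervals A = [2, 10),
+// B = [4, 6) and C = [8, 12): the sorted endpoints are (2,begin,A) (4,begin,B) (6,end,B)
+// (8,begin,C) (10,end,A) (12,end,C), so A takes slot 0, B takes slot 1, B's end frees
+// slot 1, and C reuses slot 1; two slots cover three spilled intervals. A condensed,
+// free-list style version over plain tuples (hypothetical helper, assuming the usual
+// <algorithm>/<map>/<set>/<tuple>/<vector> headers):
+//
+//   static size_t AssignSlots(std::vector<std::tuple<size_t, bool, int>> endpoints,
+//                             std::map<int, size_t>* slot_of) {
+//     std::sort(endpoints.begin(), endpoints.end());  // Ends (false) sort before begins.
+//     std::set<size_t> taken;
+//     size_t max_used = 0;
+//     for (const auto& e : endpoints) {
+//       int id = std::get<2>(e);
+//       if (std::get<1>(e)) {  // Interval begins: take the lowest free slot.
+//         size_t slot = 0;
+//         while (taken.count(slot) != 0) ++slot;
+//         taken.insert(slot);
+//         (*slot_of)[id] = slot;
+//         max_used = std::max(max_used, slot + 1);
+//       } else {  // Interval ends: release its slot for reuse.
+//         taken.erase((*slot_of)[id]);
+//       }
+//     }
+//     return max_used;  // Total number of stack slots needed.
+//   }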
+
+} // namespace art
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
new file mode 100644
index 0000000000..ed12561d2c
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class Location;
+class SsaLivenessAnalysis;
+class InterferenceNode;
+struct CoalesceOpportunity;
+enum class CoalesceKind;
+
+/**
+ * A graph coloring register allocator.
+ *
+ * The algorithm proceeds as follows:
+ * (1) Build an interference graph, where nodes represent live intervals, and edges represent
+ * interferences between two intervals. Coloring this graph with k colors is isomorphic to
+ * finding a valid register assignment with k registers.
+ * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are
+ * guaranteed a color. (No matter how we color their adjacent nodes, we can give them a
+ * different color.) As we prune nodes from the graph, more nodes may drop below degree k,
+ * enabling further pruning. The key is to maintain the pruning order in a stack, so that we
+ * can color the nodes in the reverse order.
+ * When there are no more nodes with degree less than k, we start pruning alternate nodes based
+ * on heuristics. Since these nodes are not guaranteed a color, we are careful to
+ * prioritize nodes that require a register. We also prioritize short intervals, because
+ * short intervals cannot be split very much if coloring fails (see below). "Prioritizing"
+ * a node amounts to pruning it later, since it will have fewer interferences if we prune other
+ * nodes first.
+ * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign
+ * a node a color, we do one of two things:
+ * - If the node requires a register, we consider the current coloring attempt a failure.
+ * However, we split the node's live interval in order to make the interference graph
+ * sparser, so that future coloring attempts may succeed.
+ * - If the node does not require a register, we simply assign it a location on the stack.
+ *
+ * If iterative move coalescing is enabled, the algorithm also attempts to conservatively
+ * combine nodes in the graph that would prefer to have the same color. (For example, the output
+ * of a phi instruction would prefer to have the same register as at least one of its inputs.)
+ * There are several additional steps involved with this:
+ * - We look for coalesce opportunities by examining each live interval, a step similar to that
+ * used by linear scan when looking for register hints.
+ * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist
+ * of low degree nodes that have associated coalesce opportunities. Only when we run out of
+ * coalesce opportunities do we start pruning coalesce-associated nodes.
+ * - When pruning a node, if any nodes transition from high degree to low degree, we add
+ * associated coalesce opportunities to the worklist, since these opportunities may now succeed.
+ * - Whether two nodes can be combined is decided by two different heuristics: one used when
+ * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node.
+ * It is vital that we only combine two nodes if the node that remains is guaranteed to receive
+ * a color. This is because additional spilling is more costly than failing to coalesce.
+ * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around
+ * to be used as last-chance register hints when coloring. If nothing else, we try to use
+ * caller-save registers before callee-save registers.
+ *
+ * A good reference for graph coloring register allocation is
+ * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition).
+ */
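+// A tiny worked example of steps (2) and (3) above, with k = 2 registers and the
+// interference graph a--b, b--c (degrees 1, 2, 1): a is low degree and pruned first, which
+// drops b to degree 1, so b is pruned next, then c. Coloring in the reverse order always
+// succeeds, because each node sees at most one colored neighbor when it is popped off the
+// pruning stack:
+//
+//   prune: push(a); push(b); push(c);   // Pruning stack, bottom to top: a, b, c.
+//   color: c -> R0; b -> R1 (interferes with c); a -> R0 (interferes only with b).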
+class RegisterAllocatorGraphColor : public RegisterAllocator {
+ public:
+ RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis,
+ bool iterative_move_coalescing = true);
+ ~RegisterAllocatorGraphColor() OVERRIDE {}
+
+ void AllocateRegisters() OVERRIDE;
+
+ bool Validate(bool log_fatal_on_failure);
+
+ private:
+ // Collect all intervals and prepare for register allocation.
+ void ProcessInstructions();
+ void ProcessInstruction(HInstruction* instruction);
+
+ // If any inputs require specific registers, block those registers
+ // at the position of this instruction.
+ void CheckForFixedInputs(HInstruction* instruction);
+
+ // If the output of an instruction requires a specific register, split
+ // the interval and assign the register to the first part.
+ void CheckForFixedOutput(HInstruction* instruction);
+
+ // Add all applicable safepoints to a live interval.
+ // Currently depends on instruction processing order.
+ void AddSafepointsFor(HInstruction* instruction);
+
+ // Collect all live intervals associated with the temporary locations
+ // needed by an instruction.
+ void CheckForTempLiveIntervals(HInstruction* instruction);
+
+ // If a safe point is needed, add a synthesized interval to later record
+ // the number of live registers at this point.
+ void CheckForSafepoint(HInstruction* instruction);
+
+ // Split an interval, but only if `position` is inside of `interval`.
+ // Return either the new interval, or the original interval if not split.
+ static LiveInterval* TrySplit(LiveInterval* interval, size_t position);
+
+ // To ensure every graph can be colored, split live intervals
+ // at their register defs and uses. This creates short intervals with low
+ // degree in the interference graph, which are prioritized during graph
+ // coloring.
+ void SplitAtRegisterUses(LiveInterval* interval);
+
+ // If the given instruction is a catch phi, give it a spill slot.
+ void AllocateSpillSlotForCatchPhi(HInstruction* instruction);
+
+ // Ensure that the given register cannot be allocated for a given range.
+ void BlockRegister(Location location, size_t start, size_t end);
+ void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+ bool IsCallerSave(size_t reg, bool processing_core_regs);
+
+ // Return the maximum number of registers live at safepoints,
+ // based on the outgoing interference edges of safepoint nodes.
+ size_t ComputeMaxSafepointLiveRegisters(const ArenaVector<InterferenceNode*>& safepoints);
+
+ // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not
+ // assigned the same stack slot.
+ void ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
+ size_t* num_stack_slots_used);
+
+ // Provide stack slots to nodes that need them.
+ void AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes);
+
+ // Whether iterative move coalescing should be performed. Iterative move coalescing
+ // improves code quality, but increases compile time.
+ const bool iterative_move_coalescing_;
+
+ // Live intervals, split by kind (core and floating point).
+ // These should not contain high intervals, as those are represented by
+ // the corresponding low interval throughout register allocation.
+ ArenaVector<LiveInterval*> core_intervals_;
+ ArenaVector<LiveInterval*> fp_intervals_;
+
+ // Intervals for temporaries, saved for special handling in the resolution phase.
+ ArenaVector<LiveInterval*> temp_intervals_;
+
+ // Safepoints, saved for special handling while processing instructions.
+ ArenaVector<HInstruction*> safepoints_;
+
+ // Interference nodes representing specific registers. These are "pre-colored" nodes
+ // in the interference graph.
+ ArenaVector<InterferenceNode*> physical_core_nodes_;
+ ArenaVector<InterferenceNode*> physical_fp_nodes_;
+
+ // Allocated stack slot counters.
+ size_t num_int_spill_slots_;
+ size_t num_double_spill_slots_;
+ size_t num_float_spill_slots_;
+ size_t num_long_spill_slots_;
+ size_t catch_phi_spill_slot_counter_;
+
+ // Number of stack slots needed for the pointer to the current method.
+ // This is 1 for 32-bit architectures, and 2 for 64-bit architectures.
+ const size_t reserved_art_method_slots_;
+
+ // Number of stack slots needed for outgoing arguments.
+ const size_t reserved_out_slots_;
+
+ // The number of globally blocked core and floating point registers, such as the stack pointer.
+ size_t number_of_globally_blocked_core_regs_;
+ size_t number_of_globally_blocked_fp_regs_;
+
+ // The maximum number of registers live at safe points. Needed by the code generator.
+ size_t max_safepoint_live_core_regs_;
+ size_t max_safepoint_live_fp_regs_;
+
+ friend class ColoringIteration;
+
+ DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
new file mode 100644
index 0000000000..768ed2d26a
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -0,0 +1,1225 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_linear_scan.h"
+
+#include <iostream>
+#include <sstream>
+
+#include "base/bit_vector-inl.h"
+#include "base/enums.h"
+#include "code_generator.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+static constexpr size_t kMaxLifetimePosition = -1;
+static constexpr size_t kDefaultNumberOfSpillSlots = 4;
+
+// For simplicity, we implement register pairs as (reg, reg + 1).
+// Note that this is a requirement for double registers on ARM, since we
+// allocate SRegister.
+static int GetHighForLowRegister(int reg) { return reg + 1; }
+static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
+static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
+ return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
+}
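+
+// Example of the (reg, reg + 1) pair convention above, using the standard ARM VFP mapping
+// for illustration: a double allocated to S-registers (S4, S5) corresponds to D2, so the
+// low register of a pair must be even for the pair to name a valid D register.
+//
+//   static bool IsValidDoublePairStart(int low_sreg) {
+//     return (low_sreg & 1) == 0;  // Same check as IsLowRegister() above.
+//   }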
+
+RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness)
+ : RegisterAllocator(allocator, codegen, liveness),
+ unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ unhandled_(nullptr),
+ handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ catch_phi_spill_slots_(0),
+ safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ processing_core_registers_(false),
+ number_of_registers_(-1),
+ registers_array_(nullptr),
+ blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
+ blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
+ reserved_out_slots_(0),
+ maximum_number_of_live_core_registers_(0),
+ maximum_number_of_live_fp_registers_(0) {
+ temp_intervals_.reserve(4);
+ int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+ long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+ float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+ double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+
+ codegen->SetupBlockedRegisters();
+ physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
+ physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
+ // Always reserve for the current method and the graph's max out registers.
+ // TODO: compute it instead.
+ // ArtMethod* takes 2 vregs for 64 bits.
+ size_t ptr_size = static_cast<size_t>(InstructionSetPointerSize(codegen->GetInstructionSet()));
+ reserved_out_slots_ = ptr_size / kVRegSize + codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
+}
+
+static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
+ if (interval == nullptr) return false;
+ bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
+ && (interval->GetType() != Primitive::kPrimFloat);
+ return processing_core_registers == is_core_register;
+}
+
+void RegisterAllocatorLinearScan::AllocateRegisters() {
+ AllocateRegistersInternal();
+ RegisterAllocationResolver(allocator_, codegen_, liveness_)
+ .Resolve(maximum_number_of_live_core_registers_,
+ maximum_number_of_live_fp_registers_,
+ reserved_out_slots_,
+ int_spill_slots_.size(),
+ long_spill_slots_.size(),
+ float_spill_slots_.size(),
+ double_spill_slots_.size(),
+ catch_phi_spill_slots_,
+ temp_intervals_);
+
+ if (kIsDebugBuild) {
+ processing_core_registers_ = true;
+ ValidateInternal(true);
+ processing_core_registers_ = false;
+ ValidateInternal(true);
+ // Check that the linear order is still correct with regards to lifetime positions.
+ // Since only parallel moves have been inserted during the register allocation,
+ // these checks are mostly for making sure these moves have been added correctly.
+ size_t current_liveness = 0;
+ for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ HInstruction* instruction = inst_it.Current();
+ DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
+ current_liveness = instruction->GetLifetimePosition();
+ }
+ for (HInstructionIterator inst_it(block->GetInstructions());
+ !inst_it.Done();
+ inst_it.Advance()) {
+ HInstruction* instruction = inst_it.Current();
+ DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
+ current_liveness = instruction->GetLifetimePosition();
+ }
+ }
+ }
+}
+
+void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, size_t end) {
+ int reg = location.reg();
+ DCHECK(location.IsRegister() || location.IsFpuRegister());
+ LiveInterval* interval = location.IsRegister()
+ ? physical_core_register_intervals_[reg]
+ : physical_fp_register_intervals_[reg];
+ Primitive::Type type = location.IsRegister()
+ ? Primitive::kPrimInt
+ : Primitive::kPrimFloat;
+ if (interval == nullptr) {
+ interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
+ if (location.IsRegister()) {
+ physical_core_register_intervals_[reg] = interval;
+ } else {
+ physical_fp_register_intervals_[reg] = interval;
+ }
+ }
+ DCHECK(interval->GetRegister() == reg);
+ interval->AddRange(start, end);
+}
+
+void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+ for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+ if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+ BlockRegister(Location::RegisterLocation(i), start, end);
+ }
+ }
+ for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+ if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+ BlockRegister(Location::FpuRegisterLocation(i), start, end);
+ }
+ }
+}
+
+void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
+ // Iterate post-order, to ensure the list is sorted, and the last added interval
+ // is the one with the lowest start position.
+ for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
+ back_it.Advance()) {
+ ProcessInstruction(back_it.Current());
+ }
+ for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ ProcessInstruction(inst_it.Current());
+ }
+
+ if (block->IsCatchBlock() ||
+ (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+ // By blocking all registers at the top of each catch block or irreducible loop, we force
+ // intervals belonging to the live-in set of the catch/header block to be spilled.
+ // TODO(ngeoffray): Phis in this block could be allocated in register.
+ size_t position = block->GetLifetimeStart();
+ BlockRegisters(position, position + 1);
+ }
+ }
+
+ number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
+ registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+ kArenaAllocRegisterAllocator);
+ processing_core_registers_ = true;
+ unhandled_ = &unhandled_core_intervals_;
+ for (LiveInterval* fixed : physical_core_register_intervals_) {
+ if (fixed != nullptr) {
+ // Fixed interval is added to inactive_ instead of unhandled_.
+ // It's also the only type of inactive interval whose start position
+ // can be after the current interval during linear scan.
+ // Fixed interval is never split and never moves to unhandled_.
+ inactive_.push_back(fixed);
+ }
+ }
+ LinearScan();
+
+ inactive_.clear();
+ active_.clear();
+ handled_.clear();
+
+ number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
+ registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+ kArenaAllocRegisterAllocator);
+ processing_core_registers_ = false;
+ unhandled_ = &unhandled_fp_intervals_;
+ for (LiveInterval* fixed : physical_fp_register_intervals_) {
+ if (fixed != nullptr) {
+ // Fixed interval is added to inactive_ instead of unhandled_.
+ // It's also the only type of inactive interval whose start position
+ // can be after the current interval during linear scan.
+ // Fixed interval is never split and never moves to unhandled_.
+ inactive_.push_back(fixed);
+ }
+ }
+ LinearScan();
+}
+
+void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ size_t position = instruction->GetLifetimePosition();
+
+ if (locations == nullptr) return;
+
+ // Create synthesized intervals for temporaries.
+ for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+ Location temp = locations->GetTemp(i);
+ if (temp.IsRegister() || temp.IsFpuRegister()) {
+ BlockRegister(temp, position, position + 1);
+ // Ensure that an explicit temporary register is marked as being allocated.
+ codegen_->AddAllocatedRegister(temp);
+ } else {
+ DCHECK(temp.IsUnallocated());
+ switch (temp.GetPolicy()) {
+ case Location::kRequiresRegister: {
+ LiveInterval* interval =
+ LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+ temp_intervals_.push_back(interval);
+ interval->AddTempUse(instruction, i);
+ unhandled_core_intervals_.push_back(interval);
+ break;
+ }
+
+ case Location::kRequiresFpuRegister: {
+ LiveInterval* interval =
+ LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+ temp_intervals_.push_back(interval);
+ interval->AddTempUse(instruction, i);
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ interval->AddHighInterval(/* is_temp */ true);
+ LiveInterval* high = interval->GetHighInterval();
+ temp_intervals_.push_back(high);
+ unhandled_fp_intervals_.push_back(high);
+ }
+ unhandled_fp_intervals_.push_back(interval);
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected policy for temporary location "
+ << temp.GetPolicy();
+ }
+ }
+ }
+
+ bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
+ && (instruction->GetType() != Primitive::kPrimFloat);
+
+ if (locations->NeedsSafepoint()) {
+ if (codegen_->IsLeafMethod()) {
+ // TODO: We do this here because we do not want the suspend check to artificially
+ // create live registers. We should find another place, but this is currently the
+ // simplest.
+ DCHECK(instruction->IsSuspendCheckEntry());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+ safepoints_.push_back(instruction);
+ if (locations->OnlyCallsOnSlowPath()) {
+ // We add a synthesized range at this position to record the live registers
+ // at this position. Ideally, we could just update the safepoints when locations
+ // are updated, but we currently need to know the full stack size before updating
+ // locations (because of parameters and the fact that we don't have a frame pointer).
+ // And knowing the full stack size requires to know the maximum number of live
+ // registers at calls in slow paths.
+ // By adding the following interval in the algorithm, we can compute this
+ // maximum before updating locations.
+ LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+ interval->AddRange(position, position + 1);
+ AddSorted(&unhandled_core_intervals_, interval);
+ AddSorted(&unhandled_fp_intervals_, interval);
+ }
+ }
+
+ if (locations->WillCall()) {
+ BlockRegisters(position, position + 1, /* caller_save_only */ true);
+ }
+
+ for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+ Location input = locations->InAt(i);
+ if (input.IsRegister() || input.IsFpuRegister()) {
+ BlockRegister(input, position, position + 1);
+ } else if (input.IsPair()) {
+ BlockRegister(input.ToLow(), position, position + 1);
+ BlockRegister(input.ToHigh(), position, position + 1);
+ }
+ }
+
+ LiveInterval* current = instruction->GetLiveInterval();
+ if (current == nullptr) return;
+
+ ArenaVector<LiveInterval*>& unhandled = core_register
+ ? unhandled_core_intervals_
+ : unhandled_fp_intervals_;
+
+ DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
+
+ if (codegen_->NeedsTwoRegisters(current->GetType())) {
+ current->AddHighInterval();
+ }
+
+ for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+ HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+ size_t safepoint_position = safepoint->GetLifetimePosition();
+
+ // Test that safepoints are ordered in the optimal way.
+ DCHECK(safepoint_index == safepoints_.size() ||
+ safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+ if (safepoint_position == current->GetStart()) {
+ // The safepoint is for this instruction, so the location of the instruction
+ // does not need to be saved.
+ DCHECK_EQ(safepoint_index, safepoints_.size());
+ DCHECK_EQ(safepoint, instruction);
+ continue;
+ } else if (current->IsDeadAt(safepoint_position)) {
+ break;
+ } else if (!current->Covers(safepoint_position)) {
+ // Hole in the interval.
+ continue;
+ }
+ current->AddSafepoint(safepoint);
+ }
+ current->ResetSearchCache();
+
+ // Some instructions define their output in a fixed register or stack slot. We need
+ // to ensure we know these locations before doing register allocation. For a
+ // given register, we create an interval that covers these locations. The register
+ // will be unavailable at these locations when trying to allocate one for an
+ // interval.
+ //
+ // The backwards walking ensures the ranges are ordered on increasing start positions.
+ Location output = locations->Out();
+ if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
+ Location first = locations->InAt(0);
+ if (first.IsRegister() || first.IsFpuRegister()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(first.reg());
+ } else if (first.IsPair()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(first.low());
+ LiveInterval* high = current->GetHighInterval();
+ high->SetRegister(first.high());
+ high->SetFrom(position + 1);
+ }
+ } else if (output.IsRegister() || output.IsFpuRegister()) {
+ // Shift the interval's start by one to account for the blocked register.
+ current->SetFrom(position + 1);
+ current->SetRegister(output.reg());
+ BlockRegister(output, position, position + 1);
+ } else if (output.IsPair()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(output.low());
+ LiveInterval* high = current->GetHighInterval();
+ high->SetRegister(output.high());
+ high->SetFrom(position + 1);
+ BlockRegister(output.ToLow(), position, position + 1);
+ BlockRegister(output.ToHigh(), position, position + 1);
+ } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
+ current->SetSpillSlot(output.GetStackIndex());
+ } else {
+ DCHECK(output.IsUnallocated() || output.IsConstant());
+ }
+
+ if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+ AllocateSpillSlotForCatchPhi(instruction->AsPhi());
+ }
+
+ // If needed, add interval to the list of unhandled intervals.
+ if (current->HasSpillSlot() || instruction->IsConstant()) {
+ // Split just before first register use.
+ size_t first_register_use = current->FirstRegisterUse();
+ if (first_register_use != kNoLifetime) {
+ LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+ // Don't add directly to `unhandled`, it needs to be sorted and the start
+ // of this new interval might be after intervals already in the list.
+ AddSorted(&unhandled, split);
+ } else {
+ // Nothing to do, we won't allocate a register for this value.
+ }
+ } else {
+ // Don't add directly to `unhandled`, temp or safepoint intervals
+ // for this instruction may have been added, and those can be
+ // processed first.
+ AddSorted(&unhandled, current);
+ }
+}
+
+class AllRangesIterator : public ValueObject {
+ public:
+ explicit AllRangesIterator(LiveInterval* interval)
+ : current_interval_(interval),
+ current_range_(interval->GetFirstRange()) {}
+
+ bool Done() const { return current_interval_ == nullptr; }
+ LiveRange* CurrentRange() const { return current_range_; }
+ LiveInterval* CurrentInterval() const { return current_interval_; }
+
+ void Advance() {
+ current_range_ = current_range_->GetNext();
+ if (current_range_ == nullptr) {
+ current_interval_ = current_interval_->GetNextSibling();
+ if (current_interval_ != nullptr) {
+ current_range_ = current_interval_->GetFirstRange();
+ }
+ }
+ }
+
+ private:
+ LiveInterval* current_interval_;
+ LiveRange* current_range_;
+
+ DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
+};
+
+bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const {
+ // To simplify unit testing, we eagerly create the array of intervals, and
+ // call the helper method.
+ ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+ for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
+ intervals.push_back(instruction->GetLiveInterval());
+ }
+ }
+
+ const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
+ ? &physical_core_register_intervals_
+ : &physical_fp_register_intervals_;
+ for (LiveInterval* fixed : *physical_register_intervals) {
+ if (fixed != nullptr) {
+ intervals.push_back(fixed);
+ }
+ }
+
+ for (LiveInterval* temp : temp_intervals_) {
+ if (ShouldProcess(processing_core_registers_, temp)) {
+ intervals.push_back(temp);
+ }
+ }
+
+ return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
+ allocator_, processing_core_registers_, log_fatal_on_failure);
+}
+
+void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
+ interval->Dump(stream);
+ stream << ": ";
+ if (interval->HasRegister()) {
+ if (interval->IsFloatingPoint()) {
+ codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
+ } else {
+ codegen_->DumpCoreRegister(stream, interval->GetRegister());
+ }
+ } else {
+ stream << "spilled";
+ }
+ stream << std::endl;
+}
+
+void RegisterAllocatorLinearScan::DumpAllIntervals(std::ostream& stream) const {
+ stream << "inactive: " << std::endl;
+ for (LiveInterval* inactive_interval : inactive_) {
+ DumpInterval(stream, inactive_interval);
+ }
+ stream << "active: " << std::endl;
+ for (LiveInterval* active_interval : active_) {
+ DumpInterval(stream, active_interval);
+ }
+ stream << "unhandled: " << std::endl;
+ auto unhandled = (unhandled_ != nullptr) ?
+ unhandled_ : &unhandled_core_intervals_;
+ for (LiveInterval* unhandled_interval : *unhandled) {
+ DumpInterval(stream, unhandled_interval);
+ }
+ stream << "handled: " << std::endl;
+ for (LiveInterval* handled_interval : handled_) {
+ DumpInterval(stream, handled_interval);
+ }
+}
+
+// By the book implementation of a linear scan register allocator.
+void RegisterAllocatorLinearScan::LinearScan() {
+ while (!unhandled_->empty()) {
+ // (1) Remove interval with the lowest start position from unhandled.
+ LiveInterval* current = unhandled_->back();
+ unhandled_->pop_back();
+
+ // Make sure the interval is in an expected state.
+ DCHECK(!current->IsFixed() && !current->HasSpillSlot());
+ // Make sure we are going in the right order.
+ DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
+ // Make sure a low interval is always with a high.
+ DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
+ // Make sure a high interval is always with a low.
+ DCHECK(current->IsLowInterval() ||
+ unhandled_->empty() ||
+ !unhandled_->back()->IsHighInterval());
+
+ size_t position = current->GetStart();
+
+ // Remember the inactive_ size here since the ones moved to inactive_ from
+ // active_ below shouldn't need to be re-checked.
+ size_t inactive_intervals_to_handle = inactive_.size();
+
+ // (2) Remove currently active intervals that are dead at this position.
+ // Move active intervals that have a lifetime hole at this position
+ // to inactive.
+ auto active_kept_end = std::remove_if(
+ active_.begin(),
+ active_.end(),
+ [this, position](LiveInterval* interval) {
+ if (interval->IsDeadAt(position)) {
+ handled_.push_back(interval);
+ return true;
+ } else if (!interval->Covers(position)) {
+ inactive_.push_back(interval);
+ return true;
+ } else {
+ return false; // Keep this interval.
+ }
+ });
+ active_.erase(active_kept_end, active_.end());
+
+ // (3) Remove currently inactive intervals that are dead at this position.
+ // Move inactive intervals that cover this position to active.
+ auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
+ auto inactive_kept_end = std::remove_if(
+ inactive_.begin(),
+ inactive_to_handle_end,
+ [this, position](LiveInterval* interval) {
+ DCHECK(interval->GetStart() < position || interval->IsFixed());
+ if (interval->IsDeadAt(position)) {
+ handled_.push_back(interval);
+ return true;
+ } else if (interval->Covers(position)) {
+ active_.push_back(interval);
+ return true;
+ } else {
+ return false; // Keep this interval.
+ }
+ });
+ inactive_.erase(inactive_kept_end, inactive_to_handle_end);
+
+ if (current->IsSlowPathSafepoint()) {
+ // Synthesized interval to record the maximum number of live registers
+ // at safepoints. No need to allocate a register for it.
+ if (processing_core_registers_) {
+ maximum_number_of_live_core_registers_ =
+ std::max(maximum_number_of_live_core_registers_, active_.size());
+ } else {
+ maximum_number_of_live_fp_registers_ =
+ std::max(maximum_number_of_live_fp_registers_, active_.size());
+ }
+ DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
+ continue;
+ }
+
+ if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
+ DCHECK(!current->HasRegister());
+ // Allocating the low part was unsuccessful. The split interval for the high part
+ // will be handled next (it is in the `unhandled_` list).
+ continue;
+ }
+
+ // (4) Try to find an available register.
+ bool success = TryAllocateFreeReg(current);
+
+ // (5) If no register could be found, we need to spill.
+ if (!success) {
+ success = AllocateBlockedReg(current);
+ }
+
+ // (6) If the interval had a register allocated, add it to the list of active
+ // intervals.
+ if (success) {
+ codegen_->AddAllocatedRegister(processing_core_registers_
+ ? Location::RegisterLocation(current->GetRegister())
+ : Location::FpuRegisterLocation(current->GetRegister()));
+ active_.push_back(current);
+ if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
+ current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
+ }
+ }
+ }
+}
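+
+// Condensed illustration of the bookkeeping in steps (2) and (3) of LinearScan() above,
+// over plain half-open ranges without lifetime holes (hypothetical helpers, illustration
+// only): at the start position of the current interval, active intervals that have died
+// move to handled, and inactive intervals that cover the position move back to active.
+//
+//   struct Range { size_t start, end; };  // Half-open [start, end).
+//   static bool IsDeadAt(const Range& r, size_t p) { return p >= r.end; }
+//   static bool Covers(const Range& r, size_t p) { return p >= r.start && p < r.end; }
+//
+//   // With A = {0, 4}, B = {2, 8} and the current interval starting at 6: IsDeadAt(A, 6)
+//   // holds, so A moves to handled; Covers(B, 6) holds, so B stays active and its
+//   // register conflicts with the current interval.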
+
+static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
+ DCHECK(!interval->IsHighInterval());
+ // Note that the same instruction may occur multiple times in the input list,
+ // so `free_until` may have changed already.
+ // Since `position` is not the current scan position, we need to use CoversSlow.
+ if (interval->IsDeadAt(position)) {
+ // Set the register to be free. Note that inactive intervals might later
+ // update this.
+ free_until[interval->GetRegister()] = kMaxLifetimePosition;
+ if (interval->HasHighInterval()) {
+ DCHECK(interval->GetHighInterval()->IsDeadAt(position));
+ free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
+ }
+ } else if (!interval->CoversSlow(position)) {
+ // The interval becomes inactive at `defined_by`. We make its register
+ // available only until the next use strictly after `defined_by`.
+ free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
+ if (interval->HasHighInterval()) {
+ DCHECK(!interval->GetHighInterval()->CoversSlow(position));
+ free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
+ }
+ }
+}
+
+// Find a free register. If multiple are found, pick the register that
+// is free the longest.
+bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
+ size_t* free_until = registers_array_;
+
+ // First set all registers to be free.
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ free_until[i] = kMaxLifetimePosition;
+ }
+
+ // For each active interval, set its register to not free.
+ for (LiveInterval* interval : active_) {
+ DCHECK(interval->HasRegister());
+ free_until[interval->GetRegister()] = 0;
+ }
+
+ // An interval that starts at an instruction (that is, it is not split) may
+ // re-use the registers used by the inputs of that instruction, based on the
+ // location summary.
+ HInstruction* defined_by = current->GetDefinedBy();
+ if (defined_by != nullptr && !current->IsSplit()) {
+ LocationSummary* locations = defined_by->GetLocations();
+ if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
+ HInputsRef inputs = defined_by->GetInputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ // Take the last interval of the input. It is the location of that interval
+ // that will be used at `defined_by`.
+ LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
+ // Note that interval may have not been processed yet.
+ // TODO: Handle non-split intervals last in the work list.
+ if (locations->InAt(i).IsValid()
+ && interval->HasRegister()
+ && interval->SameRegisterKind(*current)) {
+ // The input must be live until the end of `defined_by`, to comply to
+ // the linear scan algorithm. So we use `defined_by`'s end lifetime
+ // position to check whether the input is dead or is inactive after
+ // `defined_by`.
+ DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
+ size_t position = defined_by->GetLifetimePosition() + 1;
+ FreeIfNotCoverAt(interval, position, free_until);
+ }
+ }
+ }
+ }
+
+ // For each inactive interval, set its register to be free until
+ // the next intersection with `current`.
+ for (LiveInterval* inactive : inactive_) {
+ // Temp/Slow-path-safepoint intervals have no holes.
+ DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+ if (!current->IsSplit() && !inactive->IsFixed()) {
+ // Neither current nor inactive is fixed.
+ // Thanks to SSA, a non-split interval starting in a hole of an
+ // inactive interval should never intersect with that inactive interval.
+ // This only holds because neither is fixed: fixed intervals don't come from SSA.
+ DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+ continue;
+ }
+
+ DCHECK(inactive->HasRegister());
+ if (free_until[inactive->GetRegister()] == 0) {
+ // Already used by some active interval. No need to intersect.
+ continue;
+ }
+ size_t next_intersection = inactive->FirstIntersectionWith(current);
+ if (next_intersection != kNoLifetime) {
+ free_until[inactive->GetRegister()] =
+ std::min(free_until[inactive->GetRegister()], next_intersection);
+ }
+ }
+
+ int reg = kNoRegister;
+ if (current->HasRegister()) {
+ // Some instructions have a fixed register output.
+ reg = current->GetRegister();
+ if (free_until[reg] == 0) {
+ DCHECK(current->IsHighInterval());
+ // AllocateBlockedReg will spill the holder of the register.
+ return false;
+ }
+ } else {
+ DCHECK(!current->IsHighInterval());
+ int hint = current->FindFirstRegisterHint(free_until, liveness_);
+ if ((hint != kNoRegister)
+ // For simplicity, if the hint we are getting for a pair cannot be used,
+ // we are just going to allocate a new pair.
+ && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) {
+ DCHECK(!IsBlocked(hint));
+ reg = hint;
+ } else if (current->IsLowInterval()) {
+ reg = FindAvailableRegisterPair(free_until, current->GetStart());
+ } else {
+ reg = FindAvailableRegister(free_until, current);
+ }
+ }
+
+ DCHECK_NE(reg, kNoRegister);
+ // If the register we found is already taken at the current position, we need to spill.
+ if (free_until[reg] == 0) {
+ return false;
+ }
+
+ if (current->IsLowInterval()) {
+ // If the high register of this interval is not available, we need to spill.
+ int high_reg = current->GetHighInterval()->GetRegister();
+ if (high_reg == kNoRegister) {
+ high_reg = GetHighForLowRegister(reg);
+ }
+ if (free_until[high_reg] == 0) {
+ return false;
+ }
+ }
+
+ current->SetRegister(reg);
+ if (!current->IsDeadAt(free_until[reg])) {
+ // If the register is only available for a subset of live ranges
+ // covered by `current`, split `current` before the position where
+ // the register is not available anymore.
+ LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]);
+ DCHECK(split != nullptr);
+ AddSorted(unhandled_, split);
+ }
+ return true;
+}
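
The "free the longest" policy above can be shown with a small standalone sketch. It uses a plain std::vector<size_t> in place of the allocator's registers_array_, and kMaxPosition stands in for kMaxLifetimePosition; these names are illustrative, not the ART API.

#include <cstddef>
#include <vector>

// Sentinel meaning "free for the rest of the method".
constexpr size_t kMaxPosition = static_cast<size_t>(-1);

// Pick the register that stays free the longest. Returns -1 when every
// register is already taken at the current position, i.e. the caller must
// fall back to the blocked-register path (spilling).
int PickFreeLongest(const std::vector<size_t>& free_until) {
  int best = -1;
  for (size_t i = 0; i < free_until.size(); ++i) {
    if (free_until[i] == 0) continue;  // Taken by an active interval.
    if (best == -1 || free_until[i] > free_until[static_cast<size_t>(best)]) {
      best = static_cast<int>(i);
      if (free_until[i] == kMaxPosition) break;  // Cannot do better.
    }
  }
  return best;
}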
+
+bool RegisterAllocatorLinearScan::IsBlocked(int reg) const {
+ return processing_core_registers_
+ ? blocked_core_registers_[reg]
+ : blocked_fp_registers_[reg];
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
+ int reg = kNoRegister;
+ // Pick the register pair whose next use is furthest away.
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ if (IsBlocked(i)) continue;
+ if (!IsLowRegister(i)) continue;
+ int high_register = GetHighForLowRegister(i);
+ if (IsBlocked(high_register)) continue;
+ int existing_high_register = GetHighForLowRegister(reg);
+ if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
+ && next_use[high_register] >= next_use[existing_high_register])) {
+ reg = i;
+ if (next_use[i] == kMaxLifetimePosition
+ && next_use[high_register] == kMaxLifetimePosition) {
+ break;
+ }
+ } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
+ // If one of the current registers is known to be unavailable, just unconditionally
+ // try a new one.
+ reg = i;
+ }
+ }
+ return reg;
+}
+
+bool RegisterAllocatorLinearScan::IsCallerSaveRegister(int reg) const {
+ return processing_core_registers_
+ ? !codegen_->IsCoreCalleeSaveRegister(reg)
+ : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
+ // We special case intervals that do not span a safepoint to try to find a caller-save
+ // register if one is available. We iterate from 0 to the number of registers,
+ // so if there are caller-save registers available at the end, we continue the iteration.
+ bool prefers_caller_save = !current->HasWillCallSafepoint();
+ int reg = kNoRegister;
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ if (IsBlocked(i)) {
+ // Register cannot be used. Continue.
+ continue;
+ }
+
+ // Best case: we found a register fully available.
+ if (next_use[i] == kMaxLifetimePosition) {
+ if (prefers_caller_save && !IsCallerSaveRegister(i)) {
+ // We can get shorter encodings on some platforms by using
+ // small register numbers. So only update the candidate if the previous
+ // one was not available for the whole method.
+ if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
+ reg = i;
+ }
+ // Continue the iteration in the hope of finding a caller save register.
+ continue;
+ } else {
+ reg = i;
+ // We know the register is good enough. Return it.
+ break;
+ }
+ }
+
+ // If we had no register before, take this one as a reference.
+ if (reg == kNoRegister) {
+ reg = i;
+ continue;
+ }
+
+ // Pick the register whose next use is furthest away.
+ if (next_use[i] > next_use[reg]) {
+ reg = i;
+ continue;
+ }
+ }
+ return reg;
+}
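
The caller-save preference can be sketched on its own as well. The is_caller_save predicate below is a stand-in for the codegen query used above, and prefers_caller_save corresponds to an interval that does not span a safepoint; this is a simplified illustration, not the allocator's interface.

#include <cstddef>
#include <functional>
#include <vector>

constexpr size_t kFullyFree = static_cast<size_t>(-1);

// Prefer a fully free caller-save register when no safepoint is spanned;
// otherwise settle for the register whose next use is furthest away.
int PickRegister(const std::vector<size_t>& next_use,
                 const std::function<bool(size_t)>& is_caller_save,
                 bool prefers_caller_save) {
  int reg = -1;
  for (size_t i = 0; i < next_use.size(); ++i) {
    if (next_use[i] == kFullyFree) {
      if (prefers_caller_save && !is_caller_save(i)) {
        // Keep this callee-save candidate, but keep scanning for a caller-save one.
        if (reg == -1 || next_use[static_cast<size_t>(reg)] != kFullyFree) {
          reg = static_cast<int>(i);
        }
        continue;
      }
      return static_cast<int>(i);  // Fully free, and caller-save if we wanted one.
    }
    if (reg == -1 || next_use[i] > next_use[static_cast<size_t>(reg)]) {
      reg = static_cast<int>(i);
    }
  }
  return reg;
}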
+
+// Remove the interval and its other half, if any. Returns an iterator to the following element.
+static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
+ ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
+ DCHECK(intervals->begin() <= pos && pos < intervals->end());
+ LiveInterval* interval = *pos;
+ if (interval->IsLowInterval()) {
+ DCHECK(pos + 1 < intervals->end());
+ DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
+ return intervals->erase(pos, pos + 2);
+ } else if (interval->IsHighInterval()) {
+ DCHECK(intervals->begin() < pos);
+ DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
+ return intervals->erase(pos - 1, pos + 1);
+ } else {
+ return intervals->erase(pos);
+ }
+}
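
The helper relies on a pair's two halves being adjacent in the vector (low immediately followed by high). A minimal sketch of the same erase pattern, with a toy Interval type rather than ART's LiveInterval:

#include <cassert>
#include <vector>

struct Interval {
  bool is_low = false;
  bool is_high = false;
};

// Erase the element at `pos` and, if it is one half of a pair, the adjacent
// other half. Returns the iterator to the element that follows the erased range.
std::vector<Interval>::iterator RemoveWithOtherHalf(std::vector<Interval>* v,
                                                    std::vector<Interval>::iterator pos) {
  if (pos->is_low) {
    assert(pos + 1 != v->end() && (pos + 1)->is_high);
    return v->erase(pos, pos + 2);
  }
  if (pos->is_high) {
    assert(pos != v->begin() && (pos - 1)->is_low);
    return v->erase(pos - 1, pos + 1);
  }
  return v->erase(pos);
}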
+
+bool RegisterAllocatorLinearScan::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+ size_t first_register_use,
+ size_t* next_use) {
+ for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+ LiveInterval* active = *it;
+ DCHECK(active->HasRegister());
+ if (active->IsFixed()) continue;
+ if (active->IsHighInterval()) continue;
+ if (first_register_use > next_use[active->GetRegister()]) continue;
+
+ // Split the first interval found that is either:
+ // 1) A non-pair interval.
+ // 2) A pair interval whose high is not low + 1.
+ // 3) A pair interval whose low is not even.
+ if (!active->IsLowInterval() ||
+ IsLowOfUnalignedPairInterval(active) ||
+ !IsLowRegister(active->GetRegister())) {
+ LiveInterval* split = Split(active, position);
+ if (split != active) {
+ handled_.push_back(active);
+ }
+ RemoveIntervalAndPotentialOtherHalf(&active_, it);
+ AddSorted(unhandled_, split);
+ return true;
+ }
+ }
+ return false;
+}
+
+// Find the register whose next use is furthest away, and spill the interval
+// that holds it. If the first register use of `current` comes after that
+// register's next use, we spill `current` instead.
+bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
+ size_t first_register_use = current->FirstRegisterUse();
+ if (current->HasRegister()) {
+ DCHECK(current->IsHighInterval());
+ // The low interval has allocated the register for the high interval. In
+ // case the low interval had to split both intervals, we may end up in a
+ // situation where the high interval does not have a register use anymore.
+ // We must still proceed in order to split currently active and inactive
+ // uses of the high interval's register, and put the high interval in the
+ // active set.
+ DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr));
+ } else if (first_register_use == kNoLifetime) {
+ AllocateSpillSlotFor(current);
+ return false;
+ }
+
+ // First set all registers as not being used.
+ size_t* next_use = registers_array_;
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ next_use[i] = kMaxLifetimePosition;
+ }
+
+ // For each active interval, find the next use of its register after the
+ // start of current.
+ for (LiveInterval* active : active_) {
+ DCHECK(active->HasRegister());
+ if (active->IsFixed()) {
+ next_use[active->GetRegister()] = current->GetStart();
+ } else {
+ size_t use = active->FirstRegisterUseAfter(current->GetStart());
+ if (use != kNoLifetime) {
+ next_use[active->GetRegister()] = use;
+ }
+ }
+ }
+
+ // For each inactive interval, find the next use of its register after the
+ // start of current.
+ for (LiveInterval* inactive : inactive_) {
+ // Temp/Slow-path-safepoint intervals have no holes.
+ DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+ if (!current->IsSplit() && !inactive->IsFixed()) {
+ // Neither current nor inactive is fixed.
+ // Thanks to SSA, a non-split interval starting in a hole of an
+ // inactive interval should never intersect with that inactive interval.
+ // This only holds because neither is fixed: fixed intervals don't come from SSA.
+ DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+ continue;
+ }
+ DCHECK(inactive->HasRegister());
+ size_t next_intersection = inactive->FirstIntersectionWith(current);
+ if (next_intersection != kNoLifetime) {
+ if (inactive->IsFixed()) {
+ next_use[inactive->GetRegister()] =
+ std::min(next_intersection, next_use[inactive->GetRegister()]);
+ } else {
+ size_t use = inactive->FirstUseAfter(current->GetStart());
+ if (use != kNoLifetime) {
+ next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
+ }
+ }
+ }
+ }
+
+ int reg = kNoRegister;
+ bool should_spill = false;
+ if (current->HasRegister()) {
+ DCHECK(current->IsHighInterval());
+ reg = current->GetRegister();
+ // When allocating the low part, we made sure the high register was available.
+ DCHECK_LT(first_register_use, next_use[reg]);
+ } else if (current->IsLowInterval()) {
+ reg = FindAvailableRegisterPair(next_use, first_register_use);
+ // We should spill if either register of the pair is not available.
+ should_spill = (first_register_use >= next_use[reg])
+ || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
+ } else {
+ DCHECK(!current->IsHighInterval());
+ reg = FindAvailableRegister(next_use, current);
+ should_spill = (first_register_use >= next_use[reg]);
+ }
+
+ DCHECK_NE(reg, kNoRegister);
+ if (should_spill) {
+ DCHECK(!current->IsHighInterval());
+ bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
+ if (is_allocation_at_use_site) {
+ if (!current->IsLowInterval()) {
+ DumpInterval(std::cerr, current);
+ DumpAllIntervals(std::cerr);
+ // This situation has the potential to loop forever, so we make it a non-debug CHECK.
+ HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
+ CHECK(false) << "There is not enough registers available for "
+ << current->GetParent()->GetDefinedBy()->DebugName() << " "
+ << current->GetParent()->GetDefinedBy()->GetId()
+ << " at " << first_register_use - 1 << " "
+ << (at == nullptr ? "" : at->DebugName());
+ }
+
+ // If we're allocating a register for `current` because the instruction at
+ // that position requires it, but we think we should spill, then there are
+ // non-pair intervals or unaligned pair intervals blocking the allocation.
+ // We split the first interval found, and put ourselves first in the
+ // `unhandled_` list.
+ bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
+ first_register_use,
+ next_use);
+ DCHECK(success);
+ LiveInterval* existing = unhandled_->back();
+ DCHECK(existing->IsHighInterval());
+ DCHECK_EQ(existing->GetLowInterval(), current);
+ unhandled_->push_back(current);
+ } else {
+ // If the first register use of `current` comes after the next use of the found
+ // register, we split this interval just before its first register use.
+ AllocateSpillSlotFor(current);
+ LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+ DCHECK(current != split);
+ AddSorted(unhandled_, split);
+ }
+ return false;
+ } else {
+ // Use this register and spill the active and inactive intervals that
+ // hold that register.
+ current->SetRegister(reg);
+
+ for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+ LiveInterval* active = *it;
+ if (active->GetRegister() == reg) {
+ DCHECK(!active->IsFixed());
+ LiveInterval* split = Split(active, current->GetStart());
+ if (split != active) {
+ handled_.push_back(active);
+ }
+ RemoveIntervalAndPotentialOtherHalf(&active_, it);
+ AddSorted(unhandled_, split);
+ break;
+ }
+ }
+
+ // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
+ for (auto it = inactive_.begin(); it != inactive_.end(); ) {
+ LiveInterval* inactive = *it;
+ bool erased = false;
+ if (inactive->GetRegister() == reg) {
+ if (!current->IsSplit() && !inactive->IsFixed()) {
+ // Neither current nor inactive is fixed.
+ // Thanks to SSA, a non-split interval starting in a hole of an
+ // inactive interval should never intersect with that inactive interval.
+ // This only holds because neither is fixed: fixed intervals don't come from SSA.
+ DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+ } else {
+ size_t next_intersection = inactive->FirstIntersectionWith(current);
+ if (next_intersection != kNoLifetime) {
+ if (inactive->IsFixed()) {
+ LiveInterval* split = Split(current, next_intersection);
+ DCHECK_NE(split, current);
+ AddSorted(unhandled_, split);
+ } else {
+ // Split at the start of `current`, which will lead to splitting
+ // at the end of the lifetime hole of `inactive`.
+ LiveInterval* split = Split(inactive, current->GetStart());
+ // If it's inactive, it must start before the current interval.
+ DCHECK_NE(split, inactive);
+ it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
+ erased = true;
+ handled_.push_back(inactive);
+ AddSorted(unhandled_, split);
+ }
+ }
+ }
+ }
+ // If we have erased the element, `it` already points to the next element.
+ // Otherwise we need to move to the next element.
+ if (!erased) {
+ ++it;
+ }
+ }
+
+ return true;
+ }
+}
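
The spill-versus-evict decision above boils down to a comparison of lifetime positions. Below is a simplified sketch of just that predicate, with the pair case handled as in the code (spill if either half of the pair is needed too soon by its current holder); parameter names are illustrative.

#include <cstddef>

// Returns true when `current` should be spilled instead of evicting the
// holder(s) of the chosen register: the first position at which `current`
// really needs a register is no earlier than the next use of the register
// (or, for a pair, of either half) by its current holder.
bool ShouldSpillCurrent(size_t first_register_use,
                        size_t next_use_low,
                        size_t next_use_high,
                        bool is_pair) {
  if (is_pair) {
    return first_register_use >= next_use_low || first_register_use >= next_use_high;
  }
  return first_register_use >= next_use_low;
}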
+
+void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
+ DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
+ size_t insert_at = 0;
+ for (size_t i = array->size(); i > 0; --i) {
+ LiveInterval* current = (*array)[i - 1u];
+ // High intervals must be processed right after their low equivalent.
+ if (current->StartsAfter(interval) && !current->IsHighInterval()) {
+ insert_at = i;
+ break;
+ } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
+ // Ensure the slow path interval is the last to be processed at its location: we want the
+ // interval to know all live registers at this location.
+ DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
+ insert_at = i;
+ break;
+ }
+ }
+
+ // Insert the high interval before the low, to ensure the low is processed first.
+ auto insert_pos = array->begin() + insert_at;
+ if (interval->HasHighInterval()) {
+ array->insert(insert_pos, { interval->GetHighInterval(), interval });
+ } else if (interval->HasLowInterval()) {
+ array->insert(insert_pos, { interval, interval->GetLowInterval() });
+ } else {
+ array->insert(insert_pos, interval);
+ }
+}
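
AddSorted keeps the worklist ordered so that the interval with the lowest start position sits at the back, where it is picked up next. A standalone sketch of that back-to-front insertion, using bare start positions instead of intervals (and ignoring the high-interval and slow-path special cases):

#include <cstddef>
#include <vector>

// The entry with the lowest start position is kept at the back of the
// worklist. Scan from the back for the first element that starts at or after
// the new one, and insert just behind it.
void AddSortedByStart(std::vector<size_t>* worklist, size_t start) {
  size_t insert_at = 0;
  for (size_t i = worklist->size(); i > 0; --i) {
    if ((*worklist)[i - 1] >= start) {
      insert_at = i;
      break;
    }
  }
  worklist->insert(worklist->begin() + insert_at, start);
}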
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
+ if (interval->IsHighInterval()) {
+ // The low interval already took care of allocating the spill slot.
+ DCHECK(!interval->GetLowInterval()->HasRegister());
+ DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot());
+ return;
+ }
+
+ LiveInterval* parent = interval->GetParent();
+
+ // An instruction gets a spill slot for its entire lifetime. If the parent
+ // of this interval already has a spill slot, there is nothing to do.
+ if (parent->HasSpillSlot()) {
+ return;
+ }
+
+ HInstruction* defined_by = parent->GetDefinedBy();
+ DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
+
+ if (defined_by->IsParameterValue()) {
+ // Parameters have their own stack slot.
+ parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+ return;
+ }
+
+ if (defined_by->IsCurrentMethod()) {
+ parent->SetSpillSlot(0);
+ return;
+ }
+
+ if (defined_by->IsConstant()) {
+ // Constants don't need a spill slot.
+ return;
+ }
+
+ ArenaVector<size_t>* spill_slots = nullptr;
+ switch (interval->GetType()) {
+ case Primitive::kPrimDouble:
+ spill_slots = &double_spill_slots_;
+ break;
+ case Primitive::kPrimLong:
+ spill_slots = &long_spill_slots_;
+ break;
+ case Primitive::kPrimFloat:
+ spill_slots = &float_spill_slots_;
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ spill_slots = &int_spill_slots_;
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
+ }
+
+ // Find an available spill slot.
+ size_t slot = 0;
+ for (size_t e = spill_slots->size(); slot < e; ++slot) {
+ if ((*spill_slots)[slot] <= parent->GetStart()) {
+ if (!parent->NeedsTwoSpillSlots()) {
+ // One spill slot is sufficient.
+ break;
+ }
+ if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
+ // Two spill slots are available.
+ break;
+ }
+ }
+ }
+
+ size_t end = interval->GetLastSibling()->GetEnd();
+ if (parent->NeedsTwoSpillSlots()) {
+ if (slot + 2u > spill_slots->size()) {
+ // We need a new spill slot.
+ spill_slots->resize(slot + 2u, end);
+ }
+ (*spill_slots)[slot] = end;
+ (*spill_slots)[slot + 1] = end;
+ } else {
+ if (slot == spill_slots->size()) {
+ // We need a new spill slot.
+ spill_slots->push_back(end);
+ } else {
+ (*spill_slots)[slot] = end;
+ }
+ }
+
+ // Note that the exact spill slot location will be computed when we resolve,
+ // that is when we know the number of spill slots for each type.
+ parent->SetSpillSlot(slot);
+}
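
The typed spill slot lists behave like simple free lists keyed by the position at which a slot becomes free again. A minimal sketch of the single-slot path (the two-slot case for wide values follows the same idea with a pair of adjacent entries); the function name is illustrative:

#include <cstddef>
#include <vector>

// Each entry records the position up to which the slot is in use. Reuse the
// first slot whose tenant ends no later than `start`; otherwise grow the list.
// Returns the chosen slot index.
size_t AllocateTypedSlot(std::vector<size_t>* slots, size_t start, size_t end) {
  size_t slot = 0;
  for (; slot < slots->size(); ++slot) {
    if ((*slots)[slot] <= start) break;  // Previous tenant is done before we start.
  }
  if (slot == slots->size()) {
    slots->push_back(end);  // No reusable slot: allocate a new one.
  } else {
    (*slots)[slot] = end;   // Reuse the slot and record our own end position.
  }
  return slot;
}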
+
+void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
+ LiveInterval* interval = phi->GetLiveInterval();
+
+ HInstruction* previous_phi = phi->GetPrevious();
+ DCHECK(previous_phi == nullptr ||
+ previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+ << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
+
+ if (phi->IsVRegEquivalentOf(previous_phi)) {
+ // This is an equivalent of the previous phi. We need to assign the same
+ // catch phi slot.
+ DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+ interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+ } else {
+ // Allocate a new spill slot for this catch phi.
+ // TODO: Reuse spill slots when intervals of phis from different catch
+ // blocks do not overlap.
+ interval->SetSpillSlot(catch_phi_spill_slots_);
+ catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
new file mode 100644
index 0000000000..1a643a0d1a
--- /dev/null
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class HPhi;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * An implementation of a linear scan register allocator on an `HGraph` with SSA form.
+ */
+class RegisterAllocatorLinearScan : public RegisterAllocator {
+ public:
+ RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis);
+ ~RegisterAllocatorLinearScan() OVERRIDE {}
+
+ void AllocateRegisters() OVERRIDE;
+
+ bool Validate(bool log_fatal_on_failure) OVERRIDE {
+ processing_core_registers_ = true;
+ if (!ValidateInternal(log_fatal_on_failure)) {
+ return false;
+ }
+ processing_core_registers_ = false;
+ return ValidateInternal(log_fatal_on_failure);
+ }
+
+ size_t GetNumberOfSpillSlots() const {
+ return int_spill_slots_.size()
+ + long_spill_slots_.size()
+ + float_spill_slots_.size()
+ + double_spill_slots_.size()
+ + catch_phi_spill_slots_;
+ }
+
+ private:
+ // Main methods of the allocator.
+ void LinearScan();
+ bool TryAllocateFreeReg(LiveInterval* interval);
+ bool AllocateBlockedReg(LiveInterval* interval);
+
+ // Add `interval` in the given sorted list.
+ static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
+
+ // Returns whether `reg` is blocked by the code generator.
+ bool IsBlocked(int reg) const;
+
+ // Update the interval for the register in `location` to cover [start, end).
+ void BlockRegister(Location location, size_t start, size_t end);
+ void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+ // Allocate a spill slot for the given interval. Should be called in linear
+ // order of interval starting positions.
+ void AllocateSpillSlotFor(LiveInterval* interval);
+
+ // Allocate a spill slot for the given catch phi. Will allocate the same slot
+ // for phis which share the same vreg. Must be called in reverse linear order
+ // of lifetime positions and ascending vreg numbers for correctness.
+ void AllocateSpillSlotForCatchPhi(HPhi* phi);
+
+ // Helper methods.
+ void AllocateRegistersInternal();
+ void ProcessInstruction(HInstruction* instruction);
+ bool ValidateInternal(bool log_fatal_on_failure) const;
+ void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
+ void DumpAllIntervals(std::ostream& stream) const;
+ int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
+ int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
+ bool IsCallerSaveRegister(int reg) const;
+
+ // Try splitting an active non-pair or unaligned pair interval at the given `position`.
+ // Returns whether it was successful at finding such an interval.
+ bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+ size_t first_register_use,
+ size_t* next_use);
+
+ // List of intervals for core registers that must be processed, ordered by start
+ // position. Last entry is the interval that has the lowest start position.
+ // This list is initially populated before doing the linear scan.
+ ArenaVector<LiveInterval*> unhandled_core_intervals_;
+
+ // List of intervals for floating-point registers. Same comments as above.
+ ArenaVector<LiveInterval*> unhandled_fp_intervals_;
+
+ // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
+ // or `unhandled_fp_intervals_`.
+ ArenaVector<LiveInterval*>* unhandled_;
+
+ // List of intervals that have been processed.
+ ArenaVector<LiveInterval*> handled_;
+
+ // List of intervals that are currently active when processing a new live interval.
+ // That is, they have a live range that spans the start of the new interval.
+ ArenaVector<LiveInterval*> active_;
+
+ // List of intervals that are currently inactive when processing a new live interval.
+ // That is, they have a lifetime hole that spans the start of the new interval.
+ ArenaVector<LiveInterval*> inactive_;
+
+ // Fixed intervals for physical registers. Such intervals cover the positions
+ // where an instruction requires a specific register.
+ ArenaVector<LiveInterval*> physical_core_register_intervals_;
+ ArenaVector<LiveInterval*> physical_fp_register_intervals_;
+
+ // Intervals for temporaries. Such intervals cover the positions
+ // where an instruction requires a temporary.
+ ArenaVector<LiveInterval*> temp_intervals_;
+
+ // The spill slots allocated for live intervals. We ensure spill slots
+ // are typed to avoid (1) doing moves and swaps between two different kinds
+ // of registers, and (2) swapping between a single stack slot and a double
+ // stack slot. This simplifies the parallel move resolver.
+ ArenaVector<size_t> int_spill_slots_;
+ ArenaVector<size_t> long_spill_slots_;
+ ArenaVector<size_t> float_spill_slots_;
+ ArenaVector<size_t> double_spill_slots_;
+
+ // Spill slots allocated to catch phis. This category is special-cased because
+ // (1) slots are allocated prior to linear scan and in reverse linear order,
+ // (2) equivalent phis need to share slots despite having different types.
+ size_t catch_phi_spill_slots_;
+
+ // Instructions that need a safepoint.
+ ArenaVector<HInstruction*> safepoints_;
+
+ // True if processing core registers. False if processing floating
+ // point registers.
+ bool processing_core_registers_;
+
+ // Number of registers for the current register kind (core or floating point).
+ size_t number_of_registers_;
+
+ // Temporary array, allocated ahead of time for simplicity.
+ size_t* registers_array_;
+
+ // Blocked registers, as decided by the code generator.
+ bool* const blocked_core_registers_;
+ bool* const blocked_fp_registers_;
+
+ // Slots reserved for out arguments.
+ size_t reserved_out_slots_;
+
+ // The maximum live core registers at safepoints.
+ size_t maximum_number_of_live_core_registers_;
+
+ // The maximum live FP registers at safepoints.
+ size_t maximum_number_of_live_fp_registers_;
+
+ ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
+ ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
+
+ DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorLinearScan);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index a9de7c3e59..55ea99e592 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -25,17 +25,35 @@
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "register_allocator.h"
+#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
namespace art {
+using Strategy = RegisterAllocator::Strategy;
+
// Note: the register allocator tests rely on the fact that constants have live
// intervals and registers get allocated to them.
-class RegisterAllocatorTest : public CommonCompilerTest {};
+class RegisterAllocatorTest : public CommonCompilerTest {
+ protected:
+ // These functions need to access private variables of LocationSummary, so we declare them
+ // as members of RegisterAllocatorTest, which we make a friend class.
+ static void SameAsFirstInputHint(Strategy strategy);
+ static void ExpectedInRegisterHint(Strategy strategy);
+};
+
+// This macro instantiates the test for every register allocation strategy that should be tested.
+#define TEST_ALL_STRATEGIES(test_name)\
+TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\
+ test_name(Strategy::kRegisterAllocatorLinearScan);\
+}\
+TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
+ test_name(Strategy::kRegisterAllocatorGraphColor);\
+}
-static bool Check(const uint16_t* data) {
+static bool Check(const uint16_t* data, Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = CreateCFG(&allocator, data);
@@ -44,9 +62,10 @@ static bool Check(const uint16_t* data) {
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
- return register_allocator.Validate(false);
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
+ return register_allocator->Validate(false);
}
/**
@@ -142,7 +161,7 @@ TEST_F(RegisterAllocatorTest, ValidateIntervals) {
}
}
-TEST_F(RegisterAllocatorTest, CFG1) {
+static void CFG1(Strategy strategy) {
/*
* Test the following snippet:
* return 0;
@@ -159,10 +178,12 @@ TEST_F(RegisterAllocatorTest, CFG1) {
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN);
- ASSERT_TRUE(Check(data));
+ ASSERT_TRUE(Check(data, strategy));
}
-TEST_F(RegisterAllocatorTest, Loop1) {
+TEST_ALL_STRATEGIES(CFG1);
+
+static void Loop1(Strategy strategy) {
/*
* Test the following snippet:
* int a = 0;
@@ -198,10 +219,12 @@ TEST_F(RegisterAllocatorTest, Loop1) {
Instruction::CONST_4 | 5 << 12 | 1 << 8,
Instruction::RETURN | 1 << 8);
- ASSERT_TRUE(Check(data));
+ ASSERT_TRUE(Check(data, strategy));
}
-TEST_F(RegisterAllocatorTest, Loop2) {
+TEST_ALL_STRATEGIES(Loop1);
+
+static void Loop2(Strategy strategy) {
/*
* Test the following snippet:
* int a = 0;
@@ -247,10 +270,12 @@ TEST_F(RegisterAllocatorTest, Loop2) {
Instruction::ADD_INT, 1 << 8 | 0,
Instruction::RETURN | 1 << 8);
- ASSERT_TRUE(Check(data));
+ ASSERT_TRUE(Check(data, strategy));
}
-TEST_F(RegisterAllocatorTest, Loop3) {
+TEST_ALL_STRATEGIES(Loop2);
+
+static void Loop3(Strategy strategy) {
/*
* Test the following snippet:
* int a = 0
@@ -295,9 +320,10 @@ TEST_F(RegisterAllocatorTest, Loop3) {
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
- ASSERT_TRUE(register_allocator.Validate(false));
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
+ ASSERT_TRUE(register_allocator->Validate(false));
HBasicBlock* loop_header = graph->GetBlocks()[2];
HPhi* phi = loop_header->GetFirstPhi()->AsPhi();
@@ -313,6 +339,8 @@ TEST_F(RegisterAllocatorTest, Loop3) {
ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
}
+TEST_ALL_STRATEGIES(Loop3);
+
TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
@@ -353,7 +381,7 @@ TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
}
-TEST_F(RegisterAllocatorTest, DeadPhi) {
+static void DeadPhi(Strategy strategy) {
/* Test for a dead loop phi taking as back-edge input a phi that also has
* this loop phi as input. Walking backwards in SsaDeadPhiElimination
* does not solve the problem because the loop phi will be visited last.
@@ -384,15 +412,19 @@ TEST_F(RegisterAllocatorTest, DeadPhi) {
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
- ASSERT_TRUE(register_allocator.Validate(false));
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
+ ASSERT_TRUE(register_allocator->Validate(false));
}
+TEST_ALL_STRATEGIES(DeadPhi);
+
/**
* Test that the TryAllocateFreeReg method works in the presence of inactive intervals
* that share the same register. It should split the interval it is currently
* allocating for at the minimum lifetime position between the two inactive intervals.
+ * This test only applies to the linear scan allocator.
*/
TEST_F(RegisterAllocatorTest, FreeUntil) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
@@ -408,7 +440,7 @@ TEST_F(RegisterAllocatorTest, FreeUntil) {
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+ RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
// Add an artificial range to cover the temps that will be put in the unhandled list.
LiveInterval* unhandled = graph->GetEntryBlock()->GetFirstInstruction()->GetLiveInterval();
@@ -506,15 +538,15 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
graph->GetDexFile(),
dex_cache,
0);
-*input2 = new (allocator) HInstanceFieldGet(parameter,
- Primitive::kPrimInt,
- MemberOffset(42),
- false,
- kUnknownFieldIndex,
- kUnknownClassDefIndex,
- graph->GetDexFile(),
- dex_cache,
- 0);
+ *input2 = new (allocator) HInstanceFieldGet(parameter,
+ Primitive::kPrimInt,
+ MemberOffset(42),
+ false,
+ kUnknownFieldIndex,
+ kUnknownClassDefIndex,
+ graph->GetDexFile(),
+ dex_cache,
+ 0);
then->AddInstruction(*input1);
else_->AddInstruction(*input2);
join->AddInstruction(new (allocator) HExit());
@@ -526,7 +558,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
return graph;
}
-TEST_F(RegisterAllocatorTest, PhiHint) {
+static void PhiHint(Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HPhi *phi;
@@ -541,8 +573,9 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
liveness.Analyze();
// Check that the register allocator is deterministic.
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 0);
@@ -560,8 +593,9 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
// Set the phi to a specific register, and check that the inputs get allocated
// the same register.
phi->GetLocations()->UpdateOut(Location::RegisterLocation(2));
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -579,8 +613,9 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
// Set input1 to a specific register, and check that the phi and other input get allocated
// the same register.
input1->GetLocations()->UpdateOut(Location::RegisterLocation(2));
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -598,8 +633,9 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
// Set input2 to a specific register, and check that the phi and other input get allocated
// the same register.
input2->GetLocations()->UpdateOut(Location::RegisterLocation(2));
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -607,6 +643,12 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
}
}
+// TODO: Enable this test for graph coloring register allocation when iterative move
+// coalescing is merged.
+TEST_F(RegisterAllocatorTest, PhiHint_LinearScan) {
+ PhiHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
HInstruction** field,
HInstruction** ret) {
@@ -645,7 +687,7 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
return graph;
}
-TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
+void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HInstruction *field, *ret;
@@ -658,8 +700,9 @@ TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
// Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 0);
@@ -677,13 +720,20 @@ TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
// Don't use SetInAt because we are overriding an already allocated location.
ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
}
}
+// TODO: Enable this test for graph coloring register allocation when iterative move
+// coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint_LinearScan) {
+ ExpectedInRegisterHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
HInstruction** first_sub,
HInstruction** second_sub) {
@@ -713,7 +763,7 @@ static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
return graph;
}
-TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
+void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HInstruction *first_sub, *second_sub;
@@ -726,8 +776,9 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
// Sanity check that in normal conditions, the registers are the same.
ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 1);
@@ -748,14 +799,21 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 2);
}
}
+// TODO: Enable this test for graph coloring register allocation when iterative move
+// coalescing is merged.
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint_LinearScan) {
+ SameAsFirstInputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
static HGraph* BuildDiv(ArenaAllocator* allocator,
HInstruction** div) {
HGraph* graph = CreateGraph(allocator);
@@ -782,7 +840,7 @@ static HGraph* BuildDiv(ArenaAllocator* allocator,
return graph;
}
-TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
+static void ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HInstruction *div;
@@ -795,17 +853,25 @@ TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
+ register_allocator->AllocateRegisters();
// div on x86 requires its first input in eax and the output to be the same as the first input.
ASSERT_EQ(div->GetLiveInterval()->GetRegister(), 0);
}
}
+// TODO: Enable this test for graph coloring register allocation when iterative move
+// coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint_LinearScan) {
+ ExpectedExactInRegisterAndSameOutputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
// Test a bug in the register allocator, where allocating a blocked
// register would lead to spilling an inactive interval at the wrong
// position.
+// This test only applies to the linear scan allocator.
TEST_F(RegisterAllocatorTest, SpillInactive) {
ArenaPool pool;
@@ -892,7 +958,7 @@ TEST_F(RegisterAllocatorTest, SpillInactive) {
liveness.instructions_from_lifetime_position_.push_back(user);
}
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+ RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
register_allocator.unhandled_core_intervals_.push_back(fourth);
register_allocator.unhandled_core_intervals_.push_back(third);
register_allocator.unhandled_core_intervals_.push_back(second);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 97f34e6c32..6effc306dc 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -17,6 +17,7 @@
#include "sharpening.h"
#include "base/casts.h"
+#include "base/enums.h"
#include "class_linker.h"
#include "code_generator.h"
#include "driver/dex_compilation_unit.h"
@@ -259,7 +260,7 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
load_class->SetLoadKindWithAddress(load_kind, address);
break;
case HLoadClass::LoadKind::kDexCachePcRelative: {
- size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+ PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
DexCacheArraysLayout layout(pointer_size, &dex_file);
size_t element_index = layout.TypeOffset(type_index);
load_class->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
@@ -278,8 +279,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
const DexFile& dex_file = load_string->GetDexFile();
uint32_t string_index = load_string->GetStringIndex();
- bool is_in_dex_cache = false;
- HLoadString::LoadKind desired_load_kind;
+ HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
uint64_t address = 0u; // String or dex cache element address.
{
Runtime* runtime = Runtime::Current();
@@ -295,33 +295,14 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
DCHECK(!runtime->UseJitCompilation());
mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
CHECK(string != nullptr);
- if (!compiler_driver_->GetSupportBootImageFixup()) {
- // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
- desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
- } else {
- DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
- is_in_dex_cache = true;
- desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
- ? HLoadString::LoadKind::kBootImageLinkTimePcRelative
- : HLoadString::LoadKind::kBootImageLinkTimeAddress;
- }
+ // TODO: In follow up CL, add PcRelative and Address back in.
} else if (runtime->UseJitCompilation()) {
// TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
// DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
mirror::String* string = dex_cache->GetResolvedString(string_index);
- is_in_dex_cache = (string != nullptr);
if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
- // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
address = reinterpret_cast64<uint64_t>(string);
- } else {
- // Note: If the string is not in the dex cache, the instruction needs environment
- // and will not be inlined across dex files. Within a dex file, the slow-path helper
- // loads the correct string and inlined frames are used correctly for OOM stack trace.
- // TODO: Write a test for this. Bug: 29416588
- desired_load_kind = HLoadString::LoadKind::kDexCacheAddress;
- void* dex_cache_element_address = &dex_cache->GetStrings()[string_index];
- address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
}
} else {
// AOT app compilation. Try to lookup the string without allocating if not found.
@@ -331,19 +312,9 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
!codegen_->GetCompilerOptions().GetCompilePic()) {
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
address = reinterpret_cast64<uint64_t>(string);
- } else {
- // Not JIT and either the string is not in boot image or we are compiling in PIC mode.
- // Use PC-relative load from the dex cache if the dex file belongs
- // to the oat file that we're currently compiling.
- desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
- ? HLoadString::LoadKind::kDexCachePcRelative
- : HLoadString::LoadKind::kDexCacheViaMethod;
}
}
}
- if (is_in_dex_cache) {
- load_string->MarkInDexCache();
- }
HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
switch (load_kind) {
@@ -358,7 +329,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
load_string->SetLoadKindWithAddress(load_kind, address);
break;
case HLoadString::LoadKind::kDexCachePcRelative: {
- size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+ PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
DexCacheArraysLayout layout(pointer_size, &dex_file);
size_t element_index = layout.StringOffset(string_index);
load_string->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 7af4302884..a01e107e02 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -368,6 +368,27 @@ bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) {
return live_in->UnionIfNotIn(live_out, kill);
}
+void LiveInterval::DumpWithContext(std::ostream& stream,
+ const CodeGenerator& codegen) const {
+ Dump(stream);
+ if (IsFixed()) {
+ stream << ", register:" << GetRegister() << "(";
+ if (IsFloatingPoint()) {
+ codegen.DumpFloatingPointRegister(stream, GetRegister());
+ } else {
+ codegen.DumpCoreRegister(stream, GetRegister());
+ }
+ stream << ")";
+ } else {
+ stream << ", spill slot:" << GetSpillSlot();
+ }
+ stream << ", requires_register:" << (GetDefinedBy() != nullptr && RequiresRegister());
+ if (GetParent()->GetDefinedBy() != nullptr) {
+ stream << ", defined_by:" << GetParent()->GetDefinedBy()->GetKind();
+ stream << "(" << GetParent()->GetDefinedBy()->GetLifetimePosition() << ")";
+ }
+}
+
static int RegisterOrLowRegister(Location location) {
return location.IsPair() ? location.low() : location.reg();
}
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index dc98864d9b..92788fe6b8 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -150,9 +150,7 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
if (GetIsEnvironment()) return false;
if (IsSynthesized()) return false;
Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
- return location.IsUnallocated()
- && (location.GetPolicy() == Location::kRequiresRegister
- || location.GetPolicy() == Location::kRequiresFpuRegister);
+ return location.IsUnallocated() && location.RequiresRegisterKind();
}
private:
@@ -481,6 +479,10 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return last_range_->GetEnd();
}
+ size_t GetLength() const {
+ return GetEnd() - GetStart();
+ }
+
size_t FirstRegisterUseAfter(size_t position) const {
if (is_temp_) {
return position == GetStart() ? position : kNoLifetime;
@@ -504,10 +506,18 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return kNoLifetime;
}
+ // Returns the lifetime position of the first register use for this live interval,
+ // including a register definition if applicable.
size_t FirstRegisterUse() const {
return FirstRegisterUseAfter(GetStart());
}
+ // Whether the interval requires a register rather than a stack location.
+ // If needed for performance, this could be cached.
+ bool RequiresRegister() const {
+ return !HasRegister() && FirstRegisterUse() != kNoLifetime;
+ }
+
size_t FirstUseAfter(size_t position) const {
if (is_temp_) {
return position == GetStart() ? position : kNoLifetime;
@@ -693,6 +703,10 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
stream << " is_high: " << IsHighInterval();
}
+ // Same as Dump, but adds context such as the instruction defining this interval, and
+ // the register currently assigned to this interval.
+ void DumpWithContext(std::ostream& stream, const CodeGenerator& codegen) const;
+
LiveInterval* GetNextSibling() const { return next_sibling_; }
LiveInterval* GetLastSibling() {
LiveInterval* result = this;
@@ -871,6 +885,33 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
range_search_start_ = first_range_;
}
+ bool DefinitionRequiresRegister() const {
+ DCHECK(IsParent());
+ LocationSummary* locations = defined_by_->GetLocations();
+ Location location = locations->Out();
+ // This interval is the first interval of the instruction. If the output
+ // of the instruction requires a register, we return the position of that instruction
+ // as the first register use.
+ if (location.IsUnallocated()) {
+ if ((location.GetPolicy() == Location::kRequiresRegister)
+ || (location.GetPolicy() == Location::kSameAsFirstInput
+ && (locations->InAt(0).IsRegister()
+ || locations->InAt(0).IsRegisterPair()
+ || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+ return true;
+ } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+ || (location.GetPolicy() == Location::kSameAsFirstInput
+ && (locations->InAt(0).IsFpuRegister()
+ || locations->InAt(0).IsFpuRegisterPair()
+ || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+ return true;
+ }
+ } else if (location.IsRegister() || location.IsRegisterPair()) {
+ return true;
+ }
+ return false;
+ }
+
private:
LiveInterval(ArenaAllocator* allocator,
Primitive::Type type,
@@ -925,33 +966,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return range;
}
- bool DefinitionRequiresRegister() const {
- DCHECK(IsParent());
- LocationSummary* locations = defined_by_->GetLocations();
- Location location = locations->Out();
- // This interval is the first interval of the instruction. If the output
- // of the instruction requires a register, we return the position of that instruction
- // as the first register use.
- if (location.IsUnallocated()) {
- if ((location.GetPolicy() == Location::kRequiresRegister)
- || (location.GetPolicy() == Location::kSameAsFirstInput
- && (locations->InAt(0).IsRegister()
- || locations->InAt(0).IsRegisterPair()
- || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
- return true;
- } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
- || (location.GetPolicy() == Location::kSameAsFirstInput
- && (locations->InAt(0).IsFpuRegister()
- || locations->InAt(0).IsFpuRegisterPair()
- || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
- return true;
- }
- } else if (location.IsRegister() || location.IsRegisterPair()) {
- return true;
- }
- return false;
- }
-
bool IsDefiningPosition(size_t position) const {
return IsParent() && (position == GetStart());
}
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
new file mode 100644
index 0000000000..8aa315a7e3
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "x86_memory_gen.h"
+#include "code_generator.h"
+
+namespace art {
+namespace x86 {
+
+/**
+ * Replace instructions with memory operand forms.
+ */
+class MemoryOperandVisitor : public HGraphVisitor {
+ public:
+ MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks)
+ : HGraphVisitor(graph),
+ do_implicit_null_checks_(do_implicit_null_checks) {}
+
+ private:
+ void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE {
+ // Replace the length by the array itself, so that the compare can be done against memory.
+ HArrayLength* array_len = check->InputAt(1)->AsArrayLength();
+
+ // We only want to replace an ArrayLength.
+ if (array_len == nullptr) {
+ return;
+ }
+
+ HInstruction* array = array_len->InputAt(0);
+ DCHECK_EQ(array->GetType(), Primitive::kPrimNot);
+
+ // Don't apply this optimization when the array is nullptr.
+ if (array->IsConstant() || (array->IsNullCheck() && array->InputAt(0)->IsConstant())) {
+ return;
+ }
+
+ // Is there a null check that could be an implicit check?
+ if (array->IsNullCheck() && do_implicit_null_checks_) {
+ // The ArrayLen may generate the implicit null check. Can the
+ // bounds check do so as well?
+ if (array_len->GetNextDisregardingMoves() != check) {
+ // No, it won't. Leave as is.
+ return;
+ }
+ }
+
+ // Can we suppress the ArrayLength and generate it at the BoundsCheck?
+ if (array_len->HasOnlyOneNonEnvironmentUse()) {
+ array_len->MarkEmittedAtUseSite();
+ // We need the ArrayLength just before the BoundsCheck.
+ array_len->MoveBefore(check);
+ }
+ }
+
+ bool do_implicit_null_checks_;
+};
+
+X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph,
+ CodeGenerator* codegen,
+ OptimizingCompilerStats* stats)
+ : HOptimization(graph, kX86MemoryOperandGenerationPassName, stats),
+ do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) {
+}
+
+void X86MemoryOperandGeneration::Run() {
+ MemoryOperandVisitor visitor(graph_, do_implicit_null_checks_);
+ visitor.VisitInsertionOrder();
+}
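
The gist of the visitor is a single-use test: only when the ArrayLength feeds nothing but this BoundsCheck can it be folded into a compare against memory. A toy sketch of that check follows; it does not use ART's HGraph types, and all names are illustrative.

// Toy IR node carrying only what the sketch needs (not ART's HInstruction).
struct Node {
  enum Kind { kArrayLength, kBoundsCheck, kOther };
  Kind kind;
  Node* length_input;        // For kBoundsCheck: the length operand.
  int uses;                  // Number of non-environment uses of this node.
  bool emitted_at_use_site;  // Set when the backend should fold the node into its user.
};

// Mark the length operand of a bounds check for folding when it is an
// ArrayLength with exactly one use.
void FoldLengthIntoBoundsCheck(Node* check) {
  if (check->kind != Node::kBoundsCheck) return;
  Node* len = check->length_input;
  if (len == nullptr || len->kind != Node::kArrayLength) return;
  if (len->uses != 1) return;  // Another user still wants the length in a register.
  len->emitted_at_use_site = true;
}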
+
+} // namespace x86
+} // namespace art
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
new file mode 100644
index 0000000000..5f15d9f1e6
--- /dev/null
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+#define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+class CodeGenerator;
+
+namespace x86 {
+
+class X86MemoryOperandGeneration : public HOptimization {
+ public:
+ X86MemoryOperandGeneration(HGraph* graph,
+ CodeGenerator* codegen,
+ OptimizingCompilerStats* stats);
+
+ void Run() OVERRIDE;
+
+ static constexpr const char* kX86MemoryOperandGenerationPassName =
+ "x86_memory_operand_generation";
+
+ private:
+ bool do_implicit_null_checks_;
+};
+
+} // namespace x86
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_