Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 29
-rw-r--r--  compiler/optimizing/builder.cc | 4
-rw-r--r--  compiler/optimizing/builder.h | 4
-rw-r--r--  compiler/optimizing/code_generator.cc | 223
-rw-r--r--  compiler/optimizing/code_generator.h | 93
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 1321
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 72
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 1021
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 54
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 4249
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h | 566
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 2253
-rw-r--r--  compiler/optimizing/code_generator_mips.h | 90
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 190
-rw-r--r--  compiler/optimizing/code_generator_mips64.h | 25
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 1176
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 86
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 1165
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 85
-rw-r--r--  compiler/optimizing/codegen_test.cc | 372
-rw-r--r--  compiler/optimizing/common_arm.h | 185
-rw-r--r--  compiler/optimizing/common_arm64.h | 99
-rw-r--r--  compiler/optimizing/constant_folding.cc | 27
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 49
-rw-r--r--  compiler/optimizing/dead_code_elimination.h | 6
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_arm.cc | 35
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_arm.h | 13
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_mips.cc | 26
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_mips.h | 4
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 22
-rw-r--r--  compiler/optimizing/gvn.cc | 4
-rw-r--r--  compiler/optimizing/induction_var_analysis.cc | 105
-rw-r--r--  compiler/optimizing/induction_var_analysis.h | 23
-rw-r--r--  compiler/optimizing/induction_var_analysis_test.cc | 227
-rw-r--r--  compiler/optimizing/induction_var_range.cc | 337
-rw-r--r--  compiler/optimizing/induction_var_range.h | 114
-rw-r--r--  compiler/optimizing/induction_var_range_test.cc | 134
-rw-r--r--  compiler/optimizing/inliner.cc | 146
-rw-r--r--  compiler/optimizing/inliner.h | 28
-rw-r--r--  compiler/optimizing/instruction_builder.cc | 145
-rw-r--r--  compiler/optimizing/instruction_builder.h | 6
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 32
-rw-r--r--  compiler/optimizing/instruction_simplifier.h | 6
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.cc | 8
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.h | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 7
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h | 5
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.cc | 3
-rw-r--r--  compiler/optimizing/intrinsics.cc | 491
-rw-r--r--  compiler/optimizing/intrinsics.h | 32
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 856
-rw-r--r--  compiler/optimizing/intrinsics_arm.h | 9
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 834
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h | 4
-rw-r--r--  compiler/optimizing/intrinsics_list.h | 144
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc | 108
-rw-r--r--  compiler/optimizing/intrinsics_mips.h | 4
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc | 59
-rw-r--r--  compiler/optimizing/intrinsics_mips64.h | 4
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 863
-rw-r--r--  compiler/optimizing/intrinsics_x86.h | 4
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 739
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.h | 4
-rw-r--r--  compiler/optimizing/licm.cc | 4
-rw-r--r--  compiler/optimizing/linear_order.cc | 128
-rw-r--r--  compiler/optimizing/linear_order.h | 41
-rw-r--r--  compiler/optimizing/liveness_test.cc | 3
-rw-r--r--  compiler/optimizing/load_store_elimination.cc | 37
-rw-r--r--  compiler/optimizing/locations.cc | 8
-rw-r--r--  compiler/optimizing/locations.h | 79
-rw-r--r--  compiler/optimizing/loop_optimization.cc | 377
-rw-r--r--  compiler/optimizing/loop_optimization.h | 107
-rw-r--r--  compiler/optimizing/loop_optimization_test.cc | 195
-rw-r--r--  compiler/optimizing/nodes.cc | 54
-rw-r--r--  compiler/optimizing/nodes.h | 369
-rw-r--r--  compiler/optimizing/nodes_mips.h | 35
-rw-r--r--  compiler/optimizing/nodes_shared.h | 12
-rw-r--r--  compiler/optimizing/optimization.h | 5
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 23
-rw-r--r--  compiler/optimizing/optimizing_cfi_test_expected.inc | 322
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 299
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h | 4
-rw-r--r--  compiler/optimizing/pc_relative_fixups_mips.cc | 39
-rw-r--r--  compiler/optimizing/pc_relative_fixups_mips.h | 2
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc | 21
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.h | 4
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 38
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 36
-rw-r--r--  compiler/optimizing/reference_type_propagation.h | 24
-rw-r--r--  compiler/optimizing/reference_type_propagation_test.cc | 14
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc | 141
-rw-r--r--  compiler/optimizing/register_allocation_resolver.h | 14
-rw-r--r--  compiler/optimizing/register_allocator.cc | 16
-rw-r--r--  compiler/optimizing/register_allocator.h | 3
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 2042
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.h | 193
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc | 52
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.h | 7
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 126
-rw-r--r--  compiler/optimizing/select_generator.cc | 3
-rw-r--r--  compiler/optimizing/sharpening.cc | 206
-rw-r--r--  compiler/optimizing/side_effects_analysis.cc | 7
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 30
-rw-r--r--  compiler/optimizing/ssa_builder.h | 4
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 120
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h | 127
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 9
-rw-r--r--  compiler/optimizing/x86_memory_gen.cc | 5
-rw-r--r--  compiler/optimizing/x86_memory_gen.h | 4
109 files changed, 18081 insertions, 6341 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 8aefd9ea1f..d2357a5d05 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -887,7 +887,7 @@ class BCEVisitor : public HGraphVisitor {
bool needs_finite_test = false;
bool needs_taken_test = false;
if (DynamicBCESeemsProfitable(loop, bounds_check->GetBlock()) &&
- induction_range_.CanGenerateCode(
+ induction_range_.CanGenerateRange(
bounds_check, index, &needs_finite_test, &needs_taken_test) &&
CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
// Do this test last, since it may generate code.
@@ -1361,6 +1361,11 @@ class BCEVisitor : public HGraphVisitor {
ValueBound other_value = ValueBound::AsValueBound(other_index);
int32_t other_c = other_value.GetConstant();
if (array_length == other_array_length && base == other_value.GetInstruction()) {
+ // Ensure every candidate could be picked for code generation.
+ bool b1 = false, b2 = false;
+ if (!induction_range_.CanGenerateRange(other_bounds_check, other_index, &b1, &b2)) {
+ continue;
+ }
// Does the current basic block dominate all back edges? If not,
// add this candidate later only if it falls into the range.
if (!loop->DominatesAllBackEdges(user->GetBlock())) {
@@ -1403,10 +1408,10 @@ class BCEVisitor : public HGraphVisitor {
// whether code generation on the original and, thus, related bounds check was possible.
// It handles either loop invariants (lower is not set) or unit strides.
if (other_c == max_c) {
- induction_range_.GenerateRangeCode(
+ induction_range_.GenerateRange(
other_bounds_check, other_index, GetGraph(), block, &max_lower, &max_upper);
} else if (other_c == min_c && base != nullptr) {
- induction_range_.GenerateRangeCode(
+ induction_range_.GenerateRange(
other_bounds_check, other_index, GetGraph(), block, &min_lower, &min_upper);
}
ReplaceInstruction(other_bounds_check, other_index);
@@ -1699,11 +1704,8 @@ class BCEVisitor : public HGraphVisitor {
// Insert the taken-test to see if the loop body is entered. If the
// loop isn't entered at all, it jumps around the deoptimization block.
if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); // placeholder
- HInstruction* condition = nullptr;
- induction_range_.GenerateTakenTest(header->GetLastInstruction(),
- GetGraph(),
- if_block,
- &condition);
+ HInstruction* condition = induction_range_.GenerateTakenTest(
+ header->GetLastInstruction(), GetGraph(), if_block);
DCHECK(condition != nullptr);
if_block->RemoveInstruction(if_block->GetLastInstruction());
if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
@@ -1843,8 +1845,8 @@ void BoundsCheckElimination::Run() {
// that value dominated by that instruction fits in that range. Range of that
// value can be narrowed further down in the dominator tree.
BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
+ for (size_t i = 0, size = graph_->GetReversePostOrder().size(); i != size; ++i) {
+ HBasicBlock* current = graph_->GetReversePostOrder()[i];
if (visitor.IsAddedBlock(current)) {
// Skip added blocks. Their effects are already taken care of.
continue;
@@ -1853,8 +1855,11 @@ void BoundsCheckElimination::Run() {
// Skip forward to the current block in case new basic blocks were inserted
// (which always appear earlier in reverse post order) to avoid visiting the
// same basic block twice.
- for ( ; !it.Done() && it.Current() != current; it.Advance()) {
- }
+ size_t new_size = graph_->GetReversePostOrder().size();
+ DCHECK_GE(new_size, size);
+ i += new_size - size;
+ DCHECK_EQ(current, graph_->GetReversePostOrder()[i]);
+ size = new_size;
}
// Perform cleanup.
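For orientation: the hunks above move bounds-check elimination from the old CanGenerateCode/GenerateRangeCode pair to the renamed CanGenerateRange/GenerateRange, and GenerateTakenTest now returns the condition instead of filling an out-parameter. The sketch below is not part of the patch; it only illustrates how a caller drives the reworked InductionVarRange interface, with the surrounding variables (bounds_check, index, graph, block, loop_header) as placeholders.

// Hypothetical caller of the reworked InductionVarRange API (names are placeholders).
bool needs_finite_test = false;
bool needs_taken_test = false;
if (induction_range_.CanGenerateRange(bounds_check, index, &needs_finite_test, &needs_taken_test)) {
  HInstruction* lower = nullptr;
  HInstruction* upper = nullptr;
  // Materializes instructions computing the symbolic lower/upper bounds of `index` in `block`.
  induction_range_.GenerateRange(bounds_check, index, graph, block, &lower, &upper);
  if (needs_taken_test) {
    // The taken-test condition is now the return value rather than an out-parameter.
    HInstruction* condition =
        induction_range_.GenerateTakenTest(loop_header->GetLastInstruction(), graph, block);
    DCHECK(condition != nullptr);
  }
}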
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 86742e6526..2927e1f7c0 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -51,7 +51,7 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_branches) {
if (compiler_options.IsHugeMethod(code_item_.insns_size_in_code_units_)) {
VLOG(compiler) << "Skip compilation of huge method "
- << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+ << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
<< ": " << code_item_.insns_size_in_code_units_ << " code units";
MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
return true;
@@ -61,7 +61,7 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_branches) {
if (compiler_options.IsLargeMethod(code_item_.insns_size_in_code_units_)
&& (number_of_branches == 0)) {
VLOG(compiler) << "Skip compilation of large method with no branch "
- << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+ << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
<< ": " << code_item_.insns_size_in_code_units_ << " code units";
MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
return true;
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 580ef72767..f896f1199e 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -43,7 +43,7 @@ class HGraphBuilder : public ValueObject {
OptimizingCompilerStats* compiler_stats,
const uint8_t* interpreter_metadata,
Handle<mirror::DexCache> dex_cache,
- StackHandleScopeCollection* handles)
+ VariableSizedHandleScope* handles)
: graph_(graph),
dex_file_(dex_file),
code_item_(code_item),
@@ -68,7 +68,7 @@ class HGraphBuilder : public ValueObject {
// Only for unit testing.
HGraphBuilder(HGraph* graph,
const DexFile::CodeItem& code_item,
- StackHandleScopeCollection* handles,
+ VariableSizedHandleScope* handles,
Primitive::Type return_type = Primitive::kPrimInt)
: graph_(graph),
dex_file_(nullptr),
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4a4b98cc48..8b450e11dc 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -18,6 +18,7 @@
#ifdef ART_ENABLE_CODEGEN_arm
#include "code_generator_arm.h"
+#include "code_generator_arm_vixl.h"
#endif
#ifdef ART_ENABLE_CODEGEN_arm64
@@ -283,8 +284,7 @@ void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches A
}
void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
- size_t maximum_number_of_live_core_registers,
- size_t maximum_number_of_live_fpu_registers,
+ size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order) {
block_order_ = &block_order;
@@ -298,14 +298,12 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
&& !HasAllocatedCalleeSaveRegisters()
&& IsLeafMethod()
&& !RequiresCurrentMethod()) {
- DCHECK_EQ(maximum_number_of_live_core_registers, 0u);
- DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u);
+ DCHECK_EQ(maximum_safepoint_spill_size, 0u);
SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
} else {
SetFrameSize(RoundUp(
first_register_slot_in_slow_path_
- + maximum_number_of_live_core_registers * GetWordSize()
- + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
+ + maximum_safepoint_spill_size
+ FrameEntrySpillSize(),
kStackAlignment));
}
@@ -348,7 +346,7 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok
// Initialize to anything to silent compiler warnings.
QuickEntrypointEnum entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck;
- switch (invoke->GetOriginalInvokeType()) {
+ switch (invoke->GetInvokeType()) {
case kStatic:
entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck;
break;
@@ -578,11 +576,19 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
#ifdef ART_ENABLE_CODEGEN_arm
case kArm:
case kThumb2: {
- return std::unique_ptr<CodeGenerator>(
- new (arena) arm::CodeGeneratorARM(graph,
- *isa_features.AsArmInstructionSetFeatures(),
- compiler_options,
- stats));
+ if (kArmUseVIXL32) {
+ return std::unique_ptr<CodeGenerator>(
+ new (arena) arm::CodeGeneratorARMVIXL(graph,
+ *isa_features.AsArmInstructionSetFeatures(),
+ compiler_options,
+ stats));
+ } else {
+ return std::unique_ptr<CodeGenerator>(
+ new (arena) arm::CodeGeneratorARM(graph,
+ *isa_features.AsArmInstructionSetFeatures(),
+ compiler_options,
+ stats));
+ }
}
#endif
#ifdef ART_ENABLE_CODEGEN_arm64
@@ -665,9 +671,9 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
return;
}
ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
- for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
- if (it.Current()->IsLoopHeader()) {
- HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+ for (HBasicBlock* block : graph.GetReversePostOrder()) {
+ if (block->IsLoopHeader()) {
+ HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
loop_headers.push_back(suspend_check);
}
@@ -753,7 +759,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
}
// Collect PC infos for the mapping table.
- uint32_t native_pc = GetAssembler()->CodeSize();
+ uint32_t native_pc = GetAssembler()->CodePosition();
if (instruction == nullptr) {
// For stack overflow checks and native-debug-info entries without dex register
@@ -765,16 +771,19 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
LocationSummary* locations = instruction->GetLocations();
uint32_t register_mask = locations->GetRegisterMask();
+ DCHECK_EQ(register_mask & ~locations->GetLiveRegisters()->GetCoreRegisters(), 0u);
if (locations->OnlyCallsOnSlowPath()) {
// In case of slow path, we currently set the location of caller-save registers
// to register (instead of their stack location when pushed before the slow-path
// call). Therefore register_mask contains both callee-save and caller-save
- // registers that hold objects. We must remove the caller-save from the mask, since
- // they will be overwritten by the callee.
- register_mask &= core_callee_save_mask_;
+ // registers that hold objects. We must remove the spilled caller-save from the
+ // mask, since they will be overwritten by the callee.
+ uint32_t spills = GetSlowPathSpills(locations, /* core_registers */ true);
+ register_mask &= ~spills;
+ } else {
+ // The register mask must be a subset of callee-save registers.
+ DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
}
- // The register mask must be a subset of callee-save registers.
- DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
stack_map_stream_.BeginStackMapEntry(outer_dex_pc,
native_pc,
register_mask,
@@ -1081,13 +1090,6 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo
}
}
-bool CodeGenerator::IsImplicitNullCheckAllowed(HNullCheck* null_check) const {
- return compiler_options_.GetImplicitNullChecks() &&
- // Null checks which might throw into a catch block need to save live
- // registers and therefore cannot be done implicitly.
- !null_check->CanThrowIntoCatchBlock();
-}
-
bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves();
@@ -1096,6 +1098,10 @@ bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
}
void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) {
+ if (!compiler_options_.GetImplicitNullChecks()) {
+ return;
+ }
+
// If we are from a static path don't record the pc as we can't throw NPE.
// NB: having the checks here makes the code much less verbose in the arch
// specific code generators.
@@ -1114,16 +1120,35 @@ void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) {
// and needs to record the pc.
if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) {
HNullCheck* null_check = first_prev_not_move->AsNullCheck();
- if (IsImplicitNullCheckAllowed(null_check)) {
- // TODO: The parallel moves modify the environment. Their changes need to be
- // reverted otherwise the stack maps at the throw point will not be correct.
- RecordPcInfo(null_check, null_check->GetDexPc());
- }
+ // TODO: The parallel moves modify the environment. Their changes need to be
+ // reverted otherwise the stack maps at the throw point will not be correct.
+ RecordPcInfo(null_check, null_check->GetDexPc());
+ }
+}
+
+LocationSummary* CodeGenerator::CreateThrowingSlowPathLocations(HInstruction* instruction,
+ RegisterSet caller_saves) {
+ // Note: Using kNoCall allows the method to be treated as leaf (and eliminate the
+ // HSuspendCheck from entry block). However, it will still get a valid stack frame
+ // because the HNullCheck needs an environment.
+ LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+ // When throwing from a try block, we may need to retrieve dalvik registers from
+ // physical registers and we also need to set up stack mask for GC. This is
+ // implicitly achieved by passing kCallOnSlowPath to the LocationSummary.
+ bool can_throw_into_catch_block = instruction->CanThrowIntoCatchBlock();
+ if (can_throw_into_catch_block) {
+ call_kind = LocationSummary::kCallOnSlowPath;
+ }
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (can_throw_into_catch_block && compiler_options_.GetImplicitNullChecks()) {
+ locations->SetCustomSlowPathCallerSaves(caller_saves); // Default: no caller-save registers.
}
+ DCHECK(!instruction->HasUses());
+ return locations;
}
void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) {
- if (IsImplicitNullCheckAllowed(instruction)) {
+ if (compiler_options_.GetImplicitNullChecks()) {
MaybeRecordStat(kImplicitNullCheckGenerated);
GenerateImplicitNullCheck(instruction);
} else {
@@ -1163,39 +1188,53 @@ void CodeGenerator::EmitParallelMoves(Location from1,
GetMoveResolver()->EmitNativeCode(&parallel_move);
}
-void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path) {
+void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
// Ensure that the call kind indication given to the register allocator is
- // coherent with the runtime call generated, and that the GC side effect is
- // set when required.
+ // coherent with the runtime call generated.
if (slow_path == nullptr) {
DCHECK(instruction->GetLocations()->WillCall())
<< "instruction->DebugName()=" << instruction->DebugName();
- DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()))
- << "instruction->DebugName()=" << instruction->DebugName()
- << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString();
} else {
- DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal())
- << "instruction->DebugName()=" << instruction->DebugName()
- << " slow_path->GetDescription()=" << slow_path->GetDescription();
- DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
- // When (non-Baker) read barriers are enabled, some instructions
- // use a slow path to emit a read barrier, which does not trigger
- // GC.
- (kEmitCompilerReadBarrier &&
- !kUseBakerReadBarrier &&
- (instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsArrayGet() ||
- instruction->IsLoadClass() ||
- instruction->IsLoadString() ||
- instruction->IsInstanceOf() ||
- instruction->IsCheckCast() ||
- (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))))
+ DCHECK(instruction->GetLocations()->CallsOnSlowPath() || slow_path->IsFatal())
<< "instruction->DebugName()=" << instruction->DebugName()
- << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
}
+ // Check that the GC side effect is set when required.
+ // TODO: Reverse EntrypointCanTriggerGC
+ if (EntrypointCanTriggerGC(entrypoint)) {
+ if (slow_path == nullptr) {
+ DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()))
+ << "instruction->DebugName()=" << instruction->DebugName()
+ << " instruction->GetSideEffects().ToString()="
+ << instruction->GetSideEffects().ToString();
+ } else {
+ DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
+ // When (non-Baker) read barriers are enabled, some instructions
+ // use a slow path to emit a read barrier, which does not trigger
+ // GC.
+ (kEmitCompilerReadBarrier &&
+ !kUseBakerReadBarrier &&
+ (instruction->IsInstanceFieldGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsArrayGet() ||
+ instruction->IsLoadClass() ||
+ instruction->IsLoadString() ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast() ||
+ (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))))
+ << "instruction->DebugName()=" << instruction->DebugName()
+ << " instruction->GetSideEffects().ToString()="
+ << instruction->GetSideEffects().ToString()
+ << " slow_path->GetDescription()=" << slow_path->GetDescription();
+ }
+ } else {
+ // The GC side effect is not required for the instruction. But the instruction might still have
+ // it, for example if it calls other entrypoints requiring it.
+ }
+
// Check the coherency of leaf information.
DCHECK(instruction->IsSuspendCheck()
|| ((slow_path != nullptr) && slow_path->IsFatal())
@@ -1216,68 +1255,56 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in
DCHECK(instruction->IsInstanceFieldGet() ||
instruction->IsStaticFieldGet() ||
instruction->IsArrayGet() ||
+ instruction->IsArraySet() ||
instruction->IsLoadClass() ||
instruction->IsLoadString() ||
instruction->IsInstanceOf() ||
instruction->IsCheckCast() ||
- (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))
+ (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) ||
+ (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified()))
<< "instruction->DebugName()=" << instruction->DebugName()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
}
void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- RegisterSet* live_registers = locations->GetLiveRegisters();
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
- for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
- if (!codegen->IsCoreCalleeSaveRegister(i)) {
- if (live_registers->ContainsCoreRegister(i)) {
- // If the register holds an object, update the stack mask.
- if (locations->RegisterContainsObject(i)) {
- locations->SetStackBit(stack_offset / kVRegSize);
- }
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- saved_core_stack_offsets_[i] = stack_offset;
- stack_offset += codegen->SaveCoreRegister(stack_offset, i);
- }
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ // If the register holds an object, update the stack mask.
+ if (locations->RegisterContainsObject(i)) {
+ locations->SetStackBit(stack_offset / kVRegSize);
}
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_core_stack_offsets_[i] = stack_offset;
+ stack_offset += codegen->SaveCoreRegister(stack_offset, i);
}
- for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
- if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
- if (live_registers->ContainsFloatingPointRegister(i)) {
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- saved_fpu_stack_offsets_[i] = stack_offset;
- stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i);
- }
- }
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ for (uint32_t i : LowToHighBits(fp_spills)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_fpu_stack_offsets_[i] = stack_offset;
+ stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i);
}
}
void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- RegisterSet* live_registers = locations->GetLiveRegisters();
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
- for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
- if (!codegen->IsCoreCalleeSaveRegister(i)) {
- if (live_registers->ContainsCoreRegister(i)) {
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
- }
- }
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
}
- for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
- if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
- if (live_registers->ContainsFloatingPointRegister(i)) {
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
- }
- }
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ for (uint32_t i : LowToHighBits(fp_spills)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
}
}
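The new CreateThrowingSlowPathLocations() helper centralizes the kNoCall/kCallOnSlowPath decision that IsImplicitNullCheckAllowed() used to imply. Below is a minimal sketch of how an architecture backend might call it when setting up locations for a throwing instruction; the backend class name and register choices are illustrative only and not taken from this patch.

// Hypothetical backend locations setup for a null check (names are placeholders).
void LocationsBuilderFoo::VisitNullCheck(HNullCheck* instruction) {
  // No extra caller-save registers are requested here; the default empty RegisterSet is used.
  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
}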
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ad02ecf609..a81f24e3d8 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,9 +22,8 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
+#include "base/bit_utils.h"
#include "base/enums.h"
-#include "compiled_method.h"
-#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
@@ -54,6 +53,7 @@ static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
class Assembler;
class CodeGenerator;
class CompilerDriver;
+class CompilerOptions;
class LinkerPatch;
class ParallelMoveResolver;
@@ -212,8 +212,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual size_t GetFloatingPointSpillSlotSize() const = 0;
virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
void InitializeCodeGeneration(size_t number_of_spill_slots,
- size_t maximum_number_of_live_core_registers,
- size_t maximum_number_of_live_fpu_registers,
+ size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order);
// Backends can override this as necessary. For most, no special alignment is required.
@@ -279,6 +278,30 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
return (fpu_callee_save_mask_ & (1 << reg)) != 0;
}
+ uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
+ DCHECK(locations->OnlyCallsOnSlowPath() ||
+ (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
+ !locations->HasCustomSlowPathCallingConvention()));
+ uint32_t live_registers = core_registers
+ ? locations->GetLiveRegisters()->GetCoreRegisters()
+ : locations->GetLiveRegisters()->GetFloatingPointRegisters();
+ if (locations->HasCustomSlowPathCallingConvention()) {
+ // Save only the live registers that the custom calling convention wants us to save.
+ uint32_t caller_saves = core_registers
+ ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
+ : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
+ return live_registers & caller_saves;
+ } else {
+ // Default ABI, we need to spill non-callee-save live registers.
+ uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
+ return live_registers & ~callee_saves;
+ }
+ }
+
+ size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
+ return POPCOUNT(GetSlowPathSpills(locations, core_registers));
+ }
+
// Record native to dex mapping for a suspend point. Required by runtime.
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
// Check whether we have already recorded mapping at this PC.
@@ -290,6 +313,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
bool CanMoveNullCheckToUser(HNullCheck* null_check);
void MaybeRecordImplicitNullCheck(HInstruction* instruction);
+ LocationSummary* CreateThrowingSlowPathLocations(
+ HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
void GenerateNullCheck(HNullCheck* null_check);
virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
@@ -299,12 +324,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// TODO: Replace with a catch-entering instruction that records the environment.
void RecordCatchBlockInfo();
- // Returns true if implicit null checks are allowed in the compiler options
- // and if the null check is not inside a try block. We currently cannot do
- // implicit null checks in that case because we need the NullCheckSlowPath to
- // save live registers, which may be needed by the runtime to set catch phis.
- bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
-
// TODO: Avoid creating the `std::unique_ptr` here.
void AddSlowPath(SlowPathCode* slow_path) {
slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
@@ -340,6 +359,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
+ bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
+ bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
+
// Helper that returns the pointer offset of an index in an object array.
// Note: this method assumes we always have the same pointer size, regardless
// of the architecture.
@@ -383,7 +405,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// Perfoms checks pertaining to an InvokeRuntime call.
- void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);
+ void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
// Perfoms checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
@@ -491,7 +515,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// otherwise return a fall-back info that should be used instead.
virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method) = 0;
+ HInvokeStaticOrDirect* invoke) = 0;
// Generate a call to a static or direct method.
virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
@@ -507,40 +531,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
uint32_t GetReferenceDisableFlagOffset() const;
protected:
- // Method patch info used for recording locations of required linker patches and
- // target methods. The target method can be used for various purposes, whether for
- // patching the address of the method or the code pointer or a PC-relative call.
+ // Patch info used for recording locations of required linker patches and their targets,
+ // i.e. target method, string, type or code identified by their dex file and index.
template <typename LabelType>
- struct MethodPatchInfo {
- explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }
-
- MethodReference target_method;
- LabelType label;
- };
-
- // String patch info used for recording locations of required linker patches and
- // target strings. The actual string address can be absolute or PC-relative.
- template <typename LabelType>
- struct StringPatchInfo {
- StringPatchInfo(const DexFile& df, uint32_t index)
- : dex_file(df), string_index(index), label() { }
-
- const DexFile& dex_file;
- uint32_t string_index;
- LabelType label;
- };
-
- // Type patch info used for recording locations of required linker patches and
- // target types. The actual type address can be absolute or PC-relative.
- // TODO: Consider merging with MethodPatchInfo and StringPatchInfo - all these
- // classes contain the dex file, some index and the label.
- template <typename LabelType>
- struct TypePatchInfo {
- TypePatchInfo(const DexFile& df, uint32_t index)
- : dex_file(df), type_index(index), label() { }
+ struct PatchInfo {
+ PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
+ : dex_file(target_dex_file), index(target_index) { }
const DexFile& dex_file;
- uint32_t type_index;
+ uint32_t index;
LabelType label;
};
@@ -556,12 +555,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
core_spill_mask_(0),
fpu_spill_mask_(0),
first_register_slot_in_slow_path_(0),
+ allocated_registers_(RegisterSet::Empty()),
blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
kArenaAllocCodeGenerator)),
blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
kArenaAllocCodeGenerator)),
- blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
- kArenaAllocCodeGenerator)),
number_of_core_registers_(number_of_core_registers),
number_of_fpu_registers_(number_of_fpu_registers),
number_of_register_pairs_(number_of_register_pairs),
@@ -598,7 +596,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
return POPCOUNT(core_spill_mask_) * GetWordSize();
}
- bool HasAllocatedCalleeSaveRegisters() const {
+ virtual bool HasAllocatedCalleeSaveRegisters() const {
// We check the core registers against 1 because it always comprises the return PC.
return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
|| (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
@@ -649,7 +647,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// arrays.
bool* const blocked_core_registers_;
bool* const blocked_fpu_registers_;
- bool* const blocked_register_pairs_;
size_t number_of_core_registers_;
size_t number_of_fpu_registers_;
size_t number_of_register_pairs_;
@@ -687,6 +684,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
bool is_leaf_;
// Whether an instruction in the graph accesses the current method.
+ // TODO: Rename: this actually indicates that some instruction in the method
+ // needs the environment including a valid stack frame.
bool requires_current_method_;
friend class OptimizingCFITest;
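GetSlowPathSpills() above decides which live registers a slow path must actually spill: every live register that is not callee-save under the default ABI, or only the live registers named by a custom slow-path calling convention. The standalone toy below reproduces that mask arithmetic; the register numbers and masks are invented for the example and are not ART values.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t live_registers = 0x0000000fu;  // r0-r3 live across the slow path
  uint32_t callee_saves   = 0x000000f0u;  // r4-r7 are callee-save in this toy ABI
  uint32_t custom_callers = 0x00000001u;  // a custom convention only cares about r0

  // Default ABI: spill every live register that is not callee-save.
  uint32_t default_spills = live_registers & ~callee_saves;   // -> r0-r3
  // Custom slow-path convention: spill only the live registers it names.
  uint32_t custom_spills = live_registers & custom_callers;   // -> r0

  std::printf("default spills: 0x%x, custom spills: 0x%x\n", default_spills, custom_spills);
  return 0;
}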
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index cd7a90e280..be65f89ef1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,13 +59,192 @@ static constexpr DRegister DTMP = D31;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
-class NullCheckSlowPathARM : public SlowPathCode {
+static constexpr int kRegListThreshold = 4;
+
+// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
+// for each live D registers they treat two corresponding S registers as live ones.
+//
+// Two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build
+// from a list of contiguous S registers a list of contiguous D registers (processing first/last
+// S registers corner cases) and save/restore this new list treating them as D registers.
+// - decreasing code size
+// - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is
+// restored and then used in regular non SlowPath code as D register.
+//
+// For the following example (v means the S register is live):
+// D names: | D0 | D1 | D2 | D4 | ...
+// S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
+// Live? | | v | v | v | v | v | v | | ...
+//
+// S1 and S6 will be saved/restored independently; D registers list (D1, D2) will be processed
+// as D registers.
+static size_t SaveContiguousSRegisterList(size_t first,
+ size_t last,
+ CodeGenerator* codegen,
+ size_t stack_offset) {
+ DCHECK_LE(first, last);
+ if ((first == last) && (first == 0)) {
+ stack_offset += codegen->SaveFloatingPointRegister(stack_offset, first);
+ return stack_offset;
+ }
+ if (first % 2 == 1) {
+ stack_offset += codegen->SaveFloatingPointRegister(stack_offset, first++);
+ }
+
+ bool save_last = false;
+ if (last % 2 == 0) {
+ save_last = true;
+ --last;
+ }
+
+ if (first < last) {
+ DRegister d_reg = static_cast<DRegister>(first / 2);
+ DCHECK_EQ((last - first + 1) % 2, 0u);
+ size_t number_of_d_regs = (last - first + 1) / 2;
+
+ if (number_of_d_regs == 1) {
+ __ StoreDToOffset(d_reg, SP, stack_offset);
+ } else if (number_of_d_regs > 1) {
+ __ add(IP, SP, ShifterOperand(stack_offset));
+ __ vstmiad(IP, d_reg, number_of_d_regs);
+ }
+ stack_offset += number_of_d_regs * kArmWordSize * 2;
+ }
+
+ if (save_last) {
+ stack_offset += codegen->SaveFloatingPointRegister(stack_offset, last + 1);
+ }
+
+ return stack_offset;
+}
+
+static size_t RestoreContiguousSRegisterList(size_t first,
+ size_t last,
+ CodeGenerator* codegen,
+ size_t stack_offset) {
+ DCHECK_LE(first, last);
+ if ((first == last) && (first == 0)) {
+ stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, first);
+ return stack_offset;
+ }
+ if (first % 2 == 1) {
+ stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, first++);
+ }
+
+ bool restore_last = false;
+ if (last % 2 == 0) {
+ restore_last = true;
+ --last;
+ }
+
+ if (first < last) {
+ DRegister d_reg = static_cast<DRegister>(first / 2);
+ DCHECK_EQ((last - first + 1) % 2, 0u);
+ size_t number_of_d_regs = (last - first + 1) / 2;
+ if (number_of_d_regs == 1) {
+ __ LoadDFromOffset(d_reg, SP, stack_offset);
+ } else if (number_of_d_regs > 1) {
+ __ add(IP, SP, ShifterOperand(stack_offset));
+ __ vldmiad(IP, d_reg, number_of_d_regs);
+ }
+ stack_offset += number_of_d_regs * kArmWordSize * 2;
+ }
+
+ if (restore_last) {
+ stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, last + 1);
+ }
+
+ return stack_offset;
+}
+
+void SlowPathCodeARM::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+ size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+ size_t orig_offset = stack_offset;
+
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ // If the register holds an object, update the stack mask.
+ if (locations->RegisterContainsObject(i)) {
+ locations->SetStackBit(stack_offset / kVRegSize);
+ }
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_core_stack_offsets_[i] = stack_offset;
+ stack_offset += kArmWordSize;
+ }
+
+ int reg_num = POPCOUNT(core_spills);
+ if (reg_num != 0) {
+ if (reg_num > kRegListThreshold) {
+ __ StoreList(RegList(core_spills), orig_offset);
+ } else {
+ stack_offset = orig_offset;
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ stack_offset += codegen->SaveCoreRegister(stack_offset, i);
+ }
+ }
+ }
+
+ uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ orig_offset = stack_offset;
+ for (uint32_t i : LowToHighBits(fp_spills)) {
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_fpu_stack_offsets_[i] = stack_offset;
+ stack_offset += kArmWordSize;
+ }
+
+ stack_offset = orig_offset;
+ while (fp_spills != 0u) {
+ uint32_t begin = CTZ(fp_spills);
+ uint32_t tmp = fp_spills + (1u << begin);
+ fp_spills &= tmp; // Clear the contiguous range of 1s.
+ uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
+ stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
+ }
+ DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+}
+
+void SlowPathCodeARM::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+ size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+ size_t orig_offset = stack_offset;
+
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ stack_offset += kArmWordSize;
+ }
+
+ int reg_num = POPCOUNT(core_spills);
+ if (reg_num != 0) {
+ if (reg_num > kRegListThreshold) {
+ __ LoadList(RegList(core_spills), orig_offset);
+ } else {
+ stack_offset = orig_offset;
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
+ }
+ }
+ }
+
+ uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ while (fp_spills != 0u) {
+ uint32_t begin = CTZ(fp_spills);
+ uint32_t tmp = fp_spills + (1u << begin);
+ fp_spills &= tmp; // Clear the contiguous range of 1s.
+ uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
+ stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
+ }
+ DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+}
+
+class NullCheckSlowPathARM : public SlowPathCodeARM {
public:
- explicit NullCheckSlowPathARM(HNullCheck* instruction) : SlowPathCode(instruction) {}
+ explicit NullCheckSlowPathARM(HNullCheck* instruction) : SlowPathCodeARM(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
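The ARM SaveLiveRegisters/RestoreLiveRegisters above peel one contiguous run of live S registers off the spill mask per iteration, using CTZ plus a carry trick to clear exactly that run. The standalone toy below (not ART code) demonstrates the trick on the S1..S6 example from the comment block; it uses the GCC/Clang __builtin_ctz intrinsic.

#include <cstdint>
#include <cstdio>

int main() {
  // S1..S6 live, matching the example in the comment above (bits 1-6 set).
  uint32_t fp_spills = 0x7eu;
  while (fp_spills != 0u) {
    uint32_t begin = static_cast<uint32_t>(__builtin_ctz(fp_spills));  // first live S register
    uint32_t tmp = fp_spills + (1u << begin);  // the add carries through the run of 1s...
    fp_spills &= tmp;                          // ...so this clears exactly that run
    uint32_t end = (tmp == 0u) ? 32u : static_cast<uint32_t>(__builtin_ctz(tmp));
    std::printf("contiguous live range: S%u..S%u\n", begin, end - 1u);
  }
  return 0;
}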
@@ -74,8 +253,10 @@ class NullCheckSlowPathARM : public SlowPathCode {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
- arm_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
+ arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
@@ -87,19 +268,14 @@ class NullCheckSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM);
};
-class DivZeroCheckSlowPathARM : public SlowPathCode {
+class DivZeroCheckSlowPathARM : public SlowPathCodeARM {
public:
- explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
+ explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : SlowPathCodeARM(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- if (instruction_->CanThrowIntoCatchBlock()) {
- // Live registers will be restored in the catch block if caught.
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- }
- arm_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
+ arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
@@ -111,19 +287,16 @@ class DivZeroCheckSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM);
};
-class SuspendCheckSlowPathARM : public SlowPathCode {
+class SuspendCheckSlowPathARM : public SlowPathCodeARM {
public:
SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor)
- : SlowPathCode(instruction), successor_(successor) {}
+ : SlowPathCodeARM(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- arm_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
+ arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ b(GetReturnLabel());
} else {
@@ -152,10 +325,10 @@ class SuspendCheckSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM);
};
-class BoundsCheckSlowPathARM : public SlowPathCode {
+class BoundsCheckSlowPathARM : public SlowPathCodeARM {
public:
explicit BoundsCheckSlowPathARM(HBoundsCheck* instruction)
- : SlowPathCode(instruction) {}
+ : SlowPathCodeARM(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
@@ -176,10 +349,10 @@ class BoundsCheckSlowPathARM : public SlowPathCode {
locations->InAt(1),
Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
Primitive::kPrimInt);
- uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
- ? QUICK_ENTRY_POINT(pThrowStringBounds)
- : QUICK_ENTRY_POINT(pThrowArrayBounds);
- arm_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this);
+ QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
+ ? kQuickThrowStringBounds
+ : kQuickThrowArrayBounds;
+ arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
@@ -192,13 +365,13 @@ class BoundsCheckSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM);
};
-class LoadClassSlowPathARM : public SlowPathCode {
+class LoadClassSlowPathARM : public SlowPathCodeARM {
public:
LoadClassSlowPathARM(HLoadClass* cls,
HInstruction* at,
uint32_t dex_pc,
bool do_clinit)
- : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+ : SlowPathCodeARM(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
}
@@ -211,10 +384,9 @@ class LoadClassSlowPathARM : public SlowPathCode {
InvokeRuntimeCallingConvention calling_convention;
__ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
- int32_t entry_point_offset = do_clinit_
- ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
- : QUICK_ENTRY_POINT(pInitializeType);
- arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
+ QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
+ : kQuickInitializeType;
+ arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
if (do_clinit_) {
CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
} else {
@@ -250,27 +422,57 @@ class LoadClassSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM);
};
-class LoadStringSlowPathARM : public SlowPathCode {
+class LoadStringSlowPathARM : public SlowPathCodeARM {
public:
- explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCode(instruction) {}
+ explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCodeARM(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ HLoadString* load = instruction_->AsLoadString();
+ const uint32_t string_index = load->GetStringIndex();
+ Register out = locations->Out().AsRegister<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
- const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+ // In the unlucky case that the `temp` is R0, we preserve the address in `out` across
+ // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call).
+ bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0));
+ Register entry_address = temp_is_r0 ? out : temp;
+ DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+ if (call_saves_everything_except_r0 && temp_is_r0) {
+ __ mov(entry_address, ShifterOperand(temp));
+ }
+
__ LoadImmediate(calling_convention.GetRegisterAt(0), string_index);
- arm_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
+ arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
- arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
+ // Store the resolved String to the .bss entry.
+ if (call_saves_everything_except_r0) {
+ // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
+ __ str(R0, Address(entry_address));
+ } else {
+ // For non-Baker read barrier, we need to re-calculate the address of the string entry.
+ CodeGeneratorARM::PcRelativePatchInfo* labels =
+ arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+ __ BindTrackedLabel(&labels->movw_label);
+ __ movw(entry_address, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->movt_label);
+ __ movt(entry_address, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->add_pc_label);
+ __ add(entry_address, entry_address, ShifterOperand(PC));
+ __ str(R0, Address(entry_address));
+ }
+
+ arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
RestoreLiveRegisters(codegen, locations);
+
__ b(GetExitLabel());
}
@@ -280,10 +482,10 @@ class LoadStringSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM);
};
-class TypeCheckSlowPathARM : public SlowPathCode {
+class TypeCheckSlowPathARM : public SlowPathCodeARM {
public:
TypeCheckSlowPathARM(HInstruction* instruction, bool is_fatal)
- : SlowPathCode(instruction), is_fatal_(is_fatal) {}
+ : SlowPathCodeARM(instruction), is_fatal_(is_fatal) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
@@ -311,7 +513,7 @@ class TypeCheckSlowPathARM : public SlowPathCode {
Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
+ arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
instruction_,
instruction_->GetDexPc(),
this);
@@ -320,10 +522,7 @@ class TypeCheckSlowPathARM : public SlowPathCode {
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
} else {
DCHECK(instruction_->IsCheckCast());
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ arm_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
@@ -343,19 +542,15 @@ class TypeCheckSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM);
};
-class DeoptimizationSlowPathARM : public SlowPathCode {
+class DeoptimizationSlowPathARM : public SlowPathCodeARM {
public:
explicit DeoptimizationSlowPathARM(HDeoptimize* instruction)
- : SlowPathCode(instruction) {}
+ : SlowPathCodeARM(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -365,9 +560,9 @@ class DeoptimizationSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
};
-class ArraySetSlowPathARM : public SlowPathCode {
+class ArraySetSlowPathARM : public SlowPathCodeARM {
public:
- explicit ArraySetSlowPathARM(HInstruction* instruction) : SlowPathCode(instruction) {}
+ explicit ArraySetSlowPathARM(HInstruction* instruction) : SlowPathCodeARM(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
@@ -394,10 +589,7 @@ class ArraySetSlowPathARM : public SlowPathCode {
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
RestoreLiveRegisters(codegen, locations);
__ b(GetExitLabel());
@@ -409,11 +601,21 @@ class ArraySetSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
};
-// Slow path marking an object during a read barrier.
-class ReadBarrierMarkSlowPathARM : public SlowPathCode {
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
public:
- ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj)
- : SlowPathCode(instruction), obj_(obj) {
+ ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref)
+ : SlowPathCodeARM(instruction), ref_(ref) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -421,17 +623,19 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- Register reg = obj_.AsRegister<Register>();
+ Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
instruction_->IsLoadClass() ||
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -440,39 +644,215 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- DCHECK_NE(reg, SP);
- DCHECK_NE(reg, LR);
- DCHECK_NE(reg, PC);
- DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
+ DCHECK_NE(ref_reg, SP);
+ DCHECK_NE(ref_reg, LR);
+ DCHECK_NE(ref_reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary, so it cannot be the entry point's input/output.
+ DCHECK_NE(ref_reg, IP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
// "Compact" slow path, saving two moves.
//
// Instead of using the standard runtime calling convention (input
// and output in R0):
//
- // R0 <- obj
+ // R0 <- ref
// R0 <- ReadBarrierMark(R0)
- // obj <- R0
+ // ref <- R0
//
- // we just use rX (the register holding `obj`) as input and output
+ // we just use rX (the register containing `ref`) as input and output
// of a dedicated entrypoint:
//
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(reg);
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
// This runtime call does not require a stack map.
arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ b(GetExitLabel());
}
private:
- const Location obj_;
+ // The location (register) of the marked object reference.
+ const Location ref_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
};
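
The "compact" calling convention above can be pictured with a minimal, hypothetical C++ sketch (invented names, not ART code): each ReadBarrierMarkRegX entrypoint takes and returns the reference in the register that already holds it, much as GetReadBarrierMarkEntryPointsOffset(ref_reg) selects the entry by register number, so the two moves through R0 disappear.

    #include <array>
    #include <cstdint>
    #include <cstdio>

    using Ref = std::uintptr_t;

    // Stand-in for the runtime's marking routine (returns the to-space address).
    static Ref Mark(Ref from_space_ref) { return from_space_ref + 0x1000; }

    // One entrypoint per (hypothetical) register; the reference stays in place.
    template <int kReg>
    static Ref ReadBarrierMarkReg(Ref ref_in_reg) { return Mark(ref_in_reg); }

    int main() {
      // Conceptually a per-thread table indexed by the register number.
      std::array<Ref (*)(Ref), 4> entrypoints = {
          ReadBarrierMarkReg<0>, ReadBarrierMarkReg<1>,
          ReadBarrierMarkReg<2>, ReadBarrierMarkReg<3>};
      Ref ref_in_r2 = 0x7000;                 // `ref` happens to live in "R2".
      ref_in_r2 = entrypoints[2](ref_in_r2);  // rX <- ReadBarrierMarkRegX(rX)
      std::printf("marked ref: 0x%llx\n",
                  static_cast<unsigned long long>(ref_in_r2));
      return 0;
    }
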
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathARM above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp1,
+ Register temp2)
+ : SlowPathCodeARM(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp1_(temp1),
+ temp2_(temp2) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use IP to save the old reference, as IP is
+ // used internally by the ReadBarrierMarkRegX entry point, and we
+ // need the old reference after the call to that entry point.
+ DCHECK_NE(temp1_, IP);
+ __ Mov(temp1_, ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ DCHECK_NE(ref_reg, SP);
+ DCHECK_NE(ref_reg, LR);
+ DCHECK_NE(ref_reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary, so it cannot be the entry point's input/output.
+ DCHECK_NE(ref_reg, IP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // R0 <- ref
+ // R0 <- ReadBarrierMark(R0)
+ // ref <- R0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*(obj_ + field_offset_)`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
+ // (CAS) operation below would abort the CAS, leaving the field
+ // as-is.
+ Label done;
+ __ cmp(temp1_, ShifterOperand(ref_reg));
+ __ b(&done, EQ);
+
+    // Update the holder's field atomically. This may fail if the
+    // mutator updates it before us, but that is OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+
+ // Convenience aliases.
+ Register base = obj_;
+ // The UnsafeCASObject intrinsic uses a register pair as field
+ // offset ("long offset"), of which only the low part contains
+ // data.
+ Register offset = field_offset_.AsRegisterPairLow<Register>();
+ Register expected = temp1_;
+ Register value = ref_reg;
+ Register tmp_ptr = IP; // Pointer to actual memory.
+ Register tmp = temp2_; // Value in memory.
+
+ __ add(tmp_ptr, base, ShifterOperand(offset));
+
+ if (kPoisonHeapReferences) {
+ __ PoisonHeapReference(expected);
+ if (value == expected) {
+ // Do not poison `value`, as it is the same register as
+ // `expected`, which has just been poisoned.
+ } else {
+ __ PoisonHeapReference(value);
+ }
+ }
+
+ // do {
+    //   tmp = [tmp_ptr] - expected;
+    // } while (tmp == 0 && failure([tmp_ptr] <- value));
+
+ Label loop_head, exit_loop;
+ __ Bind(&loop_head);
+
+ __ ldrex(tmp, tmp_ptr);
+
+ __ subs(tmp, tmp, ShifterOperand(expected));
+
+ __ it(NE);
+ __ clrex(NE);
+
+ __ b(&exit_loop, NE);
+
+ __ strex(tmp, value, tmp_ptr);
+ __ cmp(tmp, ShifterOperand(1));
+ __ b(&loop_head, EQ);
+
+ __ Bind(&exit_loop);
+
+ if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(expected);
+ if (value == expected) {
+ // Do not unpoison `value`, as it is the same register as
+ // `expected`, which has just been unpoisoned.
+ } else {
+ __ UnpoisonHeapReference(value);
+ }
+ }
+
+ __ Bind(&done);
+ __ b(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const Register obj_;
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
+ const Register temp1_;
+ const Register temp2_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM);
+};
+
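
In C++ terms, the marking-and-update logic above boils down to the following free-standing sketch (not ART code; heap poisoning and the register constraints are omitted, and Mark() stands in for the ReadBarrierMarkRegX call). The LDREX/SUBS/STREX sequence is exactly a strong CAS with relaxed ordering, as the comment in the slow path states.

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    using Ref = std::uintptr_t;

    static Ref Mark(Ref ref) { return ref + 0x1000; }  // pretend to-space address

    static void MarkAndUpdateField(std::atomic<Ref>* field, Ref ref) {
      Ref old_ref = ref;  // temp1_ <- ref_reg (save the old reference)
      ref = Mark(ref);    // rX <- ReadBarrierMarkRegX(rX)
      if (ref != old_ref) {
        // Strong CAS with relaxed ordering: expected = old reference, desired =
        // new (marked) reference. If another thread already installed a
        // different reference, the CAS fails and the field is left as-is.
        field->compare_exchange_strong(old_ref, ref, std::memory_order_relaxed);
      }
    }

    int main() {
      std::atomic<Ref> field{0x7000};
      MarkAndUpdateField(&field, field.load(std::memory_order_relaxed));
      std::printf("field now: 0x%llx\n",
                  static_cast<unsigned long long>(
                      field.load(std::memory_order_relaxed)));
      return 0;
    }
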
// Slow path generating a read barrier for a heap reference.
-class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
+class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM {
public:
ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction,
Location out,
@@ -480,7 +860,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
Location obj,
uint32_t offset,
Location index)
- : SlowPathCode(instruction),
+ : SlowPathCodeARM(instruction),
out_(out),
ref_(ref),
obj_(obj),
@@ -610,10 +990,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
__ LoadImmediate(calling_convention.GetRegisterAt(2), offset_);
}
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<
kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
arm_codegen->Move32(out_, Location::RegisterLocation(R0));
@@ -655,10 +1032,10 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
};
// Slow path generating a read barrier for a GC root.
-class ReadBarrierForRootSlowPathARM : public SlowPathCode {
+class ReadBarrierForRootSlowPathARM : public SlowPathCodeARM {
public:
ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root)
- : SlowPathCode(instruction), out_(out), root_(root) {
+ : SlowPathCodeARM(instruction), out_(out), root_(root) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -677,7 +1054,7 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCode {
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+ arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
instruction_,
instruction_->GetDexPc(),
this);
@@ -698,8 +1075,8 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCode {
};
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
inline Condition ARMCondition(IfCondition cond) {
switch (cond) {
@@ -853,9 +1230,6 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) {
}
void CodeGeneratorARM::SetupBlockedRegisters() const {
- // Don't allocate the dalvik style register pair passing.
- blocked_register_pairs_[R1_R2] = true;
-
// Stack register, LR and PC are always reserved.
blocked_core_registers_[SP] = true;
blocked_core_registers_[LR] = true;
@@ -875,19 +1249,6 @@ void CodeGeneratorARM::SetupBlockedRegisters() const {
blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
}
}
-
- UpdateBlockedPairRegisters();
-}
-
-void CodeGeneratorARM::UpdateBlockedPairRegisters() const {
- for (int i = 0; i < kNumberOfRegisterPairs; i++) {
- ArmManagedRegister current =
- ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
- if (blocked_core_registers_[current.AsRegisterPairLow()]
- || blocked_core_registers_[current.AsRegisterPairHigh()]) {
- blocked_register_pairs_[i] = true;
- }
- }
}
InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen)
@@ -951,7 +1312,13 @@ void CodeGeneratorARM::GenerateFrameEntry() {
int adjust = GetFrameSize() - FrameEntrySpillSize();
__ AddConstant(SP, -adjust);
__ cfi().AdjustCFAOffset(adjust);
- __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, 0);
+
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, 0);
+ }
}
void CodeGeneratorARM::GenerateFrameExit() {
@@ -1218,26 +1585,21 @@ void CodeGeneratorARM::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value(),
- instruction,
- dex_pc,
- slow_path);
-}
-
-void CodeGeneratorARM::InvokeRuntime(int32_t entry_point_offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path) {
- ValidateInvokeRuntime(instruction, slow_path);
- __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
- __ blx(LR);
- RecordPcInfo(instruction, dex_pc, slow_path);
+ ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+ GenerateInvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value());
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
}
void CodeGeneratorARM::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
HInstruction* instruction,
SlowPathCode* slow_path) {
ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ GenerateInvokeRuntime(entry_point_offset);
+}
+
+void CodeGeneratorARM::GenerateInvokeRuntime(int32_t entry_point_offset) {
__ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
__ blx(LR);
}
@@ -1548,13 +1910,14 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
+ SlowPathCodeARM* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -1854,9 +2217,7 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
// art::PrepareForRegisterAllocation.
DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
- IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
- codegen_->GetAssembler(),
- codegen_->GetInstructionSetFeatures());
+ IntrinsicLocationsBuilderARM intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
@@ -1902,9 +2263,7 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
}
void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
- IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
- codegen_->GetAssembler(),
- codegen_->GetInstructionSetFeatures());
+ IntrinsicLocationsBuilderARM intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -2375,19 +2734,13 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
case Primitive::kPrimFloat:
// Processing a Dex `float-to-long' instruction.
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l),
- conversion,
- conversion->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
CheckEntrypointTypes<kQuickF2l, int64_t, float>();
break;
case Primitive::kPrimDouble:
// Processing a Dex `double-to-long' instruction.
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pD2l),
- conversion,
- conversion->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
CheckEntrypointTypes<kQuickD2l, int64_t, double>();
break;
@@ -2434,10 +2787,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
case Primitive::kPrimLong:
// Processing a Dex `long-to-float' instruction.
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pL2f),
- conversion,
- conversion->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc());
CheckEntrypointTypes<kQuickL2f, float, int64_t>();
break;
@@ -2523,7 +2873,7 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -2560,13 +2910,18 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) {
break;
case Primitive::kPrimLong: {
- DCHECK(second.IsRegisterPair());
- __ adds(out.AsRegisterPairLow<Register>(),
- first.AsRegisterPairLow<Register>(),
- ShifterOperand(second.AsRegisterPairLow<Register>()));
- __ adc(out.AsRegisterPairHigh<Register>(),
- first.AsRegisterPairHigh<Register>(),
- ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ if (second.IsConstant()) {
+ uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+ GenerateAddLongConst(out, first, value);
+ } else {
+ DCHECK(second.IsRegisterPair());
+ __ adds(out.AsRegisterPairLow<Register>(),
+ first.AsRegisterPairLow<Register>(),
+ ShifterOperand(second.AsRegisterPairLow<Register>()));
+ __ adc(out.AsRegisterPairHigh<Register>(),
+ first.AsRegisterPairHigh<Register>(),
+ ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ }
break;
}
@@ -2600,7 +2955,7 @@ void LocationsBuilderARM::VisitSub(HSub* sub) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -2636,13 +2991,18 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) {
}
case Primitive::kPrimLong: {
- DCHECK(second.IsRegisterPair());
- __ subs(out.AsRegisterPairLow<Register>(),
- first.AsRegisterPairLow<Register>(),
- ShifterOperand(second.AsRegisterPairLow<Register>()));
- __ sbc(out.AsRegisterPairHigh<Register>(),
- first.AsRegisterPairHigh<Register>(),
- ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ if (second.IsConstant()) {
+ uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+ GenerateAddLongConst(out, first, -value);
+ } else {
+ DCHECK(second.IsRegisterPair());
+ __ subs(out.AsRegisterPairLow<Register>(),
+ first.AsRegisterPairLow<Register>(),
+ ShifterOperand(second.AsRegisterPairLow<Register>()));
+ __ sbc(out.AsRegisterPairHigh<Register>(),
+ first.AsRegisterPairHigh<Register>(),
+ ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ }
break;
}
@@ -2959,7 +3319,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) {
DCHECK_EQ(calling_convention.GetRegisterAt(1), second.AsRegister<Register>());
DCHECK_EQ(R0, out.AsRegister<Register>());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr);
+ codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
}
break;
@@ -2974,7 +3334,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) {
DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr);
+ codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
break;
}
@@ -3103,26 +3463,26 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) {
DCHECK_EQ(calling_convention.GetRegisterAt(1), second.AsRegister<Register>());
DCHECK_EQ(R1, out.AsRegister<Register>());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr);
+ codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
}
break;
}
case Primitive::kPrimLong: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr);
+ codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
break;
}
case Primitive::kPrimFloat: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr);
+ codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
CheckEntrypointTypes<kQuickFmodf, float, float, float>();
break;
}
case Primitive::kPrimDouble: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr);
+ codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
CheckEntrypointTypes<kQuickFmod, double, double, double>();
break;
}
@@ -3133,18 +3493,12 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) {
}
void LocationsBuilderARM::VisitDivZeroCheck(HDivZeroCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorARM::VisitDivZeroCheck(HDivZeroCheck* instruction) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM(instruction);
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM(instruction);
codegen_->AddSlowPath(slow_path);
LocationSummary* locations = instruction->GetLocations();
@@ -3562,10 +3916,7 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
__ blx(LR);
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
} else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
}
}
@@ -3585,10 +3936,7 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) {
__ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
    // Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
}
@@ -3984,6 +4332,9 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI
object_field_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
bool volatile_for_double = field_info.IsVolatile()
@@ -4044,31 +4395,51 @@ bool LocationsBuilderARM::CanEncodeConstantAsImmediate(HConstant* input_cst,
Opcode opcode) {
uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
if (Primitive::Is64BitType(input_cst->GetType())) {
- return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) &&
- CanEncodeConstantAsImmediate(High32Bits(value), opcode);
+ Opcode high_opcode = opcode;
+ SetCc low_set_cc = kCcDontCare;
+ switch (opcode) {
+ case SUB:
+ // Flip the operation to an ADD.
+ value = -value;
+ opcode = ADD;
+ FALLTHROUGH_INTENDED;
+ case ADD:
+ if (Low32Bits(value) == 0u) {
+ return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare);
+ }
+ high_opcode = ADC;
+ low_set_cc = kCcSet;
+ break;
+ default:
+ break;
+ }
+ return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) &&
+ CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare);
} else {
return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
}
}
-bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) {
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value,
+ Opcode opcode,
+ SetCc set_cc) {
ShifterOperand so;
ArmAssembler* assembler = codegen_->GetAssembler();
- if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) {
+ if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, set_cc, &so)) {
return true;
}
Opcode neg_opcode = kNoOperand;
switch (opcode) {
- case AND:
- neg_opcode = BIC;
- break;
- case ORR:
- neg_opcode = ORN;
- break;
+ case AND: neg_opcode = BIC; value = ~value; break;
+ case ORR: neg_opcode = ORN; value = ~value; break;
+ case ADD: neg_opcode = SUB; value = -value; break;
+ case ADC: neg_opcode = SBC; value = ~value; break;
+ case SUB: neg_opcode = ADD; value = -value; break;
+ case SBC: neg_opcode = ADC; value = ~value; break;
default:
return false;
}
- return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so);
+ return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so);
}
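
Since the 64-bit ADD/SUB dispatch above is easy to misread, here is a stubbed, simplified sketch of it. CanHold() merely stands in for ArmAssembler::ShifterOperandCanHold(), and the negated-opcode fallback (ADD<->SUB, ADC<->SBC, AND<->BIC, ORR<->ORN) handled by the 32-bit overload is left out.

    #include <cstdint>
    #include <cstdio>

    enum Opcode { ADD, ADC, SUB };

    static bool CanHold(uint32_t value, Opcode /* opcode */) {
      return value < 256u;  // pretend only 8-bit immediates are encodable
    }

    static bool CanEncodeLongAddSub(uint64_t value, Opcode opcode) {
      if (opcode == SUB) {
        value = -value;  // flip the operation to an ADD of the negated constant
        opcode = ADD;
      }
      uint32_t low = static_cast<uint32_t>(value);
      uint32_t high = static_cast<uint32_t>(value >> 32);
      if (low == 0u) {
        return CanHold(high, ADD);  // only the high word needs an immediate
      }
      // Otherwise the low word uses a flag-setting ADDS and the high word an ADC.
      return CanHold(low, ADD) && CanHold(high, ADC);
    }

    int main() {
      std::printf("%d\n", CanEncodeLongAddSub(UINT64_C(0x0000000500000000), ADD));
      return 0;
    }
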
void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
@@ -4282,14 +4653,8 @@ void InstructionCodeGeneratorARM::VisitUnresolvedStaticFieldSet(
}
void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::RequiresRegister());
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void CodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) {
@@ -4303,7 +4668,7 @@ void CodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) {
}
void CodeGeneratorARM::GenerateExplicitNullCheck(HNullCheck* instruction) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction);
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction);
AddSlowPath(slow_path);
LocationSummary* locations = instruction->GetLocations();
@@ -4440,6 +4805,9 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -4454,7 +4822,9 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
}
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
- if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+  // Also needed for the String compression feature.
+ if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
+ || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -4467,6 +4837,8 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
Location out_loc = locations->Out();
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
Primitive::Type type = instruction->GetType();
+ const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+ instruction->IsStringCharAt();
HInstruction* array_instr = instruction->GetArray();
bool has_intermediate_address = array_instr->IsIntermediateAddress();
// The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
@@ -4480,10 +4852,31 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimInt: {
if (index.IsConstant()) {
int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
- uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+ if (maybe_compressed_char_at) {
+ Register length = IP;
+ Label uncompressed_load, done;
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ __ LoadFromOffset(kLoadWord, length, obj, count_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ cmp(length, ShifterOperand(0));
+ __ b(&uncompressed_load, GE);
+ __ LoadFromOffset(kLoadUnsignedByte,
+ out_loc.AsRegister<Register>(),
+ obj,
+ data_offset + const_index);
+ __ b(&done);
+ __ Bind(&uncompressed_load);
+ __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
+ out_loc.AsRegister<Register>(),
+ obj,
+ data_offset + (const_index << 1));
+ __ Bind(&done);
+ } else {
+ uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
- LoadOperandType load_type = GetLoadOperandType(type);
- __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
+ LoadOperandType load_type = GetLoadOperandType(type);
+ __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
+ }
} else {
Register temp = IP;
@@ -4499,7 +4892,24 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
} else {
__ add(temp, obj, ShifterOperand(data_offset));
}
- codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+ if (maybe_compressed_char_at) {
+ Label uncompressed_load, done;
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ Register length = locations->GetTemp(0).AsRegister<Register>();
+ __ LoadFromOffset(kLoadWord, length, obj, count_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ cmp(length, ShifterOperand(0));
+ __ b(&uncompressed_load, GE);
+ __ ldrb(out_loc.AsRegister<Register>(),
+ Address(temp, index.AsRegister<Register>(), Shift::LSL, 0));
+ __ b(&done);
+ __ Bind(&uncompressed_load);
+ __ ldrh(out_loc.AsRegister<Register>(),
+ Address(temp, index.AsRegister<Register>(), Shift::LSL, 1));
+ __ Bind(&done);
+ } else {
+ codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+ }
}
break;
}
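
The compressed charAt sequence above (load the count word, test its sign, then ldrb vs. ldrh) corresponds roughly to the sketch below; the layout is only what the emitted code implies, not a specification.

    #include <cstdint>
    #include <cstring>

    static uint16_t CharAt(const uint8_t* data, int32_t count, int32_t index) {
      if (count >= 0) {
        uint16_t c;                                    // uncompressed: ldrh path
        std::memcpy(&c, data + 2 * index, sizeof(c));  // avoid unaligned access
        return c;
      }
      return data[index];                              // compressed: ldrb path
    }

    int main() {
      const uint8_t compressed[] = {'h', 'i'};
      return CharAt(compressed, INT32_MIN + 2, 1) == 'i' ? 0 : 1;
    }
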
@@ -4599,7 +5009,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
if (type == Primitive::kPrimNot) {
// Potential implicit null checks, in the case of reference
// arrays, are handled in the previous switch statement.
- } else {
+ } else if (!maybe_compressed_char_at) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -4610,12 +5020,10 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) {
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
- bool object_array_set_with_read_barrier =
- kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+ may_need_runtime_call_for_type_check ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
@@ -4712,13 +5120,15 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
}
DCHECK(needs_write_barrier);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
Label done;
- SlowPathCode* slow_path = nullptr;
+ SlowPathCodeARM* slow_path = nullptr;
if (may_need_runtime_call_for_type_check) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction);
@@ -4743,63 +5153,42 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ Bind(&non_zero);
}
- if (kEmitCompilerReadBarrier) {
- // When read barriers are enabled, the type checking
- // instrumentation requires two read barriers:
- //
- // __ Mov(temp2, temp1);
- // // /* HeapReference<Class> */ temp1 = temp1->component_type_
- // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
- //
- // // /* HeapReference<Class> */ temp2 = value->klass_
- // __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
- //
- // __ cmp(temp1, ShifterOperand(temp2));
- //
- // However, the second read barrier may trash `temp`, as it
- // is a temporary register, and as such would not be saved
- // along with live registers before calling the runtime (nor
- // restored afterwards). So in this case, we bail out and
- // delegate the work to the array set slow path.
- //
- // TODO: Extend the register allocator to support a new
- // "(locally) live temp" location so as to avoid always
- // going into the slow path when read barriers are enabled.
- __ b(slow_path->GetEntryLabel());
- } else {
- // /* HeapReference<Class> */ temp1 = array->klass_
- __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Note that when read barriers are enabled, the type checks
+ // are performed without read barriers. This is fine, even in
+ // the case where a class object is in the from-space after
+ // the flip, as a comparison involving such a type would not
+ // produce a false positive; it may of course produce a false
+ // negative, in which case we would take the ArraySet slow
+ // path.
+
+ // /* HeapReference<Class> */ temp1 = array->klass_
+ __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+ // /* HeapReference<Class> */ temp2 = value->klass_
+ __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+ // If heap poisoning is enabled, no need to unpoison `temp1`
+ // nor `temp2`, as we are comparing two poisoned references.
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ Label do_put;
+ __ b(&do_put, EQ);
+ // If heap poisoning is enabled, the `temp1` reference has
+ // not been unpoisoned yet; unpoison it now.
__ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- // /* HeapReference<Class> */ temp2 = value->klass_
- __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
- // If heap poisoning is enabled, no need to unpoison `temp1`
- // nor `temp2`, as we are comparing two poisoned references.
- __ cmp(temp1, ShifterOperand(temp2));
-
- if (instruction->StaticTypeOfArrayIsObjectArray()) {
- Label do_put;
- __ b(&do_put, EQ);
- // If heap poisoning is enabled, the `temp1` reference has
- // not been unpoisoned yet; unpoison it now.
- __ MaybeUnpoisonHeapReference(temp1);
-
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
- // If heap poisoning is enabled, no need to unpoison
- // `temp1`, as we are comparing against null below.
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_put);
- } else {
- __ b(slow_path->GetEntryLabel(), NE);
- }
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp1`, as we are comparing against null below.
+ __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ b(slow_path->GetEntryLabel(), NE);
}
}
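
A rough sketch (not ART code) of the array-store type check generated above: compare the array's component type with the value's class, and for a statically known Object[] also accept a component type whose superclass is null (java.lang.Object); anything else goes to the ArraySet slow path.

    struct Class { const Class* super_class; const Class* component_type; };
    struct Object { const Class* klass; };

    enum class StoreCheck { kOk, kSlowPath };

    static StoreCheck CheckArrayStore(const Object* array, const Object* value,
                                      bool static_type_is_object_array) {
      const Class* component = array->klass->component_type;  // temp1
      if (component == value->klass) {                         // cmp temp1, temp2
        return StoreCheck::kOk;
      }
      if (static_type_is_object_array && component->super_class == nullptr) {
        return StoreCheck::kOk;  // component type is java.lang.Object
      }
      return StoreCheck::kSlowPath;
    }

    int main() {
      Class object_class{nullptr, nullptr};
      Class string_class{&object_class, nullptr};
      Class object_array{nullptr, &object_class};
      Object value{&string_class};
      Object array{&object_array};
      return CheckArrayStore(&array, &value, /* Object[] */ true) ==
                     StoreCheck::kOk
                 ? 0
                 : 1;
    }
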
@@ -4910,6 +5299,10 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
Register out = locations->Out().AsRegister<Register>();
__ LoadFromOffset(kLoadWord, out, obj, offset);
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out compression flag from String's array length.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ bic(out, out, ShifterOperand(1u << 31));
+ }
}
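
The BIC above corresponds to reporting only the low 31 bits of the count word as the String's length; a tiny hypothetical helper mirroring it:

    #include <cstdint>

    static int32_t StringLength(uint32_t count) {
      return static_cast<int32_t>(count & 0x7fffffffu);  // drop the compression flag
    }

    int main() { return StringLength(0x80000002u) == 2 ? 0 : 1; }
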
void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
@@ -4944,20 +5337,18 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress*
}
void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) {
LocationSummary* locations = instruction->GetLocations();
- SlowPathCode* slow_path =
+ SlowPathCodeARM* slow_path =
new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
codegen_->AddSlowPath(slow_path);
@@ -4994,7 +5385,9 @@ void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction)
}
void LocationsBuilderARM::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5269,17 +5662,6 @@ void ParallelMoveResolverARM::RestoreScratch(int reg) {
HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
- if (kEmitCompilerReadBarrier) {
- switch (desired_class_load_kind) {
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageAddress:
- // TODO: Implement for read barrier.
- return HLoadClass::LoadKind::kDexCacheViaMethod;
- default:
- break;
- }
- }
switch (desired_class_load_kind) {
case HLoadClass::LoadKind::kReferrersClass:
break;
@@ -5321,10 +5703,15 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
return;
}
- LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
@@ -5338,10 +5725,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
LocationSummary* locations = cls->GetLocations();
if (cls->NeedsAccessCheck()) {
codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
- cls,
- cls->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
@@ -5349,6 +5733,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
bool generate_null_check = false;
switch (cls->GetLoadKind()) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -5356,18 +5741,21 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
DCHECK(!cls->MustGenerateClinitCheck());
// /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
Register current_method = locations->InAt(0).AsRegister<Register>();
- GenerateGcRootFieldLoad(
- cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ current_method,
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ requires_read_barrier);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
__ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
cls->GetTypeIndex()));
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
CodeGeneratorARM::PcRelativePatchInfo* labels =
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
__ BindTrackedLabel(&labels->movw_label);
@@ -5379,7 +5767,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
DCHECK_NE(cls->GetAddress(), 0u);
uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
__ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
@@ -5399,7 +5787,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
__ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
// /* GcRoot<mirror::Class> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+ GenerateGcRootFieldLoad(cls, out_loc, out, offset, requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -5408,7 +5796,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
// /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
- GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset);
+ GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -5422,14 +5810,14 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
// /* GcRoot<mirror::Class> */ out = out[type_index]
size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
- GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+ GenerateGcRootFieldLoad(cls, out_loc, out, offset, requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
}
}
if (generate_null_check || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
codegen_->AddSlowPath(slow_path);
if (generate_null_check) {
@@ -5454,7 +5842,7 @@ void LocationsBuilderARM::VisitClinitCheck(HClinitCheck* check) {
void InstructionCodeGeneratorARM::VisitClinitCheck(HClinitCheck* check) {
// We assume the class is not null.
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
check->GetLoadClass(), check, check->GetDexPc(), true);
codegen_->AddSlowPath(slow_path);
GenerateClassInitializationCheck(slow_path,
@@ -5462,7 +5850,7 @@ void InstructionCodeGeneratorARM::VisitClinitCheck(HClinitCheck* check) {
}
void InstructionCodeGeneratorARM::GenerateClassInitializationCheck(
- SlowPathCode* slow_path, Register class_reg) {
+ SlowPathCodeARM* slow_path, Register class_reg) {
__ LoadFromOffset(kLoadWord, IP, class_reg, mirror::Class::StatusOffset().Int32Value());
__ cmp(IP, ShifterOperand(mirror::Class::kStatusInitialized));
__ b(slow_path->GetEntryLabel(), LT);
@@ -5474,17 +5862,6 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck(
HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
- if (kEmitCompilerReadBarrier) {
- switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageAddress:
- // TODO: Implement for read barrier.
- return HLoadString::LoadKind::kDexCacheViaMethod;
- default:
- break;
- }
- }
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -5497,15 +5874,8 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind(
case HLoadString::LoadKind::kDexCacheAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kDexCachePcRelative:
+ case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
- // We disable pc-relative load when there is an irreducible loop, as the optimization
- // is incompatible with it.
- // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
- // with irreducible loops.
- if (GetGraph()->HasIrreducibleLoops()) {
- return HLoadString::LoadKind::kDexCacheViaMethod;
- }
break;
case HLoadString::LoadKind::kDexCacheViaMethod:
break;
@@ -5514,32 +5884,51 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind(
}
void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
- LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
- ? LocationSummary::kCallOnSlowPath
+ LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+ ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
+ ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kCallOnSlowPath)
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+
HLoadString::LoadKind load_kind = load->GetLoadKind();
- if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
- load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
- locations->SetInAt(0, Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+ locations->SetOut(Location::RegisterLocation(R0));
+ } else {
+ locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and/or marking to save everything, including temps.
+ // Note that IP may theoretically be clobbered by saving/restoring the live register
+ // (only one thanks to the custom calling convention), so we request a different temp.
+ locations->AddTemp(Location::RequiresRegister());
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+        // that the kPrimNot result register is the same as the first argument register.
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
- locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
LocationSummary* locations = load->GetLocations();
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
+ HLoadString::LoadKind load_kind = load->GetLoadKind();
- switch (load->GetLoadKind()) {
+ switch (load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
__ LoadLiteral(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
load->GetStringIndex()));
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARM::PcRelativePatchInfo* labels =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
__ BindTrackedLabel(&labels->movw_label);
@@ -5551,61 +5940,40 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
DCHECK_NE(load->GetAddress(), 0u);
uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
__ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
- // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives
- // a 128B range. To try and reduce the number of literals if we load multiple strings,
- // simply split the dex cache address to a 128B aligned base loaded from a literal
- // and the remaining offset embedded in the load.
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
- DCHECK_ALIGNED(load->GetAddress(), 4u);
- constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2;
- uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
- uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
- __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
- // /* GcRoot<mirror::String> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(load, out_loc, out, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- Register base_reg = locations->InAt(0).AsRegister<Register>();
- HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase();
- int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset();
- // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset)
- GenerateGcRootFieldLoad(load, out_loc, base_reg, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- Register current_method = locations->InAt(0).AsRegister<Register>();
-
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- break;
+ case HLoadString::LoadKind::kBssEntry: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ CodeGeneratorARM::PcRelativePatchInfo* labels =
+ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+ __ BindTrackedLabel(&labels->movw_label);
+ __ movw(temp, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->movt_label);
+ __ movt(temp, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->add_pc_label);
+ __ add(temp, temp, ShifterOperand(PC));
+ GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kEmitCompilerReadBarrier);
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
+ codegen_->AddSlowPath(slow_path);
+ __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
}
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
- codegen_->AddSlowPath(slow_path);
- __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+  // TODO: Consider re-adding the compiler code to do the string dex cache lookup.
+ DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+ InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
+ __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+ codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
}
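
Putting the kBssEntry pieces together, the fast path just emitted behaves roughly like the sketch below (not ART code): read the GC root from the linker-patched .bss slot and, if it is still null, call pResolveString, which also stores the result back into the slot, as the LoadStringSlowPathARM hunk earlier in this diff does through its pc-relative entry address.

    #include <cstdint>
    #include <cstdio>

    struct String { uint32_t index; };

    // Stand-in for the pResolveString entrypoint; fills the slot on success.
    static String* ResolveStringRuntime(String** bss_slot, uint32_t string_index) {
      static String resolved;
      resolved.index = string_index;
      *bss_slot = &resolved;
      return &resolved;
    }

    static String* LoadStringBssEntry(String** bss_slot, uint32_t string_index) {
      String* str = *bss_slot;  // GenerateGcRootFieldLoad(temp, /* offset */ 0)
      if (str == nullptr) {     // CompareAndBranchIfZero -> slow path
        str = ResolveStringRuntime(bss_slot, string_index);
      }
      return str;
    }

    int main() {
      String* slot = nullptr;
      std::printf("%u\n", LoadStringBssEntry(&slot, 42)->index);
      return 0;
    }
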
static int32_t GetExceptionTlsOffset() {
@@ -5640,8 +6008,7 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) {
}
void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) {
- codegen_->InvokeRuntime(
- QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
+ codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
@@ -5656,6 +6023,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -5663,6 +6031,7 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -5672,6 +6041,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The "out" register is used as a temporary, so it overlaps with the inputs.
@@ -5700,7 +6072,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
Label done, zero;
- SlowPathCode* slow_path = nullptr;
+ SlowPathCodeARM* slow_path = nullptr;
// Return 0 if `obj` is null.
  // Avoid null check if we know obj is not null.
@@ -5892,7 +6264,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
!instruction->CanThrowIntoCatchBlock();
- SlowPathCode* type_check_slow_path =
+ SlowPathCodeARM* type_check_slow_path =
new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
is_type_check_slow_path_fatal);
codegen_->AddSlowPath(type_check_slow_path);
@@ -6041,11 +6413,9 @@ void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction)
}
void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instruction) {
- codegen_->InvokeRuntime(instruction->IsEnter()
- ? QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
+ instruction,
+ instruction->GetDexPc());
if (instruction->IsEnter()) {
CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
} else {
@@ -6198,6 +6568,34 @@ void InstructionCodeGeneratorARM::GenerateEorConst(Register out, Register first,
__ eor(out, first, ShifterOperand(value));
}
+void InstructionCodeGeneratorARM::GenerateAddLongConst(Location out,
+ Location first,
+ uint64_t value) {
+ Register out_low = out.AsRegisterPairLow<Register>();
+ Register out_high = out.AsRegisterPairHigh<Register>();
+ Register first_low = first.AsRegisterPairLow<Register>();
+ Register first_high = first.AsRegisterPairHigh<Register>();
+ uint32_t value_low = Low32Bits(value);
+ uint32_t value_high = High32Bits(value);
+ if (value_low == 0u) {
+ if (out_low != first_low) {
+ __ mov(out_low, ShifterOperand(first_low));
+ }
+ __ AddConstant(out_high, first_high, value_high);
+ return;
+ }
+ __ AddConstantSetFlags(out_low, first_low, value_low);
+ ShifterOperand so;
+ if (__ ShifterOperandCanHold(out_high, first_high, ADC, value_high, kCcDontCare, &so)) {
+ __ adc(out_high, first_high, so);
+ } else if (__ ShifterOperandCanHold(out_low, first_low, SBC, ~value_high, kCcDontCare, &so)) {
+ __ sbc(out_high, first_high, so);
+ } else {
+ LOG(FATAL) << "Unexpected constant " << value_high;
+ UNREACHABLE();
+ }
+}
+
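Why the SBC fallback in GenerateAddLongConst above is correct: with the carry flag C produced by
the low-word ADDS, ARM defines ADC as out = first + imm + C and SBC as out = first - imm - (1 - C);
substituting ~imm (that is, -imm - 1 in two's complement) into SBC yields first + imm + C again, so
whichever of the two immediates happens to be encodable can be used. A small standalone check of
that identity (not ART code):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t values[] = {0u, 1u, 0x12345678u, 0x7fffffffu, 0xffffffffu};
  const uint32_t carries[] = {0u, 1u};
  for (uint32_t first : values) {
    for (uint32_t imm : values) {
      for (uint32_t carry : carries) {
        uint32_t adc = first + imm + carry;                                 // ADC with imm
        uint32_t sbc = first - static_cast<uint32_t>(~imm) - (1u - carry);  // SBC with ~imm
        assert(adc == sbc);
      }
    }
  }
  return 0;
}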
void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location first = locations->InAt(0);
@@ -6335,9 +6733,11 @@ void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction
void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
Register obj,
- uint32_t offset) {
+ uint32_t offset,
+ bool requires_read_barrier) {
Register root_reg = root.AsRegister<Register>();
- if (kEmitCompilerReadBarrier) {
+ if (requires_read_barrier) {
+ DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
@@ -6357,8 +6757,8 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path used to mark the GC root `root`.
- SlowPathCode* slow_path =
+ // Slow path marking the GC root `root`.
+ SlowPathCodeARM* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
codegen_->AddSlowPath(slow_path);
@@ -6427,7 +6827,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check) {
+ bool needs_null_check,
+ bool always_update_field,
+ Register* temp2) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6466,13 +6868,15 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Introduce a dependency on the lock_word including the rb_state,
// which shall prevent load-load reordering without using
// a memory barrier (which would be more expensive).
- // obj is unchanged by this operation, but its value now depends on temp_reg.
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp_reg`.
__ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
// The actual reference load.
if (index.IsValid()) {
- // Load types involving an "index": ArrayGet and
- // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+ // Load types involving an "index": ArrayGet,
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics.
// /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
if (index.IsConstant()) {
size_t computed_offset =
@@ -6480,9 +6884,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
__ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
} else {
// Handle the special case of the
- // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use
- // a register pair as index ("long offset"), of which only the low
- // part contains data.
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics, which use a register pair as index ("long
+ // offset"), of which only the low part contains data.
Register index_reg = index.IsRegisterPair()
? index.AsRegisterPairLow<Register>()
: index.AsRegister<Register>();
@@ -6497,9 +6901,22 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Object* ref = ref_addr->AsMirrorPtr()
__ MaybeUnpoisonHeapReference(ref_reg);
- // Slow path used to mark the object `ref` when it is gray.
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCodeARM* slow_path;
+ if (always_update_field) {
+ DCHECK(temp2 != nullptr);
+ // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
+ // of the form `obj + field_offset`, where `obj` is a register and
+ // `field_offset` is a register pair (of which only the lower half
+ // is used). Thus `offset` and `scale_factor` above are expected
+ // to be null in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
+ instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
+ }
AddSlowPath(slow_path);
// if (rb_state == ReadBarrier::gray_ptr_)
@@ -6534,7 +6951,7 @@ void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction,
// not used by the artReadBarrierSlow entry point.
//
// TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
- SlowPathCode* slow_path = new (GetGraph()->GetArena())
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena())
ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
AddSlowPath(slow_path);
@@ -6569,7 +6986,7 @@ void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction,
//
// Note that GC roots are not affected by heap poisoning, so we do
// not need to do anything special for this here.
- SlowPathCode* slow_path =
+ SlowPathCodeARM* slow_path =
new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
AddSlowPath(slow_path);
@@ -6579,7 +6996,7 @@ void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction,
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method) {
+ HInvokeStaticOrDirect* invoke) {
HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
// We disable pc-relative load when there is an irreducible loop, as the optimization
// is incompatible with it.
@@ -6593,7 +7010,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOr
if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
const DexFile& outer_dex_file = GetGraph()->GetDexFile();
- if (&outer_dex_file != target_method.dex_file) {
+ if (&outer_dex_file != invoke->GetTargetMethod().dex_file) {
// Calls across dex files are more likely to exceed the available BL range,
// so use absolute patch with fixup if available and kCallArtMethod otherwise.
HInvokeStaticOrDirect::CodePtrLocation code_ptr_location =
@@ -6655,10 +7072,13 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
- case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+ case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
+ uint32_t offset =
+ GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
// temp = thread->string_init_entrypoint
- __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset());
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, offset);
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
@@ -6708,7 +7128,8 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
__ bl(GetFrameEntryLabel());
break;
case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
- relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+ relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetTargetMethod().dex_method_index);
__ BindTrackedLabel(&relative_call_patches_.back().label);
// Arbitrarily branch to the BL itself, override at link time.
__ bl(&relative_call_patches_.back().label);
@@ -6810,17 +7231,37 @@ Literal* CodeGeneratorARM::DeduplicateDexCacheAddressLiteral(uint32_t address) {
return DeduplicateUint32Literal(address, &uint32_literals_);
}
+template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorARM::EmitPcRelativeLinkerPatches(
+ const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<LinkerPatch>* linker_patches) {
+ for (const PcRelativePatchInfo& info : infos) {
+ const DexFile& dex_file = info.target_dex_file;
+ size_t offset_or_index = info.offset_or_index;
+ DCHECK(info.add_pc_label.IsBound());
+ uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
+ // Add MOVW patch.
+ DCHECK(info.movw_label.IsBound());
+ uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
+ linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index));
+ // Add MOVT patch.
+ DCHECK(info.movt_label.IsBound());
+ uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
+ linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index));
+ }
+}
+
void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
method_patches_.size() +
call_patches_.size() +
relative_call_patches_.size() +
- /* MOVW+MOVT for each base */ 2u * pc_relative_dex_cache_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
boot_image_string_patches_.size() +
- /* MOVW+MOVT for each base */ 2u * pc_relative_string_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
- /* MOVW+MOVT for each base */ 2u * pc_relative_type_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
boot_image_address_patches_.size();
linker_patches->reserve(size);
for (const auto& entry : method_patches_) {
@@ -6841,32 +7282,13 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
target_method.dex_file,
target_method.dex_method_index));
}
- for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
+ for (const PatchInfo<Label>& info : relative_call_patches_) {
uint32_t literal_offset = info.label.Position();
- linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
- info.target_method.dex_file,
- info.target_method.dex_method_index));
- }
- for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
- const DexFile& dex_file = info.target_dex_file;
- size_t base_element_offset = info.offset_or_index;
- DCHECK(info.add_pc_label.IsBound());
- uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
- // Add MOVW patch.
- DCHECK(info.movw_label.IsBound());
- uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
- linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
- &dex_file,
- add_pc_offset,
- base_element_offset));
- // Add MOVT patch.
- DCHECK(info.movt_label.IsBound());
- uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
- linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
- &dex_file,
- add_pc_offset,
- base_element_offset));
+ linker_patches->push_back(
+ LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index));
}
+ EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
+ linker_patches);
for (const auto& entry : boot_image_string_patches_) {
const StringReference& target_string = entry.first;
Literal* literal = entry.second;
@@ -6876,25 +7298,12 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
target_string.dex_file,
target_string.string_index));
}
- for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
- const DexFile& dex_file = info.target_dex_file;
- uint32_t string_index = info.offset_or_index;
- DCHECK(info.add_pc_label.IsBound());
- uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
- // Add MOVW patch.
- DCHECK(info.movw_label.IsBound());
- uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
- linker_patches->push_back(LinkerPatch::RelativeStringPatch(movw_offset,
- &dex_file,
- add_pc_offset,
- string_index));
- // Add MOVT patch.
- DCHECK(info.movt_label.IsBound());
- uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
- linker_patches->push_back(LinkerPatch::RelativeStringPatch(movt_offset,
- &dex_file,
- add_pc_offset,
- string_index));
+ if (!GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ linker_patches);
+ } else {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
+ linker_patches);
}
for (const auto& entry : boot_image_type_patches_) {
const TypeReference& target_type = entry.first;
@@ -6905,26 +7314,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
target_type.dex_file,
target_type.type_index));
}
- for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
- const DexFile& dex_file = info.target_dex_file;
- uint32_t type_index = info.offset_or_index;
- DCHECK(info.add_pc_label.IsBound());
- uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
- // Add MOVW patch.
- DCHECK(info.movw_label.IsBound());
- uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
- linker_patches->push_back(LinkerPatch::RelativeTypePatch(movw_offset,
- &dex_file,
- add_pc_offset,
- type_index));
- // Add MOVT patch.
- DCHECK(info.movt_label.IsBound());
- uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
- linker_patches->push_back(LinkerPatch::RelativeTypePatch(movt_offset,
- &dex_file,
- add_pc_offset,
- type_index));
- }
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+ linker_patches);
for (const auto& entry : boot_image_address_patches_) {
DCHECK(GetCompilerOptions().GetIncludePatchInformation());
Literal* literal = entry.second;
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index fa7709b9a3..3d46aab31f 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -50,6 +50,18 @@ static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
+class SlowPathCodeARM : public SlowPathCode {
+ public:
+ explicit SlowPathCodeARM(HInstruction* instruction) : SlowPathCode(instruction) {}
+
+ void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) FINAL;
+ void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) FINAL;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM);
+};
+
+
class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> {
public:
InvokeRuntimeCallingConvention()
@@ -63,9 +75,9 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegis
DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};
-static constexpr DRegister FromLowSToD(SRegister reg) {
- return DCHECK_CONSTEXPR(reg % 2 == 0, , D0)
- static_cast<DRegister>(reg / 2);
+constexpr DRegister FromLowSToD(SRegister reg) {
+ DCHECK_EQ(reg % 2, 0);
+ return static_cast<DRegister>(reg / 2);
}
@@ -183,7 +195,7 @@ class LocationsBuilderARM : public HGraphVisitor {
Location ArithmeticZeroOrFpuRegister(HInstruction* input);
Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
- bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode);
+ bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare);
CodeGeneratorARM* const codegen_;
InvokeDexCallingConventionVisitorARM parameter_visitor_;
@@ -216,10 +228,11 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
// is the block to branch to if the suspend check is not needed, and after
// the suspend call.
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
- void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
+ void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg);
void GenerateAndConst(Register out, Register first, uint32_t value);
void GenerateOrrConst(Register out, Register first, uint32_t value);
void GenerateEorConst(Register out, Register first, uint32_t value);
+ void GenerateAddLongConst(Location out, Location first, uint64_t value);
void HandleBitwiseOperation(HBinaryOperation* operation);
void HandleCondition(HCondition* condition);
void HandleIntegerRotate(LocationSummary* locations);
@@ -270,11 +283,12 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
//
// root <- *(obj + offset)
//
- // while honoring read barriers (if any).
+ // while honoring read barriers if `requires_read_barrier` is true.
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
Register obj,
- uint32_t offset);
+ uint32_t offset,
+ bool requires_read_barrier);
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
Label* true_target,
@@ -351,9 +365,6 @@ class CodeGeneratorARM : public CodeGenerator {
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
- // Blocks all register pairs made out of blocked core registers.
- void UpdateBlockedPairRegisters() const;
-
ParallelMoveResolverARM* GetMoveResolver() OVERRIDE {
return &move_resolver_;
}
@@ -389,12 +400,7 @@ class CodeGeneratorARM : public CodeGenerator {
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path) OVERRIDE;
-
- void InvokeRuntime(int32_t offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path);
+ SlowPathCode* slow_path = nullptr) OVERRIDE;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -402,6 +408,8 @@ class CodeGeneratorARM : public CodeGenerator {
HInstruction* instruction,
SlowPathCode* slow_path);
+ void GenerateInvokeRuntime(int32_t entry_point_offset);
+
// Emit a write barrier.
void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
@@ -443,7 +451,7 @@ class CodeGeneratorARM : public CodeGenerator {
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method) OVERRIDE;
+ HInvokeStaticOrDirect* invoke) OVERRIDE;
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
@@ -500,6 +508,18 @@ class CodeGeneratorARM : public CodeGenerator {
bool needs_null_check);
// Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
+
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at the address
+ // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+ // `ref`, and mark it if needed.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`). This operation
+ // requires an extra temporary register, which must be provided as a
+ // non-null pointer (`temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
@@ -507,7 +527,9 @@ class CodeGeneratorARM : public CodeGenerator {
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check);
+ bool needs_null_check,
+ bool always_update_field = false,
+ Register* temp2 = nullptr);
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
@@ -557,10 +579,10 @@ class CodeGeneratorARM : public CodeGenerator {
// artReadBarrierForRootSlow.
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
- void GenerateNop();
+ void GenerateNop() OVERRIDE;
- void GenerateImplicitNullCheck(HNullCheck* instruction);
- void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
private:
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
@@ -582,6 +604,10 @@ class CodeGeneratorARM : public CodeGenerator {
uint32_t offset_or_index,
ArenaDeque<PcRelativePatchInfo>* patches);
+ template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<LinkerPatch>* linker_patches);
+
// Labels for each block that will be compiled.
Label* block_labels_; // Indexed by block id.
Label frame_entry_label_;
@@ -598,12 +624,12 @@ class CodeGeneratorARM : public CodeGenerator {
MethodToLiteralMap call_patches_;
// Relative call patch info.
// Using ArenaDeque<> which retains element addresses on push/emplace_back().
- ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+ ArenaDeque<PatchInfo<Label>> relative_call_patches_;
// PC-relative patch info for each HArmDexCacheArraysBase.
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
// Deduplication map for boot string literals for kBootImageLinkTimeAddress.
BootStringToLiteralMap boot_image_string_patches_;
- // PC-relative String patch info.
+ // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
// Deduplication map for boot type literals for kBootImageLinkTimeAddress.
BootTypeToLiteralMap boot_image_type_patches_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index f3a09fd09f..b53750966d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -46,16 +46,20 @@ class GcRoot;
namespace arm64 {
+using helpers::ARM64EncodableConstantOrRegister;
+using helpers::ArtVixlRegCodeCoherentForRegSet;
using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
using helpers::InputCPURegisterAt;
+using helpers::InputCPURegisterOrZeroRegAt;
using helpers::InputFPRegisterAt;
-using helpers::InputRegisterAt;
using helpers::InputOperandAt;
+using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
+using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
@@ -66,8 +70,6 @@ using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
-using helpers::ARM64EncodableConstantOrRegister;
-using helpers::ArtVixlRegCodeCoherentForRegSet;
static constexpr int kCurrentMethodStackOffset = 0;
// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
@@ -131,24 +133,24 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type retur
return ARM64ReturnLocation(return_type);
}
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
// Calculate memory accessing operand for save/restore live registers.
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
- RegisterSet* register_set,
+ LocationSummary* locations,
int64_t spill_offset,
bool is_save) {
- DCHECK(ArtVixlRegCodeCoherentForRegSet(register_set->GetCoreRegisters(),
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
codegen->GetNumberOfCoreRegisters(),
- register_set->GetFloatingPointRegisters(),
+ fp_spills,
codegen->GetNumberOfFloatingPointRegisters()));
- CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize,
- register_set->GetCoreRegisters() & (~callee_saved_core_registers.GetList()));
- CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize,
- register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.GetList()));
+ CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
+ CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize, fp_spills);
MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
@@ -182,38 +184,35 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
}
void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- RegisterSet* register_set = locations->GetLiveRegisters();
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
- for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
- if (!codegen->IsCoreCalleeSaveRegister(i) && register_set->ContainsCoreRegister(i)) {
- // If the register holds an object, update the stack mask.
- if (locations->RegisterContainsObject(i)) {
- locations->SetStackBit(stack_offset / kVRegSize);
- }
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- saved_core_stack_offsets_[i] = stack_offset;
- stack_offset += kXRegSizeInBytes;
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ // If the register holds an object, update the stack mask.
+ if (locations->RegisterContainsObject(i)) {
+ locations->SetStackBit(stack_offset / kVRegSize);
}
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_core_stack_offsets_[i] = stack_offset;
+ stack_offset += kXRegSizeInBytes;
}
- for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
- if (!codegen->IsFloatingPointCalleeSaveRegister(i) &&
- register_set->ContainsFloatingPointRegister(i)) {
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- saved_fpu_stack_offsets_[i] = stack_offset;
- stack_offset += kDRegSizeInBytes;
- }
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ for (uint32_t i : LowToHighBits(fp_spills)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_fpu_stack_offsets_[i] = stack_offset;
+ stack_offset += kDRegSizeInBytes;
}
- SaveRestoreLiveRegistersHelper(codegen, register_set,
+ SaveRestoreLiveRegistersHelper(codegen,
+ locations,
codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
}
void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- RegisterSet* register_set = locations->GetLiveRegisters();
- SaveRestoreLiveRegistersHelper(codegen, register_set,
+ SaveRestoreLiveRegistersHelper(codegen,
+ locations,
codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
}
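For readers unfamiliar with the helper used in the new spill loops above: LowToHighBits(mask)
walks the set bit indices of a register mask from the lowest to the highest bit. A minimal
stand-in with the same behaviour might look like this (illustrative only, not the ART
implementation):

#include <cstdint>
#include <vector>

inline std::vector<uint32_t> LowToHighBitIndices(uint32_t mask) {
  std::vector<uint32_t> bits;
  while (mask != 0u) {
    bits.push_back(static_cast<uint32_t>(__builtin_ctz(mask)));  // lowest set bit (GCC/Clang intrinsic)
    mask &= mask - 1u;                                           // clear that bit
  }
  return bits;
}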
@@ -236,10 +235,10 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
codegen->EmitParallelMoves(
locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
- uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
- ? QUICK_ENTRY_POINT(pThrowStringBounds)
- : QUICK_ENTRY_POINT(pThrowArrayBounds);
- arm64_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this);
+ QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
+ ? kQuickThrowStringBounds
+ : kQuickThrowArrayBounds;
+ arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
@@ -259,12 +258,7 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- if (instruction_->CanThrowIntoCatchBlock()) {
- // Live registers will be restored in the catch block if caught.
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- }
- arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
+ arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
@@ -295,9 +289,9 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
InvokeRuntimeCallingConvention calling_convention;
__ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex());
- int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
- : QUICK_ENTRY_POINT(pInitializeType);
- arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
+ QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
+ : kQuickInitializeType;
+ arm64_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
if (do_clinit_) {
CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
} else {
@@ -337,32 +331,60 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
public:
- explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {}
+ LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
+ : SlowPathCodeARM64(instruction),
+ temp_(temp),
+ adrp_label_(adrp_label) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ // temp_ is a scratch register. Make sure it's not used for saving/restoring registers.
+ UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
+ temps.Exclude(temp_);
+
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
__ Mov(calling_convention.GetRegisterAt(0).W(), string_index);
- arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
+ arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
Primitive::Type type = instruction_->GetType();
arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
RestoreLiveRegisters(codegen, locations);
+
+ // Store the resolved String to the BSS entry.
+ const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // The string entry page address was preserved in temp_ thanks to kSaveEverything.
+ } else {
+ // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
+ adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
+ arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
+ }
+ vixl::aarch64::Label* strp_label =
+ arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
+ {
+ SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
+ __ Bind(strp_label);
+ __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
+ MemOperand(temp_, /* offset placeholder */ 0));
+ }
+
__ B(GetExitLabel());
}
const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
private:
+ const Register temp_;
+ vixl::aarch64::Label* adrp_label_;
+
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
};
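A brief sketch, not from this patch, of the address arithmetic behind the ADRP/STR pair bound
above: ADRP materializes the 4 KiB page of the string's .bss entry relative to the current PC
page, and the STR immediate supplies the offset within that page (the instruction encoding scales
it by the access size). Names below are illustrative only.

#include <cstdint>

struct AdrpStrPatch {
  int64_t adrp_page_delta;   // (target page - pc page), encoded in the ADRP
  uint32_t str_page_offset;  // byte offset of the entry within its page
};

inline AdrpStrPatch ComputeAdrpStrPatch(uint64_t bss_entry_address, uint64_t adrp_pc) {
  constexpr uint64_t kPageMask = ~UINT64_C(0xfff);  // 4 KiB pages
  return {static_cast<int64_t>((bss_entry_address & kPageMask) - (adrp_pc & kPageMask)),
          static_cast<uint32_t>(bss_entry_address & 0xfffu)};
}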
@@ -377,8 +399,10 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
- arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
+ arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
@@ -398,11 +422,8 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
+ arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -460,8 +481,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
object_class, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
- arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
+ arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t,
const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
@@ -469,7 +489,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
} else {
DCHECK(instruction_->IsCheckCast());
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
+ arm64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
@@ -480,7 +500,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
}
const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
- bool IsFatal() const { return is_fatal_; }
+ bool IsFatal() const OVERRIDE { return is_fatal_; }
private:
const bool is_fatal_;
@@ -496,11 +516,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -539,10 +555,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
@@ -576,11 +589,21 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
}
}
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
public:
- ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj)
- : SlowPathCodeARM64(instruction), obj_(obj) {
+ ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location ref)
+ : SlowPathCodeARM64(instruction), ref_(ref) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -589,15 +612,18 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg()));
+ DCHECK(ref_.IsRegister()) << ref_;
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
instruction_->IsLoadClass() ||
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -606,37 +632,207 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- DCHECK_NE(obj_.reg(), LR);
- DCHECK_NE(obj_.reg(), WSP);
- DCHECK_NE(obj_.reg(), WZR);
- DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
+ DCHECK_NE(ref_.reg(), LR);
+ DCHECK_NE(ref_.reg(), WSP);
+ DCHECK_NE(ref_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary; it cannot be the entry point's input/output.
+ DCHECK_NE(ref_.reg(), IP0);
+ DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
// "Compact" slow path, saving two moves.
//
// Instead of using the standard runtime calling convention (input
// and output in W0):
//
- // W0 <- obj
+ // W0 <- ref
// W0 <- ReadBarrierMark(W0)
- // obj <- W0
+ // ref <- W0
//
- // we just use rX (the register holding `obj`) as input and output
+ // we just use rX (the register containing `ref`) as input and output
// of a dedicated entrypoint:
//
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(obj_.reg());
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
// This runtime call does not require a stack map.
arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ B(GetExitLabel());
}
private:
- const Location obj_;
+ // The location (register) of the marked object reference.
+ const Location ref_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
};
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathARM64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp)
+ : SlowPathCodeARM64(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierMarkAndUpdateFieldSlowPathARM64";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = WRegisterFrom(ref_);
+ DCHECK(locations->CanCall());
+ DCHECK(ref_.IsRegister()) << ref_;
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegister()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use IP to save the old reference, as IP is
+ // used internally by the ReadBarrierMarkRegX entry point, and we
+ // need the old reference after the call to that entry point.
+ DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+ __ Mov(temp_.W(), ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ DCHECK_NE(ref_.reg(), LR);
+ DCHECK_NE(ref_.reg(), WSP);
+ DCHECK_NE(ref_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary; it cannot be the entry point's input/output.
+ DCHECK_NE(ref_.reg(), IP0);
+ DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in W0):
+ //
+ // W0 <- ref
+ // W0 <- ReadBarrierMark(W0)
+ // ref <- W0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+ // This runtime call does not require a stack map.
+ arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*(obj_ + field_offset_)`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // LDXR/CMP/BNE sequence of instructions in the compare-and-set
+ // (CAS) operation below would abort the CAS, leaving the field
+ // as-is.
+ vixl::aarch64::Label done;
+ __ Cmp(temp_.W(), ref_reg);
+ __ B(eq, &done);
+
+    // Update the holder's field atomically. This may fail if the
+    // mutator updates the field before us, but that is OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+
+ MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+
+ // Convenience aliases.
+ Register base = obj_.W();
+ Register offset = XRegisterFrom(field_offset_);
+ Register expected = temp_.W();
+ Register value = ref_reg;
+ Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
+ Register tmp_value = temps.AcquireW(); // Value in memory.
+
+ __ Add(tmp_ptr, base.X(), Operand(offset));
+
+ if (kPoisonHeapReferences) {
+ arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
+ if (value.Is(expected)) {
+ // Do not poison `value`, as it is the same register as
+ // `expected`, which has just been poisoned.
+ } else {
+ arm64_codegen->GetAssembler()->PoisonHeapReference(value);
+ }
+ }
+
+    // do {
+    //   tmp_value = [tmp_ptr];
+    //   if (tmp_value != expected) break;  // The field no longer holds `expected`.
+    // } while (failure([tmp_ptr] <- value));
+
+ vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
+ __ Bind(&loop_head);
+ __ Ldxr(tmp_value, MemOperand(tmp_ptr));
+ __ Cmp(tmp_value, expected);
+ __ B(&comparison_failed, ne);
+ __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
+ __ Cbnz(tmp_value, &loop_head);
+ __ B(&exit_loop);
+ __ Bind(&comparison_failed);
+ __ Clrex();
+ __ Bind(&exit_loop);
+
+ if (kPoisonHeapReferences) {
+ arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
+ if (value.Is(expected)) {
+ // Do not unpoison `value`, as it is the same register as
+ // `expected`, which has just been unpoisoned.
+ } else {
+ arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
+ }
+ }
+
+ __ Bind(&done);
+ __ B(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const Register obj_;
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
+ const Register temp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64);
+};
+
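The control flow of the slow path above, reduced to a hedged C++ sketch (the names and the
MarkReference stand-in are illustrative; the generated code really calls the per-register
ReadBarrierMarkRegX entrypoint and uses an LDXR/STXR loop rather than std::atomic):

#include <atomic>

// Hypothetical stand-in for the marking entrypoint.
void* MarkReference(void* ref);

inline void* MarkAndMaybeUpdateField(std::atomic<void*>* field, void* old_ref) {
  void* new_ref = MarkReference(old_ref);  // Mark; may return a to-space copy.
  if (new_ref != old_ref) {
    // Best-effort publish of the to-space pointer: if another thread already
    // changed the field, the CAS fails and the field is left as-is.
    field->compare_exchange_strong(old_ref, new_ref, std::memory_order_relaxed);
  }
  return new_ref;
}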
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
public:
@@ -750,7 +946,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
<< instruction_->AsInvoke()->GetIntrinsic();
- DCHECK_EQ(offset_, 0U);
+ DCHECK_EQ(offset_, 0u);
DCHECK(index_.IsRegister());
}
}
@@ -777,7 +973,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
}
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+ arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
instruction_,
instruction_->GetDexPc(),
this);
@@ -856,7 +1052,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
// which would emit a 32-bit move, as `type` is a (32-bit wide)
// reference type (`Primitive::kPrimNot`).
__ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+ arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
instruction_,
instruction_->GetDexPc(),
this);
@@ -1038,7 +1234,15 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
// ... : other preserved fp registers.
// ... : reserved frame space.
// sp[0] : current method.
- __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
+
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
+ } else {
+ __ Claim(frame_size);
+ }
GetAssembler()->cfi().AdjustCFAOffset(frame_size);
GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
frame_size - GetCoreSpillSize());
@@ -1463,12 +1667,18 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
- DCHECK(src.IsFPRegister());
DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
-      Register temp = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
-      __ Fmov(temp, FPRegister(src));
-      __ Stlr(temp, base);
+      Register temp_src;
+      if (src.IsZero()) {
+        // The zero register is used to avoid synthesizing zero constants.
+        temp_src = Register(src);
+      } else {
+        DCHECK(src.IsFPRegister());
+        temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+        __ Fmov(temp_src, FPRegister(src));
+      }
+      __ Stlr(temp_src, base);
break;
}
case Primitive::kPrimVoid:
@@ -1480,27 +1690,21 @@ void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value(),
- instruction,
- dex_pc,
- slow_path);
-}
-
-void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path) {
- ValidateInvokeRuntime(instruction, slow_path);
- BlockPoolsScope block_pools(GetVIXLAssembler());
- __ Ldr(lr, MemOperand(tr, entry_point_offset));
- __ Blr(lr);
- RecordPcInfo(instruction, dex_pc, slow_path);
+ ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+ GenerateInvokeRuntime(GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value());
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
}
void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
HInstruction* instruction,
SlowPathCode* slow_path) {
ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ GenerateInvokeRuntime(entry_point_offset);
+}
+
+void CodeGeneratorARM64::GenerateInvokeRuntime(int32_t entry_point_offset) {
BlockPoolsScope block_pools(GetVIXLAssembler());
__ Ldr(lr, MemOperand(tr, entry_point_offset));
__ Blr(lr);
@@ -1641,6 +1845,9 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
object_field_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
@@ -1707,7 +1914,9 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
+ if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ } else if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
locations->SetInAt(1, Location::RequiresFpuRegister());
} else {
locations->SetInAt(1, Location::RequiresRegister());
@@ -1721,7 +1930,7 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
BlockPoolsScope block_pools(GetVIXLAssembler());
Register obj = InputRegisterAt(instruction, 0);
- CPURegister value = InputCPURegisterAt(instruction, 1);
+ CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
CPURegister source = value;
Offset offset = field_info.GetFieldOffset();
Primitive::Type field_type = field_info.GetFieldType();
@@ -2064,6 +2273,9 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -2085,7 +2297,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
Location index = locations->InAt(1);
Location out = locations->Out();
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
-
+ const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+ instruction->IsStringCharAt();
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
// Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
@@ -2103,9 +2316,28 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
} else {
// General case.
MemOperand source = HeapOperand(obj);
+ Register length;
+ if (maybe_compressed_char_at) {
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ length = temps.AcquireW();
+ __ Ldr(length, HeapOperand(obj, count_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
if (index.IsConstant()) {
- offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
- source = HeapOperand(obj, offset);
+ if (maybe_compressed_char_at) {
+ vixl::aarch64::Label uncompressed_load, done;
+ __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+ __ Ldrb(Register(OutputCPURegister(instruction)),
+ HeapOperand(obj, offset + Int64ConstantFrom(index)));
+ __ B(&done);
+ __ Bind(&uncompressed_load);
+ __ Ldrh(Register(OutputCPURegister(instruction)),
+ HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
+ __ Bind(&done);
+ } else {
+ offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ source = HeapOperand(obj, offset);
+ }
} else {
Register temp = temps.AcquireSameSizeAs(obj);
if (instruction->GetArray()->IsIntermediateAddress()) {
@@ -2123,11 +2355,24 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
} else {
__ Add(temp, obj, offset);
}
- source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+ if (maybe_compressed_char_at) {
+ vixl::aarch64::Label uncompressed_load, done;
+ __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+ __ Ldrb(Register(OutputCPURegister(instruction)),
+ HeapOperand(temp, XRegisterFrom(index), LSL, 0));
+ __ B(&done);
+ __ Bind(&uncompressed_load);
+ __ Ldrh(Register(OutputCPURegister(instruction)),
+ HeapOperand(temp, XRegisterFrom(index), LSL, 1));
+ __ Bind(&done);
+ } else {
+ source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+ }
+ }
+ if (!maybe_compressed_char_at) {
+ codegen_->Load(type, OutputCPURegister(instruction), source);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
-
- codegen_->Load(type, OutputCPURegister(instruction), source);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
if (type == Primitive::kPrimNot) {
static_assert(
@@ -2151,25 +2396,30 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
+ vixl::aarch64::Register out = OutputRegister(instruction);
BlockPoolsScope block_pools(GetVIXLAssembler());
- __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset));
+ __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out compression flag from String's array length.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX)));
+ }
}
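Both hunks above hinge on the string compression flag living in the sign bit of the count word: the Tbz tests bit 31 to pick the byte or halfword load, and the And masks the flag off the reported length. A plain-C++ sketch of the same logic, with the data layout assumed purely for illustration:

    // Illustrative only: layout and flag polarity are inferred from the generated code above.
    uint16_t CharAtSketch(int32_t count_word, const void* data, size_t index) {
      const bool compressed = count_word < 0;           // sign bit set -> 8-bit chars.
      return compressed
          ? static_cast<const uint8_t*>(data)[index]    // Ldrb path.
          : static_cast<const uint16_t*>(data)[index];  // Ldrh path.
    }
    int32_t LengthSketch(int32_t count_word) {
      return count_word & INT32_MAX;                    // strip the compression flag.
    }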
void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
Primitive::Type value_type = instruction->GetComponentType();
bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
- bool object_array_set_with_read_barrier =
- kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+ may_need_runtime_call_for_type_check ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(value_type)) {
+ if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ } else if (Primitive::IsFloatingPointType(value_type)) {
locations->SetInAt(2, Location::RequiresFpuRegister());
} else {
locations->SetInAt(2, Location::RequiresRegister());
@@ -2184,7 +2434,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
Register array = InputRegisterAt(instruction, 0);
- CPURegister value = InputCPURegisterAt(instruction, 2);
+ CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
CPURegister source = value;
Location index = locations->InAt(1);
size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
@@ -2223,7 +2473,6 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
codegen_->Store(value_type, value, destination);
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
- DCHECK(needs_write_barrier);
DCHECK(!instruction->GetArray()->IsIntermediateAddress());
vixl::aarch64::Label done;
SlowPathCodeARM64* slow_path = nullptr;
@@ -2261,65 +2510,44 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
__ Bind(&non_zero);
}
- if (kEmitCompilerReadBarrier) {
- // When read barriers are enabled, the type checking
- // instrumentation requires two read barriers:
- //
- // __ Mov(temp2, temp);
- // // /* HeapReference<Class> */ temp = temp->component_type_
- // __ Ldr(temp, HeapOperand(temp, component_offset));
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
- //
- // // /* HeapReference<Class> */ temp2 = value->klass_
- // __ Ldr(temp2, HeapOperand(Register(value), class_offset));
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
- //
- // __ Cmp(temp, temp2);
- //
- // However, the second read barrier may trash `temp`, as it
- // is a temporary register, and as such would not be saved
- // along with live registers before calling the runtime (nor
- // restored afterwards). So in this case, we bail out and
- // delegate the work to the array set slow path.
- //
- // TODO: Extend the register allocator to support a new
- // "(locally) live temp" location so as to avoid always
- // going into the slow path when read barriers are enabled.
- __ B(slow_path->GetEntryLabel());
- } else {
- Register temp2 = temps.AcquireSameSizeAs(array);
- // /* HeapReference<Class> */ temp = array->klass_
- __ Ldr(temp, HeapOperand(array, class_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Note that when Baker read barriers are enabled, the type
+ // checks are performed without read barriers. This is fine,
+ // even in the case where a class object is in the from-space
+ // after the flip, as a comparison involving such a type would
+ // not produce a false positive; it may of course produce a
+ // false negative, in which case we would take the ArraySet
+ // slow path.
+
+ Register temp2 = temps.AcquireSameSizeAs(array);
+ // /* HeapReference<Class> */ temp = array->klass_
+ __ Ldr(temp, HeapOperand(array, class_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ __ Ldr(temp, HeapOperand(temp, component_offset));
+ // /* HeapReference<Class> */ temp2 = value->klass_
+ __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+ // If heap poisoning is enabled, no need to unpoison `temp`
+ // nor `temp2`, as we are comparing two poisoned references.
+ __ Cmp(temp, temp2);
+ temps.Release(temp2);
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ vixl::aarch64::Label do_put;
+ __ B(eq, &do_put);
+ // If heap poisoning is enabled, the `temp` reference has
+ // not been unpoisoned yet; unpoison it now.
GetAssembler()->MaybeUnpoisonHeapReference(temp);
- // /* HeapReference<Class> */ temp = temp->component_type_
- __ Ldr(temp, HeapOperand(temp, component_offset));
- // /* HeapReference<Class> */ temp2 = value->klass_
- __ Ldr(temp2, HeapOperand(Register(value), class_offset));
- // If heap poisoning is enabled, no need to unpoison `temp`
- // nor `temp2`, as we are comparing two poisoned references.
- __ Cmp(temp, temp2);
-
- if (instruction->StaticTypeOfArrayIsObjectArray()) {
- vixl::aarch64::Label do_put;
- __ B(eq, &do_put);
- // If heap poisoning is enabled, the `temp` reference has
- // not been unpoisoned yet; unpoison it now.
- GetAssembler()->MaybeUnpoisonHeapReference(temp);
-
- // /* HeapReference<Class> */ temp = temp->super_class_
- __ Ldr(temp, HeapOperand(temp, super_offset));
- // If heap poisoning is enabled, no need to unpoison
- // `temp`, as we are comparing against null below.
- __ Cbnz(temp, slow_path->GetEntryLabel());
- __ Bind(&do_put);
- } else {
- __ B(ne, slow_path->GetEntryLabel());
- }
- temps.Release(temp2);
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ __ Ldr(temp, HeapOperand(temp, super_offset));
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp`, as we are comparing against null below.
+ __ Cbnz(temp, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ B(ne, slow_path->GetEntryLabel());
}
}
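The replacement block performs the element type check without read barriers, as the new comment explains. Restated as a sketch, with struct and field names standing in for the class_offset/component_offset/super_offset loads (they are not the real mirror:: types):

    struct KlassSketch { KlassSketch* component_type_; KlassSketch* super_class_; };
    struct ObjSketch   { KlassSketch* klass_; };

    bool ArrayStoreNeedsSlowPath(ObjSketch* array,
                                 ObjSketch* value,
                                 bool static_type_is_object_array) {
      KlassSketch* component = array->klass_->component_type_;  // temp, via two loads.
      if (component == value->klass_) {                         // Cmp temp, temp2.
        return false;                                           // Exact match: store directly.
      }
      if (static_type_is_object_array) {
        // Component type is Object unless it has a super class; only then re-check at runtime.
        return component->super_class_ != nullptr;              // Cbnz -> slow path.
      }
      return true;                                              // B ne -> slow path.
    }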
@@ -2354,22 +2582,19 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
}
void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
BoundsCheckSlowPathARM64* slow_path =
new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
codegen_->AddSlowPath(slow_path);
-
__ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
__ B(slow_path->GetEntryLabel(), hs);
}
@@ -2733,14 +2958,8 @@ void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
}
void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
@@ -2972,6 +3191,7 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -3105,6 +3325,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -3112,6 +3333,7 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -3121,6 +3343,9 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The "out" register is used as a temporary, so it overlaps with the inputs.
@@ -3601,7 +3826,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method ATTRIBUTE_UNUSED) {
+ HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
// On ARM64 we support all dispatch types.
return desired_dispatch_info;
}
@@ -3627,10 +3852,13 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
// Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
- case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+ case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
+ uint32_t offset =
+ GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
// temp = thread->string_init_entrypoint
- __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset()));
+ __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
@@ -3645,22 +3873,14 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
// Add ADRP with its PC-relative DexCache access patch.
- const DexFile& dex_file = *invoke->GetTargetMethod().dex_file;
+ const DexFile& dex_file = invoke->GetDexFile();
uint32_t element_offset = invoke->GetDexCacheArrayOffset();
vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
- {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(adrp_label);
- __ adrp(XRegisterFrom(temp), /* offset placeholder */ 0);
- }
+ EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
// Add LDR with its PC-relative DexCache access patch.
vixl::aarch64::Label* ldr_label =
NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
- {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(ldr_label);
- __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), /* offset placeholder */ 0));
- }
+ EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
@@ -3693,7 +3913,8 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
__ Bl(&frame_entry_label_);
break;
case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
- relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+ relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetTargetMethod().dex_method_index);
vixl::aarch64::Label* label = &relative_call_patches_.back().label;
SingleEmissionCheckScope guard(GetVIXLAssembler());
__ Bind(label);
@@ -3815,6 +4036,45 @@ vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddress
return DeduplicateUint64Literal(address);
}
+void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
+ vixl::aarch64::Register reg) {
+ DCHECK(reg.IsX());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
+ __ Bind(fixup_label);
+ __ adrp(reg, /* offset placeholder */ 0);
+}
+
+void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
+ vixl::aarch64::Register out,
+ vixl::aarch64::Register base) {
+ DCHECK(out.IsX());
+ DCHECK(base.IsX());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
+ __ Bind(fixup_label);
+ __ add(out, base, Operand(/* offset placeholder */ 0));
+}
+
+void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
+ vixl::aarch64::Register out,
+ vixl::aarch64::Register base) {
+ DCHECK(base.IsX());
+ SingleEmissionCheckScope guard(GetVIXLAssembler());
+ __ Bind(fixup_label);
+ __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
+}
+
+template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
+ const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<LinkerPatch>* linker_patches) {
+ for (const PcRelativePatchInfo& info : infos) {
+ linker_patches->push_back(Factory(info.label.GetLocation(),
+ &info.target_dex_file,
+ info.pc_insn_label->GetLocation(),
+ info.offset_or_index));
+ }
+}
+
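These placeholder emitters are always used in pairs, as in the GenerateStaticOrDirectCall hunk above; the sketch below just restates that call sequence as a usage example:

    // ADRP materializes the 4 KiB page of the target; the paired LDR (or ADD) placeholder
    // carries the low bits. Both labels are resolved later via the PC-relative linker patches.
    vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
    EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
    vixl::aarch64::Label* ldr_label =
        NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
    EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));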
void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -3842,10 +4102,9 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
target_method.dex_file,
target_method.dex_method_index));
}
- for (const MethodPatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) {
- linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.GetLocation(),
- info.target_method.dex_file,
- info.target_method.dex_method_index));
+ for (const PatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) {
+ linker_patches->push_back(
+ LinkerPatch::RelativeCodePatch(info.label.GetLocation(), &info.dex_file, info.index));
}
for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
@@ -3860,11 +4119,12 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
target_string.dex_file,
target_string.string_index));
}
- for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
- linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.GetLocation(),
- &info.target_dex_file,
- info.pc_insn_label->GetLocation(),
- info.offset_or_index));
+ if (!GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ linker_patches);
+ } else {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
+ linker_patches);
}
for (const auto& entry : boot_image_type_patches_) {
const TypeReference& target_type = entry.first;
@@ -3873,12 +4133,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
target_type.dex_file,
target_type.type_index));
}
- for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
- linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.GetLocation(),
- &info.target_dex_file,
- info.pc_insn_label->GetLocation(),
- info.offset_or_index));
- }
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+ linker_patches);
for (const auto& entry : boot_image_address_patches_) {
DCHECK(GetCompilerOptions().GetIncludePatchInformation());
vixl::aarch64::Literal<uint32_t>* literal = entry.second;
@@ -3946,17 +4202,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
- if (kEmitCompilerReadBarrier) {
- switch (desired_class_load_kind) {
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageAddress:
- // TODO: Implement for read barrier.
- return HLoadClass::LoadKind::kDexCacheViaMethod;
- default:
- break;
- }
- }
switch (desired_class_load_kind) {
case HLoadClass::LoadKind::kReferrersClass:
break;
@@ -3991,10 +4236,15 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
return;
}
- LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
@@ -4006,10 +4256,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
if (cls->NeedsAccessCheck()) {
codegen_->MoveConstant(cls->GetLocations()->GetTemp(0), cls->GetTypeIndex());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
- cls,
- cls->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
@@ -4017,6 +4264,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
Location out_loc = cls->GetLocations()->Out();
Register out = OutputRegister(cls);
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
bool generate_null_check = false;
switch (cls->GetLoadKind()) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -4024,38 +4272,34 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
DCHECK(!cls->MustGenerateClinitCheck());
// /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
Register current_method = InputRegisterAt(cls, 0);
- GenerateGcRootFieldLoad(
- cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ current_method,
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
__ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
cls->GetTypeIndex()));
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
// Add ADRP with its PC-relative type patch.
const DexFile& dex_file = cls->GetDexFile();
uint32_t type_index = cls->GetTypeIndex();
vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
- {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(adrp_label);
- __ adrp(out.X(), /* offset placeholder */ 0);
- }
+ codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
// Add ADD with its PC-relative type patch.
vixl::aarch64::Label* add_label =
codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
- {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(add_label);
- __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
- }
+ codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
DCHECK(cls->GetAddress() != 0u && IsUint<32>(cls->GetAddress()));
__ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress()));
break;
@@ -4073,7 +4317,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
uint32_t offset = cls->GetAddress() & MaxInt<uint64_t>(offset_bits);
__ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
// /* GcRoot<mirror::Class> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(cls, out_loc, out.X(), offset);
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ out.X(),
+ offset,
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -4083,16 +4332,17 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
uint32_t element_offset = cls->GetDexCacheElementOffset();
vixl::aarch64::Label* adrp_label =
codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
- {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(adrp_label);
- __ adrp(out.X(), /* offset placeholder */ 0);
- }
+ codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
// Add LDR with its PC-relative DexCache access patch.
vixl::aarch64::Label* ldr_label =
codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
- GenerateGcRootFieldLoad(cls, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ out.X(),
+ /* offset placeholder */ 0,
+ ldr_label,
+ requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -4104,8 +4354,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
Register current_method = InputRegisterAt(cls, 0);
__ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
// /* GcRoot<mirror::Class> */ out = out[type_index]
- GenerateGcRootFieldLoad(
- cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ out.X(),
+ CodeGenerator::GetCacheOffset(cls->GetTypeIndex()),
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -4151,17 +4405,6 @@ void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear A
HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
- if (kEmitCompilerReadBarrier) {
- switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageAddress:
- // TODO: Implement for read barrier.
- return HLoadString::LoadKind::kDexCacheViaMethod;
- default:
- break;
- }
- }
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -4174,7 +4417,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
case HLoadString::LoadKind::kDexCacheAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kDexCachePcRelative:
+ case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadString::LoadKind::kDexCacheViaMethod:
@@ -4184,110 +4427,96 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
}
void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
- LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
- ? LocationSummary::kCallOnSlowPath
+ LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+ ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
+ ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kCallOnSlowPath)
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
- locations->SetInAt(0, Location::RequiresRegister());
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
+ } else {
+ locations->SetOut(Location::RequiresRegister());
+ if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and/or marking to save everything, including temps.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+ DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
+ RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
+ Primitive::kPrimNot).GetCode());
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
- locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
- Location out_loc = load->GetLocations()->Out();
Register out = OutputRegister(load);
switch (load->GetLoadKind()) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- DCHECK(!kEmitCompilerReadBarrier);
__ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
load->GetStringIndex()));
return; // No dex cache slow path.
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
- DCHECK(!kEmitCompilerReadBarrier);
// Add ADRP with its PC-relative String patch.
const DexFile& dex_file = load->GetDexFile();
uint32_t string_index = load->GetStringIndex();
+ DCHECK(codegen_->GetCompilerOptions().IsBootImage());
vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
- {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(adrp_label);
- __ adrp(out.X(), /* offset placeholder */ 0);
- }
+ codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
// Add ADD with its PC-relative String patch.
vixl::aarch64::Label* add_label =
codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
- {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(add_label);
- __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
- }
+ codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
DCHECK(load->GetAddress() != 0u && IsUint<32>(load->GetAddress()));
__ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress()));
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads
- // that gives a 16KiB range. To try and reduce the number of literals if we load
- // multiple strings, simply split the dex cache address to a 16KiB aligned base
- // loaded from a literal and the remaining offset embedded in the load.
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
- DCHECK_ALIGNED(load->GetAddress(), 4u);
- constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2;
- uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
- uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits);
- __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
- // /* GcRoot<mirror::String> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(load, out_loc, out.X(), offset);
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- // Add ADRP with its PC-relative DexCache access patch.
+ case HLoadString::LoadKind::kBssEntry: {
+ // Add ADRP with its PC-relative String .bss entry patch.
const DexFile& dex_file = load->GetDexFile();
- uint32_t element_offset = load->GetDexCacheElementOffset();
- vixl::aarch64::Label* adrp_label =
- codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
- {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(adrp_label);
- __ adrp(out.X(), /* offset placeholder */ 0);
- }
- // Add LDR with its PC-relative DexCache access patch.
+ uint32_t string_index = load->GetStringIndex();
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+ Register temp = temps.AcquireX();
+ vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+ codegen_->EmitAdrpPlaceholder(adrp_label, temp);
+ // Add LDR with its PC-relative String patch.
vixl::aarch64::Label* ldr_label =
- codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
- // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- Register current_method = InputRegisterAt(load, 0);
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- break;
+ codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+ // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
+ GenerateGcRootFieldLoad(load,
+ load->GetLocations()->Out(),
+ temp,
+ /* offset placeholder */ 0u,
+ ldr_label,
+ kEmitCompilerReadBarrier);
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
+ codegen_->AddSlowPath(slow_path);
+ __ Cbz(out.X(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
}
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
- codegen_->AddSlowPath(slow_path);
- __ Cbz(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do the string dex cache lookup.
+ InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
+ __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex());
+ codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
}
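Put together, the kBssEntry fast path and its slow path behave like the sketch below (the function and helper names are illustrative, not ART APIs):

    // The .bss slot is addressed through the ADRP/LDR placeholders; a null entry means the
    // String is unresolved, so the slow path calls the runtime and fills the slot.
    mirror::String* RuntimeResolveStringSketch(uint32_t string_index);  // stands in for kQuickResolveString.
    mirror::String* LoadStringBssEntrySketch(mirror::String** bss_slot, uint32_t string_index) {
      mirror::String* str = *bss_slot;                   // ADRP + LDR against the patched entry.
      if (str == nullptr) {                              // Cbz -> LoadStringSlowPathARM64.
        str = RuntimeResolveStringSketch(string_index);  // Runtime resolves and caches the String.
      }
      return str;
    }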
void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
@@ -4307,11 +4536,9 @@ void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction
}
void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
- codegen_->InvokeRuntime(instruction->IsEnter()
- ? QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
+ instruction,
+ instruction->GetDexPc());
if (instruction->IsEnter()) {
CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
} else {
@@ -4415,10 +4642,7 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
__ Mov(type_index, instruction->GetTypeIndex());
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
}
@@ -4447,10 +4671,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction)
__ Blr(lr);
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
} else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
}
}
@@ -4484,14 +4705,8 @@ void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
}
void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::RequiresRegister());
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
@@ -4616,9 +4831,8 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
- int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
- : QUICK_ENTRY_POINT(pFmod);
- codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr);
+ QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod;
+ codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
if (type == Primitive::kPrimFloat) {
CheckEntrypointTypes<kQuickFmodf, float, float, float>();
} else {
@@ -4776,7 +4990,9 @@ void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
}
void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -4801,8 +5017,7 @@ void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
}
void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
- codegen_->InvokeRuntime(
- QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
+ codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
@@ -5060,9 +5275,12 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
Location root,
Register obj,
uint32_t offset,
- vixl::aarch64::Label* fixup_label) {
+ vixl::aarch64::Label* fixup_label,
+ bool requires_read_barrier) {
+ DCHECK(fixup_label == nullptr || offset == 0u);
Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
- if (kEmitCompilerReadBarrier) {
+ if (requires_read_barrier) {
+ DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
@@ -5076,9 +5294,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
} else {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(fixup_label);
- __ ldr(root_reg, MemOperand(obj, offset));
+ codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
}
static_assert(
sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
@@ -5088,7 +5304,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path used to mark the GC root `root`.
+ // Slow path marking the GC root `root`.
SlowPathCodeARM64* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
codegen_->AddSlowPath(slow_path);
@@ -5107,9 +5323,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
if (fixup_label == nullptr) {
__ Add(root_reg.X(), obj.X(), offset);
} else {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(fixup_label);
- __ add(root_reg.X(), obj.X(), offset);
+ codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
}
// /* mirror::Object* */ root = root->Read()
codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
@@ -5120,9 +5334,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
} else {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(fixup_label);
- __ ldr(root_reg, MemOperand(obj, offset));
+ codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
}
// Note that GC roots are not affected by heap poisoning, thus we
// do not have to unpoison `root_reg` here.
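GenerateGcRootFieldLoad therefore picks between three shapes; summarized in the file's own pseudo-code style (the marking trigger for the Baker case is paraphrased):

    //   requires_read_barrier && kUseBakerReadBarrier:
    //     root = *(obj + offset);               // LDR, or the LDR placeholder
    //     if (the GC is currently marking) {    // checked via a Thread flag
    //       root = Mark(root);                  // ReadBarrierMarkSlowPathARM64
    //     }
    //   requires_read_barrier && !kUseBakerReadBarrier:
    //     root = &(obj + offset);               // ADD, or the ADD placeholder
    //     root = root->Read();                  // GenerateReadBarrierForRootSlow
    //   otherwise:
    //     root = *(obj + offset);               // plain load, no barrier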
@@ -5141,7 +5353,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
- size_t no_scale_factor = 0U;
+ size_t no_scale_factor = 0u;
GenerateReferenceLoadWithBakerReadBarrier(instruction,
ref,
obj,
@@ -5192,7 +5404,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
size_t scale_factor,
Register temp,
bool needs_null_check,
- bool use_load_acquire) {
+ bool use_load_acquire,
+ bool always_update_field) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// If we are emitting an array load, we should not be using a
@@ -5239,12 +5452,15 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// Introduce a dependency on the lock_word including rb_state,
// to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
- // obj is unchanged by this operation, but its value now depends on temp.
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
__ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
// The actual reference load.
if (index.IsValid()) {
- // Load types involving an "index".
+ // Load types involving an "index": ArrayGet,
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics.
if (use_load_acquire) {
// UnsafeGetObjectVolatile intrinsic case.
// Register `index` is not an index in an object array, but an
@@ -5253,9 +5469,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
DCHECK(instruction->GetLocations()->Intrinsified());
DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
<< instruction->AsInvoke()->GetIntrinsic();
- DCHECK_EQ(offset, 0U);
- DCHECK_EQ(scale_factor, 0U);
- DCHECK_EQ(needs_null_check, 0U);
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, 0u);
+ DCHECK_EQ(needs_null_check, 0u);
// /* HeapReference<Object> */ ref = *(obj + index)
MemOperand field = HeapOperand(obj, XRegisterFrom(index));
LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
@@ -5266,10 +5482,10 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
Load(type, ref_reg, HeapOperand(obj, computed_offset));
} else {
- Register temp2 = temps.AcquireW();
- __ Add(temp2, obj, offset);
- Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
- temps.Release(temp2);
+ Register temp3 = temps.AcquireW();
+ __ Add(temp3, obj, offset);
+ Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor));
+ temps.Release(temp3);
}
}
} else {
@@ -5285,9 +5501,20 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// Object* ref = ref_addr->AsMirrorPtr()
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
- // Slow path used to mark the object `ref` when it is gray.
- SlowPathCodeARM64* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCodeARM64* slow_path;
+ if (always_update_field) {
+ // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports
+ // an address of the form `obj + field_offset`, where `obj` is a
+ // register and `field_offset` is a register. Thus `offset` and
+ // `scale_factor` above are expected to be zero in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, 0u); /* "times 1" */
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64(
+ instruction, ref, obj, /* field_offset */ index, temp);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
+ }
AddSlowPath(slow_path);
// if (rb_state == ReadBarrier::gray_ptr_)
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 1b5fa857e7..7f54b4b6b2 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -27,11 +27,11 @@
#include "utils/arm64/assembler_arm64.h"
#include "utils/type_reference.h"
-// TODO: make vixl clean wrt -Wshadow.
+// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
-#include "a64/disasm-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop
namespace art {
@@ -289,12 +289,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
//
// root <- *(obj + offset)
//
- // while honoring read barriers (if any).
+ // while honoring read barriers if `requires_read_barrier` is true.
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
vixl::aarch64::Register obj,
uint32_t offset,
- vixl::aarch64::Label* fixup_label = nullptr);
+ vixl::aarch64::Label* fixup_label,
+ bool requires_read_barrier);
// Generate a floating-point comparison.
void GenerateFcmp(HInstruction* instruction);
@@ -491,12 +492,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path) OVERRIDE;
-
- void InvokeRuntime(int32_t offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path);
+ SlowPathCode* slow_path = nullptr) OVERRIDE;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -504,6 +500,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
HInstruction* instruction,
SlowPathCode* slow_path);
+ void GenerateInvokeRuntime(int32_t entry_point_offset);
+
ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
@@ -524,7 +522,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method) OVERRIDE;
+ HInvokeStaticOrDirect* invoke) OVERRIDE;
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
@@ -566,6 +564,14 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
+ void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg);
+ void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
+ vixl::aarch64::Register out,
+ vixl::aarch64::Register base);
+ void EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
+ vixl::aarch64::Register out,
+ vixl::aarch64::Register base);
+
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
// Fast path implementation of ReadBarrier::Barrier for a heap
@@ -588,6 +594,13 @@ class CodeGeneratorARM64 : public CodeGenerator {
bool needs_null_check);
// Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
+ //
+ // Load the object reference located at the address
+ // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+ // `ref`, and mark it if needed.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl::aarch64::Register obj,
@@ -596,7 +609,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
size_t scale_factor,
vixl::aarch64::Register temp,
bool needs_null_check,
- bool use_load_acquire);
+ bool use_load_acquire,
+ bool always_update_field = false);
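For reference, the fast path this helper emits can be summarized in the same pseudo-code style the .cc file uses (the gray check is simplified here; see the implementation for the exact sequence):

    //   lock_word = obj->monitor_;                          // also serves as the implicit null check
    //   obj += (lock_word >> 32);                           // address dependency; adds 0, orders the loads
    //   ref  = *(obj + offset + (index << scale_factor));   // the actual reference load
    //   if (rb_state in lock_word is gray) {
    //     ref = Mark(ref);             // ReadBarrierMark[AndUpdateField]SlowPathARM64;
    //   }                              // with always_update_field the holder's field is also updated.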
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
@@ -646,10 +660,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
// artReadBarrierForRootSlow.
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
- void GenerateNop();
+ void GenerateNop() OVERRIDE;
- void GenerateImplicitNullCheck(HNullCheck* instruction);
- void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
private:
using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
@@ -693,6 +707,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
void EmitJumpTables();
+ template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<LinkerPatch>* linker_patches);
+
// Labels for each block that will be compiled.
// We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
ArenaDeque<vixl::aarch64::Label> block_labels_; // Indexed by block id.
@@ -715,12 +733,12 @@ class CodeGeneratorARM64 : public CodeGenerator {
MethodToLiteralMap call_patches_;
// Relative call patch info.
// Using ArenaDeque<> which retains element addresses on push/emplace_back().
- ArenaDeque<MethodPatchInfo<vixl::aarch64::Label>> relative_call_patches_;
+ ArenaDeque<PatchInfo<vixl::aarch64::Label>> relative_call_patches_;
// PC-relative DexCache access info.
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
// Deduplication map for boot string literals for kBootImageLinkTimeAddress.
BootStringToLiteralMap boot_image_string_patches_;
- // PC-relative String patch info.
+ // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
// Deduplication map for boot type literals for kBootImageLinkTimeAddress.
BootTypeToLiteralMap boot_image_type_patches_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
new file mode 100644
index 0000000000..f1d11354fa
--- /dev/null
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -0,0 +1,4249 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm_vixl.h"
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "art_method.h"
+#include "code_generator_utils.h"
+#include "common_arm.h"
+#include "compiled_method.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/accounting/card_table.h"
+#include "mirror/array-inl.h"
+#include "mirror/class-inl.h"
+#include "thread.h"
+#include "utils/arm/assembler_arm_vixl.h"
+#include "utils/arm/managed_register_arm.h"
+#include "utils/assembler.h"
+#include "utils/stack_checks.h"
+
+namespace art {
+namespace arm {
+
+namespace vixl32 = vixl::aarch32;
+using namespace vixl32; // NOLINT(build/namespaces)
+
+using helpers::DRegisterFrom;
+using helpers::DWARFReg;
+using helpers::FromLowSToD;
+using helpers::HighDRegisterFrom;
+using helpers::HighRegisterFrom;
+using helpers::InputOperandAt;
+using helpers::InputRegisterAt;
+using helpers::InputSRegisterAt;
+using helpers::InputVRegisterAt;
+using helpers::LocationFrom;
+using helpers::LowRegisterFrom;
+using helpers::LowSRegisterFrom;
+using helpers::OutputRegister;
+using helpers::OutputSRegister;
+using helpers::OutputVRegister;
+using helpers::RegisterFrom;
+using helpers::SRegisterFrom;
+
+using RegisterList = vixl32::RegisterList;
+
+static bool ExpectedPairLayout(Location location) {
+ // We expect this for both core and FPU register pairs.
+ return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
+}
+
+static constexpr size_t kArmInstrMaxSizeInBytes = 4u;
+
+#ifdef __
+#error "ARM Codegen VIXL macro-assembler macro already defined."
+#endif
+
+// NOLINT on the __ macro to suppress a spurious clang-tidy warning/fix-it (misc-macro-parentheses).
+#define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()-> // NOLINT
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
+
+// Marker for code that is not yet, but must eventually be, implemented.
+#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
+
+// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARMVIXL operate on sets of S registers;
+// for each live D register, they treat the two corresponding S registers as live.
+//
+// The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build,
+// from a list of contiguous S registers, a list of contiguous D registers (handling the first/last
+// S register corner cases) and save/restore this new list treating its entries as D registers.
+// This has two benefits:
+// - decreasing code size;
+// - avoiding hazards on Cortex-A57, when a pair of S registers belonging to an actual live
+//   D register is restored and then used as a D register in regular (non slow path) code.
+//
+// For the following example (v means the S register is live):
+//   D names: |    D0   |    D1   |    D2   |    D3   | ...
+//   S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
+//   Live?    |    |  v |  v |  v |  v |  v |  v |    | ...
+//
+// S1 and S6 will be saved/restored independently; the D register list (D1, D2) will be
+// processed as D registers.
+//
+// TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
+// for lists of floating-point registers.
+static size_t SaveContiguousSRegisterList(size_t first,
+ size_t last,
+ CodeGenerator* codegen,
+ size_t stack_offset) {
+ static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
+ static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
+ DCHECK_LE(first, last);
+ if ((first == last) && (first == 0)) {
+ __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
+ return stack_offset + kSRegSizeInBytes;
+ }
+ if (first % 2 == 1) {
+ __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
+ stack_offset += kSRegSizeInBytes;
+ }
+
+ bool save_last = false;
+ if (last % 2 == 0) {
+ save_last = true;
+ --last;
+ }
+
+ if (first < last) {
+ vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
+ DCHECK_EQ((last - first + 1) % 2, 0u);
+ size_t number_of_d_regs = (last - first + 1) / 2;
+
+ if (number_of_d_regs == 1) {
+ __ Vstr(d_reg, MemOperand(sp, stack_offset));
+ } else if (number_of_d_regs > 1) {
+ UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
+ vixl32::Register base = sp;
+ if (stack_offset != 0) {
+ base = temps.Acquire();
+ __ Add(base, sp, stack_offset);
+ }
+ __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
+ }
+ stack_offset += number_of_d_regs * kDRegSizeInBytes;
+ }
+
+ if (save_last) {
+ __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
+ stack_offset += kSRegSizeInBytes;
+ }
+
+ return stack_offset;
+}
+
+static size_t RestoreContiguousSRegisterList(size_t first,
+ size_t last,
+ CodeGenerator* codegen,
+ size_t stack_offset) {
+ static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
+ static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
+ DCHECK_LE(first, last);
+ if ((first == last) && (first == 0)) {
+ __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
+ return stack_offset + kSRegSizeInBytes;
+ }
+ if (first % 2 == 1) {
+ __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
+ stack_offset += kSRegSizeInBytes;
+ }
+
+ bool restore_last = false;
+ if (last % 2 == 0) {
+ restore_last = true;
+ --last;
+ }
+
+ if (first < last) {
+ vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
+ DCHECK_EQ((last - first + 1) % 2, 0u);
+ size_t number_of_d_regs = (last - first + 1) / 2;
+ if (number_of_d_regs == 1) {
+ __ Vldr(d_reg, MemOperand(sp, stack_offset));
+ } else if (number_of_d_regs > 1) {
+ UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
+ vixl32::Register base = sp;
+ if (stack_offset != 0) {
+ base = temps.Acquire();
+ __ Add(base, sp, stack_offset);
+ }
+ __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
+ }
+ stack_offset += number_of_d_regs * kDRegSizeInBytes;
+ }
+
+ if (restore_last) {
+ __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
+ stack_offset += kSRegSizeInBytes;
+ }
+
+ return stack_offset;
+}
+
+void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+ size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+ size_t orig_offset = stack_offset;
+
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ // If the register holds an object, update the stack mask.
+ if (locations->RegisterContainsObject(i)) {
+ locations->SetStackBit(stack_offset / kVRegSize);
+ }
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_core_stack_offsets_[i] = stack_offset;
+ stack_offset += kArmWordSize;
+ }
+
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);
+
+ uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ orig_offset = stack_offset;
+ for (uint32_t i : LowToHighBits(fp_spills)) {
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_fpu_stack_offsets_[i] = stack_offset;
+ stack_offset += kArmWordSize;
+ }
+
+ stack_offset = orig_offset;
+ while (fp_spills != 0u) {
+ uint32_t begin = CTZ(fp_spills);
+ uint32_t tmp = fp_spills + (1u << begin);
+ fp_spills &= tmp; // Clear the contiguous range of 1s.
+ uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
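+ // E.g. with fp_spills = 0b01111100: begin = 2, tmp = 0b10000000, end = 7, so S2..S6 are
+ // saved as one contiguous range and fp_spills becomes 0 for the next iteration.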
+ stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
+ }
+ DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+}
+
+void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+ size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+ size_t orig_offset = stack_offset;
+
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ stack_offset += kArmWordSize;
+ }
+
+ // TODO(VIXL): Check the coherency of stack_offset after this with a test.
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);
+
+ uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ while (fp_spills != 0u) {
+ uint32_t begin = CTZ(fp_spills);
+ uint32_t tmp = fp_spills + (1u << begin);
+ fp_spills &= tmp; // Clear the contiguous range of 1s.
+ uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
+ stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
+ }
+ DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+}
+
+class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ __ Bind(GetEntryLabel());
+ if (instruction_->CanThrowIntoCatchBlock()) {
+ // Live registers will be restored in the catch block if caught.
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ }
+ arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
+ }
+
+ bool IsFatal() const OVERRIDE { return true; }
+
+ const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARMVIXL"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
+};
+
+class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
+ : SlowPathCodeARMVIXL(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ __ Bind(GetEntryLabel());
+ arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
+ }
+
+ bool IsFatal() const OVERRIDE { return true; }
+
+ const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARMVIXL"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
+};
+
+class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
+ : SlowPathCodeARMVIXL(instruction), successor_(successor) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ __ Bind(GetEntryLabel());
+ arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ if (successor_ == nullptr) {
+ __ B(GetReturnLabel());
+ } else {
+ __ B(arm_codegen->GetLabelOf(successor_));
+ }
+ }
+
+ vixl32::Label* GetReturnLabel() {
+ DCHECK(successor_ == nullptr);
+ return &return_label_;
+ }
+
+ HBasicBlock* GetSuccessor() const {
+ return successor_;
+ }
+
+ const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARMVIXL"; }
+
+ private:
+ // If not null, the block to branch to after the suspend check.
+ HBasicBlock* const successor_;
+
+ // If `successor_` is null, the label to branch to after the suspend check.
+ vixl32::Label return_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
+};
+
+class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit)
+ : SlowPathCodeARMVIXL(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+ DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = at_->GetLocations();
+
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ __ Mov(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
+ QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
+ : kQuickInitializeType;
+ arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
+ if (do_clinit_) {
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ } else {
+ CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ }
+
+ // Move the class to the desired location.
+ Location out = locations->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+ arm_codegen->Move32(locations->Out(), LocationFrom(r0));
+ }
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARMVIXL"; }
+
+ private:
+ // The class this slow path will load.
+ HLoadClass* const cls_;
+
+ // The instruction where this slow path is happening.
+ // (Might be the load class or an initialization check).
+ HInstruction* const at_;
+
+ // The dex PC of `at_`.
+ const uint32_t dex_pc_;
+
+ // Whether to initialize the class.
+ const bool do_clinit_;
+
+ DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
+};
+
+inline vixl32::Condition ARMCondition(IfCondition cond) {
+ switch (cond) {
+ case kCondEQ: return eq;
+ case kCondNE: return ne;
+ case kCondLT: return lt;
+ case kCondLE: return le;
+ case kCondGT: return gt;
+ case kCondGE: return ge;
+ case kCondB: return lo;
+ case kCondBE: return ls;
+ case kCondA: return hi;
+ case kCondAE: return hs;
+ }
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+}
+
+// Maps signed condition to unsigned condition.
+inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
+ switch (cond) {
+ case kCondEQ: return eq;
+ case kCondNE: return ne;
+ // Signed to unsigned.
+ case kCondLT: return lo;
+ case kCondLE: return ls;
+ case kCondGT: return hi;
+ case kCondGE: return hs;
+ // Unsigned conditions remain unchanged.
+ case kCondB: return lo;
+ case kCondBE: return ls;
+ case kCondA: return hi;
+ case kCondAE: return hs;
+ }
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+}
+
+inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+ // The ARM condition codes can express all the necessary branches; see the
+ // "Meaning (floating-point)" column in table A8-1 of the ARMv7 reference manual.
+ // There is no dex instruction or HIR that would need the missing conditions
+ // "equal or unordered" or "not equal".
+ switch (cond) {
+ case kCondEQ: return eq;
+ case kCondNE: return ne /* unordered */;
+ case kCondLT: return gt_bias ? cc : lt /* unordered */;
+ case kCondLE: return gt_bias ? ls : le /* unordered */;
+ case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+ case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ }
+}
+
+void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
+ stream << vixl32::Register(reg);
+}
+
+void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
+ stream << vixl32::SRegister(reg);
+}
+
+static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
+ uint32_t mask = 0;
+ for (uint32_t i = regs.GetFirstSRegister().GetCode();
+ i <= regs.GetLastSRegister().GetCode();
+ ++i) {
+ mask |= (1 << i);
+ }
+ return mask;
+}
+
+#undef __
+
+CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
+ const ArmInstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options,
+ OptimizingCompilerStats* stats)
+ : CodeGenerator(graph,
+ kNumberOfCoreRegisters,
+ kNumberOfSRegisters,
+ kNumberOfRegisterPairs,
+ kCoreCalleeSaves.GetList(),
+ ComputeSRegisterListMask(kFpuCalleeSaves),
+ compiler_options,
+ stats),
+ block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ location_builder_(graph, this),
+ instruction_visitor_(graph, this),
+ move_resolver_(graph->GetArena(), this),
+ assembler_(graph->GetArena()),
+ isa_features_(isa_features) {
+ // Always save the LR register to mimic Quick.
+ AddAllocatedRegister(Location::RegisterLocation(LR));
+ // Give d14 and d15 as scratch registers to VIXL.
+ // They are removed from the register allocator in `SetupBlockedRegisters()`.
+ // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack
+ // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead
+ // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give
+ // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may
+ // also want to investigate giving those 14 other D registers to the allocator.
+ GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14);
+ GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
+}
+
+#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->
+
+void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
+ GetAssembler()->FinalizeCode();
+ CodeGenerator::Finalize(allocator);
+}
+
+void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
+ // Stack register, LR and PC are always reserved.
+ blocked_core_registers_[SP] = true;
+ blocked_core_registers_[LR] = true;
+ blocked_core_registers_[PC] = true;
+
+ // Reserve thread register.
+ blocked_core_registers_[TR] = true;
+
+ // Reserve temp register.
+ blocked_core_registers_[IP] = true;
+
+ // Registers s28-s31 (d14-d15) are left to VIXL for scratch registers.
+ // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.)
+ blocked_fpu_registers_[28] = true;
+ blocked_fpu_registers_[29] = true;
+ blocked_fpu_registers_[30] = true;
+ blocked_fpu_registers_[31] = true;
+
+ if (GetGraph()->IsDebuggable()) {
+ // Stubs do not save callee-save floating point registers. If the graph
+ // is debuggable, we need to deal with these registers differently. For
+ // now, just block them.
+ for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
+ i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
+ ++i) {
+ blocked_fpu_registers_[i] = true;
+ }
+ }
+}
+
+InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
+ CodeGeneratorARMVIXL* codegen)
+ : InstructionCodeGenerator(graph, codegen),
+ assembler_(codegen->GetAssembler()),
+ codegen_(codegen) {}
+
+void CodeGeneratorARMVIXL::ComputeSpillMask() {
+ core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+ DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+ // There is no easy instruction to restore just the PC on Thumb-2. We spill and
+ // restore another arbitrary register.
+ core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister.GetCode());
+ fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
+ // We use vpush and vpop for saving and restoring floating point registers, which take
+ // a SRegister and the number of registers to save/restore after that SRegister. We
+ // therefore update the `fpu_spill_mask_` to also contain those registers not allocated,
+ // but in the range.
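+ // E.g. if only s16 and s20 were allocated callee saves, s17-s19 are added to the mask so that
+ // a single vpush/vpop of the contiguous range s16-s20 can be used.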
+ if (fpu_spill_mask_ != 0) {
+ uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
+ uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
+ for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
+ fpu_spill_mask_ |= (1 << i);
+ }
+ }
+}
+
+void CodeGeneratorARMVIXL::GenerateFrameEntry() {
+ bool skip_overflow_check =
+ IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+ __ Bind(&frame_entry_label_);
+
+ if (HasEmptyFrame()) {
+ return;
+ }
+
+ if (!skip_overflow_check) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
+ // The load must immediately precede RecordPcInfo.
+ AssemblerAccurateScope aas(GetVIXLAssembler(),
+ kArmInstrMaxSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ ldr(temp, MemOperand(temp));
+ RecordPcInfo(nullptr, 0);
+ }
+
+ __ Push(RegisterList(core_spill_mask_));
+ GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
+ GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
+ 0,
+ core_spill_mask_,
+ kArmWordSize);
+ if (fpu_spill_mask_ != 0) {
+ uint32_t first = LeastSignificantBit(fpu_spill_mask_);
+
+ // Check that the list is contiguous.
+ DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
+
+ __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
+ GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
+ GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), 0, fpu_spill_mask_, kArmWordSize);
+ }
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ Sub(sp, sp, adjust);
+ GetAssembler()->cfi().AdjustCFAOffset(adjust);
+ GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
+}
+
+void CodeGeneratorARMVIXL::GenerateFrameExit() {
+ if (HasEmptyFrame()) {
+ __ Bx(lr);
+ return;
+ }
+ GetAssembler()->cfi().RememberState();
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ Add(sp, sp, adjust);
+ GetAssembler()->cfi().AdjustCFAOffset(-adjust);
+ if (fpu_spill_mask_ != 0) {
+ uint32_t first = LeastSignificantBit(fpu_spill_mask_);
+
+ // Check that the list is contiguous.
+ DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
+
+ __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
+ GetAssembler()->cfi().AdjustCFAOffset(
+ -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
+ GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
+ }
+ // Pop LR into PC to return.
+ DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
+ uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
+ __ Pop(RegisterList(pop_mask));
+ GetAssembler()->cfi().RestoreState();
+ GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
+}
+
+void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
+ __ Bind(GetLabelOf(block));
+}
+
+void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
+ if (source.Equals(destination)) {
+ return;
+ }
+ if (destination.IsRegister()) {
+ if (source.IsRegister()) {
+ __ Mov(RegisterFrom(destination), RegisterFrom(source));
+ } else if (source.IsFpuRegister()) {
+ __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
+ } else {
+ GetAssembler()->LoadFromOffset(kLoadWord,
+ RegisterFrom(destination),
+ sp,
+ source.GetStackIndex());
+ }
+ } else if (destination.IsFpuRegister()) {
+ if (source.IsRegister()) {
+ __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
+ } else if (source.IsFpuRegister()) {
+ __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
+ } else {
+ GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
+ }
+ } else {
+ DCHECK(destination.IsStackSlot()) << destination;
+ if (source.IsRegister()) {
+ GetAssembler()->StoreToOffset(kStoreWord,
+ RegisterFrom(source),
+ sp,
+ destination.GetStackIndex());
+ } else if (source.IsFpuRegister()) {
+ GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
+ } else {
+ DCHECK(source.IsStackSlot()) << source;
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
+ GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
+ }
+ }
+}
+
+void CodeGeneratorARMVIXL::MoveConstant(Location destination ATTRIBUTE_UNUSED,
+ int32_t value ATTRIBUTE_UNUSED) {
+ TODO_VIXL32(FATAL);
+}
+
+void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
+ // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
+ // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
+ HParallelMove move(GetGraph()->GetArena());
+ move.AddMove(src, dst, dst_type, nullptr);
+ GetMoveResolver()->EmitNativeCode(&move);
+}
+
+void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location ATTRIBUTE_UNUSED,
+ LocationSummary* locations ATTRIBUTE_UNUSED) {
+ TODO_VIXL32(FATAL);
+}
+
+void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
+ HInstruction* instruction,
+ uint32_t dex_pc,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+ GenerateInvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value());
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
+ // previous instruction.
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
+}
+
+void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ GenerateInvokeRuntime(entry_point_offset);
+}
+
+void CodeGeneratorARMVIXL::GenerateInvokeRuntime(int32_t entry_point_offset) {
+ GetAssembler()->LoadFromOffset(kLoadWord, lr, tr, entry_point_offset);
+ __ Blx(lr);
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
+ DCHECK(!successor->IsExitBlock());
+ HBasicBlock* block = got->GetBlock();
+ HInstruction* previous = got->GetPrevious();
+ HLoopInformation* info = block->GetLoopInformation();
+
+ if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
+ GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+ return;
+ }
+ if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
+ GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+ }
+ if (!codegen_->GoesToNextBlock(block, successor)) {
+ __ B(codegen_->GetLabelOf(successor));
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
+ got->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
+ HandleGoto(got, got->GetSuccessor());
+}
+
+void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
+ try_boundary->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
+ HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
+ if (!successor->IsExitBlock()) {
+ HandleGoto(try_boundary, successor);
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
+ exit->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateVcmp(HInstruction* instruction) {
+ Primitive::Type type = instruction->InputAt(0)->GetType();
+ Location lhs_loc = instruction->GetLocations()->InAt(0);
+ Location rhs_loc = instruction->GetLocations()->InAt(1);
+ if (rhs_loc.IsConstant()) {
+ // 0.0 is the only immediate that can be encoded directly in
+ // a VCMP instruction.
+ //
+ // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+ // specify that in a floating-point comparison, positive zero
+ // and negative zero are considered equal, so we can use the
+ // literal 0.0 for both cases here.
+ //
+ // Note, however, that some methods (Float.equals, Float.compare,
+ // Float.compareTo, Double.equals, Double.compare,
+ // Double.compareTo, Math.max, Math.min, StrictMath.max,
+ // StrictMath.min) consider 0.0 to be (strictly) greater than
+ // -0.0. So if we ever translate calls to these methods into a
+ // HCompare instruction, we must handle the -0.0 case with
+ // care here.
+ DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+ if (type == Primitive::kPrimFloat) {
+ __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ Vcmp(F64, FromLowSToD(LowSRegisterFrom(lhs_loc)), 0.0);
+ }
+ } else {
+ if (type == Primitive::kPrimFloat) {
+ __ Vcmp(InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1));
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ Vcmp(FromLowSToD(LowSRegisterFrom(lhs_loc)), FromLowSToD(LowSRegisterFrom(rhs_loc)));
+ }
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateFPJumps(HCondition* cond,
+ vixl32::Label* true_label,
+ vixl32::Label* false_label ATTRIBUTE_UNUSED) {
+ // To branch on the result of the FP compare we transfer FPSCR to APSR (encoded as PC in VMRS).
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(ARMFPCondition(cond->GetCondition(), cond->IsGtBias()), true_label);
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond,
+ vixl32::Label* true_label,
+ vixl32::Label* false_label) {
+ LocationSummary* locations = cond->GetLocations();
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+ IfCondition if_cond = cond->GetCondition();
+
+ vixl32::Register left_high = HighRegisterFrom(left);
+ vixl32::Register left_low = LowRegisterFrom(left);
+ IfCondition true_high_cond = if_cond;
+ IfCondition false_high_cond = cond->GetOppositeCondition();
+ vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
+
+ // Set the conditions for the test, remembering that == needs to be
+ // decided using the low words.
+ // TODO: consider avoiding jumps with temporary and CMP low+SBC high
+ switch (if_cond) {
+ case kCondEQ:
+ case kCondNE:
+ // Nothing to do.
+ break;
+ case kCondLT:
+ false_high_cond = kCondGT;
+ break;
+ case kCondLE:
+ true_high_cond = kCondLT;
+ break;
+ case kCondGT:
+ false_high_cond = kCondLT;
+ break;
+ case kCondGE:
+ true_high_cond = kCondGT;
+ break;
+ case kCondB:
+ false_high_cond = kCondA;
+ break;
+ case kCondBE:
+ true_high_cond = kCondB;
+ break;
+ case kCondA:
+ false_high_cond = kCondB;
+ break;
+ case kCondAE:
+ true_high_cond = kCondA;
+ break;
+ }
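+ // E.g. for kCondLT this branches to true_label if left_high < right_high (signed lt) and to
+ // false_label if left_high > right_high (signed gt); when the high words are equal, the final
+ // unsigned comparison of the low words (lo) decides the result.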
+ if (right.IsConstant()) {
+ int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+ int32_t val_low = Low32Bits(value);
+ int32_t val_high = High32Bits(value);
+
+ __ Cmp(left_high, val_high);
+ if (if_cond == kCondNE) {
+ __ B(ARMCondition(true_high_cond), true_label);
+ } else if (if_cond == kCondEQ) {
+ __ B(ARMCondition(false_high_cond), false_label);
+ } else {
+ __ B(ARMCondition(true_high_cond), true_label);
+ __ B(ARMCondition(false_high_cond), false_label);
+ }
+ // Must be equal high, so compare the lows.
+ __ Cmp(left_low, val_low);
+ } else {
+ vixl32::Register right_high = HighRegisterFrom(right);
+ vixl32::Register right_low = LowRegisterFrom(right);
+
+ __ Cmp(left_high, right_high);
+ if (if_cond == kCondNE) {
+ __ B(ARMCondition(true_high_cond), true_label);
+ } else if (if_cond == kCondEQ) {
+ __ B(ARMCondition(false_high_cond), false_label);
+ } else {
+ __ B(ARMCondition(true_high_cond), true_label);
+ __ B(ARMCondition(false_high_cond), false_label);
+ }
+ // Must be equal high, so compare the lows.
+ __ Cmp(left_low, right_low);
+ }
+ // The last comparison might be unsigned.
+ // TODO: optimize cases where this is always true/false
+ __ B(final_condition, true_label);
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
+ vixl32::Label* true_target_in,
+ vixl32::Label* false_target_in) {
+ // Generated branching requires both targets to be explicit. If either of the
+ // targets is nullptr (fallthrough), use and bind `fallthrough` instead.
+ vixl32::Label fallthrough;
+ vixl32::Label* true_target = (true_target_in == nullptr) ? &fallthrough : true_target_in;
+ vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
+
+ Primitive::Type type = condition->InputAt(0)->GetType();
+ switch (type) {
+ case Primitive::kPrimLong:
+ GenerateLongComparesAndJumps(condition, true_target, false_target);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ GenerateVcmp(condition);
+ GenerateFPJumps(condition, true_target, false_target);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected compare type " << type;
+ }
+
+ if (false_target != &fallthrough) {
+ __ B(false_target);
+ }
+
+ if (true_target_in == nullptr || false_target_in == nullptr) {
+ __ Bind(&fallthrough);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
+ vixl32::Label* true_target,
+ vixl32::Label* false_target) {
+ HInstruction* cond = instruction->InputAt(condition_input_index);
+
+ if (true_target == nullptr && false_target == nullptr) {
+ // Nothing to do. The code always falls through.
+ return;
+ } else if (cond->IsIntConstant()) {
+ // Constant condition, statically compared against "true" (integer value 1).
+ if (cond->AsIntConstant()->IsTrue()) {
+ if (true_target != nullptr) {
+ __ B(true_target);
+ }
+ } else {
+ DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+ if (false_target != nullptr) {
+ __ B(false_target);
+ }
+ }
+ return;
+ }
+
+ // The following code generates these patterns:
+ // (1) true_target == nullptr && false_target != nullptr
+ // - opposite condition true => branch to false_target
+ // (2) true_target != nullptr && false_target == nullptr
+ // - condition true => branch to true_target
+ // (3) true_target != nullptr && false_target != nullptr
+ // - condition true => branch to true_target
+ // - branch to false_target
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
+ // Condition has been materialized, compare the output to 0.
+ if (kIsDebugBuild) {
+ Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
+ DCHECK(cond_val.IsRegister());
+ }
+ if (true_target == nullptr) {
+ __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
+ } else {
+ __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
+ }
+ } else {
+ // Condition has not been materialized. Use its inputs as the comparison and
+ // its condition as the branch condition.
+ HCondition* condition = cond->AsCondition();
+
+ // If this is a long or FP comparison that has been folded into
+ // the HCondition, generate the comparison directly.
+ Primitive::Type type = condition->InputAt(0)->GetType();
+ if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+ GenerateCompareTestAndBranch(condition, true_target, false_target);
+ return;
+ }
+
+ LocationSummary* locations = cond->GetLocations();
+ DCHECK(locations->InAt(0).IsRegister());
+ vixl32::Register left = InputRegisterAt(cond, 0);
+ Location right = locations->InAt(1);
+ if (right.IsRegister()) {
+ __ Cmp(left, InputRegisterAt(cond, 1));
+ } else {
+ DCHECK(right.IsConstant());
+ __ Cmp(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ }
+ if (true_target == nullptr) {
+ __ B(ARMCondition(condition->GetOppositeCondition()), false_target);
+ } else {
+ __ B(ARMCondition(condition->GetCondition()), true_target);
+ }
+ }
+
+ // If neither branch falls through (case 3), the conditional branch to `true_target`
+ // was already emitted (case 2) and we need to emit a jump to `false_target`.
+ if (true_target != nullptr && false_target != nullptr) {
+ __ B(false_target);
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
+ HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+ HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+ vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+ nullptr : codegen_->GetLabelOf(true_successor);
+ vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+ nullptr : codegen_->GetLabelOf(false_successor);
+ GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
+}
+
+void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+ if (Primitive::IsFloatingPointType(select->GetType())) {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+ locations->SetInAt(2, Location::RequiresRegister());
+ }
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
+ LocationSummary* locations = select->GetLocations();
+ vixl32::Label false_target;
+ GenerateTestAndBranch(select,
+ /* condition_input_index */ 2,
+ /* true_target */ nullptr,
+ &false_target);
+ codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+ __ Bind(&false_target);
+}
+
+void CodeGeneratorARMVIXL::GenerateNop() {
+ __ Nop();
+}
+
+void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
+ // Handle the long/FP comparisons made in instruction simplification.
+ switch (cond->InputAt(0)->GetType()) {
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
+ if (!cond->IsEmittedAtUseSite()) {
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
+ break;
+
+ // TODO(VIXL): https://android-review.googlesource.com/#/c/252265/
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ if (!cond->IsEmittedAtUseSite()) {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ }
+ break;
+
+ default:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
+ if (!cond->IsEmittedAtUseSite()) {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ }
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
+ if (cond->IsEmittedAtUseSite()) {
+ return;
+ }
+
+ vixl32::Register out = OutputRegister(cond);
+ vixl32::Label true_label, false_label;
+
+ switch (cond->InputAt(0)->GetType()) {
+ default: {
+ // Integer case.
+ __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+ AssemblerAccurateScope aas(GetVIXLAssembler(),
+ kArmInstrMaxSizeInBytes * 3u,
+ CodeBufferCheckScope::kMaximumSize);
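+ // Materialize the boolean result with an IT block: out = 1 if the condition holds, 0 otherwise.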
+ __ ite(ARMCondition(cond->GetCondition()));
+ __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1);
+ __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0);
+ return;
+ }
+ case Primitive::kPrimLong:
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ GenerateVcmp(cond);
+ GenerateFPJumps(cond, &true_label, &false_label);
+ break;
+ }
+
+ // Convert the jumps into the result.
+ vixl32::Label done_label;
+
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ Mov(out, 0);
+ __ B(&done_label);
+
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ Mov(out, 1);
+ __ Bind(&done_label);
+}
+
+void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
+ HandleCondition(comp);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
+ HandleCondition(comp);
+}
+
+void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ memory_barrier->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+}
+
+void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
+ ret->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
+ codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
+ locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) {
+ codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+
+ // TODO(VIXL): TryDispatch
+
+ HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+
+ // TODO(VIXL): TryGenerateIntrinsicCode
+
+ LocationSummary* locations = invoke->GetLocations();
+ DCHECK(locations->HasTemps());
+ codegen_->GenerateStaticOrDirectCall(invoke, locations->GetTemp(0));
+ // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
+ // previous instruction.
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
+ InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+}
+
+void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ // TODO(VIXL): TryDispatch
+
+ HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ // TODO(VIXL): TryGenerateIntrinsicCode
+
+ codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
+ DCHECK(!codegen_->IsLeafMethod());
+ // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
+ // previous instruction.
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+ switch (neg->GetResultType()) {
+ case Primitive::kPrimInt: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ break;
+ }
+
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
+ LocationSummary* locations = neg->GetLocations();
+ Location out = locations->Out();
+ Location in = locations->InAt(0);
+ switch (neg->GetResultType()) {
+ case Primitive::kPrimInt:
+ __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
+ break;
+
+ case Primitive::kPrimLong:
+ // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
+ __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
+ // We cannot emit an RSC (Reverse Subtract with Carry)
+ // instruction here, as it does not exist in the Thumb-2
+ // instruction set. We use the following approach
+ // with SBC and SUB instead.
+ //
+ // out.hi = -C
+ __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
+ // out.hi = out.hi - in.hi
+ __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
+ break;
+
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ __ Vneg(OutputVRegister(neg), InputVRegisterAt(neg, 0));
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
+ Primitive::Type result_type = conversion->GetResultType();
+ Primitive::Type input_type = conversion->GetInputType();
+ DCHECK_NE(result_type, input_type);
+
+ // The float-to-long, double-to-long and long-to-float type conversions
+ // rely on a call to the runtime.
+ LocationSummary::CallKind call_kind =
+ (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
+ && result_type == Primitive::kPrimLong)
+ || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat))
+ ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kNoCall;
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+
+ // The Java language does not allow treating boolean as an integral type, but
+ // our bit representation makes it safe.
+
+ switch (result_type) {
+ case Primitive::kPrimByte:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ // Type conversion from long to byte is a result of code transformations.
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ // Processing a Dex `int-to-byte' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimShort:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ // Type conversion from long to short is a result of code transformations.
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ // Processing a Dex `int-to-short' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimInt:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ // Processing a Dex `long-to-int' instruction.
+ locations->SetInAt(0, Location::Any());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-int' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+
+ case Primitive::kPrimDouble:
+ // Processing a Dex `double-to-int' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimLong:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ // Processing a Dex `int-to-long' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case Primitive::kPrimFloat: {
+ // Processing a Dex `float-to-long' instruction.
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(LocationFrom(r0, r1));
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ // Processing a Dex `double-to-long' instruction.
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
+ calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(LocationFrom(r0, r1));
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimChar:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ // Type conversion from long to char is a result of code transformations.
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ // Processing a Dex `int-to-char' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ // Processing a Dex `int-to-float' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+
+ case Primitive::kPrimLong: {
+ // Processing a Dex `long-to-float' instruction.
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
+ calling_convention.GetRegisterAt(1)));
+ locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+ break;
+ }
+
+ case Primitive::kPrimDouble:
+ // Processing a Dex `double-to-float' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ };
+ break;
+
+ case Primitive::kPrimDouble:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ // Processing a Dex `int-to-double' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+
+ case Primitive::kPrimLong:
+ // Processing a Dex `long-to-double' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+
+ case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-double' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ };
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
+ LocationSummary* locations = conversion->GetLocations();
+ Location out = locations->Out();
+ Location in = locations->InAt(0);
+ Primitive::Type result_type = conversion->GetResultType();
+ Primitive::Type input_type = conversion->GetInputType();
+ DCHECK_NE(result_type, input_type);
+ switch (result_type) {
+ case Primitive::kPrimByte:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ // Type conversion from long to byte is a result of code transformations.
+ __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
+ break;
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ // Processing a Dex `int-to-byte' instruction.
+ __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimShort:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ // Type conversion from long to short is a result of code transformations.
+ __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
+ break;
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ // Processing a Dex `int-to-short' instruction.
+ __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimInt:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ // Processing a Dex `long-to-int' instruction.
+ DCHECK(out.IsRegister());
+ if (in.IsRegisterPair()) {
+ __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
+ } else if (in.IsDoubleStackSlot()) {
+ GetAssembler()->LoadFromOffset(kLoadWord,
+ OutputRegister(conversion),
+ sp,
+ in.GetStackIndex());
+ } else {
+ DCHECK(in.IsConstant());
+ DCHECK(in.GetConstant()->IsLongConstant());
+ int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+ __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
+ }
+ break;
+
+ case Primitive::kPrimFloat: {
+ // Processing a Dex `float-to-int' instruction.
+ vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
+ __ Vcvt(I32, F32, temp, InputSRegisterAt(conversion, 0));
+ __ Vmov(OutputRegister(conversion), temp);
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ // Processing a Dex `double-to-int' instruction.
+ vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
+ __ Vcvt(I32, F64, temp_s, FromLowSToD(LowSRegisterFrom(in)));
+ __ Vmov(OutputRegister(conversion), temp_s);
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimLong:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ // Processing a Dex `int-to-long' instruction.
+ DCHECK(out.IsRegisterPair());
+ DCHECK(in.IsRegister());
+ __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
+ // Sign extension.
+ __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
+ break;
+
+ case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-long' instruction.
+ codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
+ CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ break;
+
+ case Primitive::kPrimDouble:
+ // Processing a Dex `double-to-long' instruction.
+ codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
+ CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimChar:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ // Type conversion from long to char is a result of code transformations.
+ __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
+ break;
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ // Processing a Dex `int-to-char' instruction.
+ __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar: {
+ // Processing a Dex `int-to-float' instruction.
+ __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
+ __ Vcvt(F32, I32, OutputSRegister(conversion), OutputSRegister(conversion));
+ break;
+ }
+
+ case Primitive::kPrimLong:
+ // Processing a Dex `long-to-float' instruction.
+ codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc());
+ CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+ break;
+
+ case Primitive::kPrimDouble:
+ // Processing a Dex `double-to-float' instruction.
+ __ Vcvt(F32, F64, OutputSRegister(conversion), FromLowSToD(LowSRegisterFrom(in)));
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+      }
+ break;
+
+ case Primitive::kPrimDouble:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ // Boolean input is a result of code transformations.
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar: {
+ // Processing a Dex `int-to-double' instruction.
+ __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
+ __ Vcvt(F64, I32, FromLowSToD(LowSRegisterFrom(out)), LowSRegisterFrom(out));
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ // Processing a Dex `long-to-double' instruction.
+ vixl32::Register low = LowRegisterFrom(in);
+ vixl32::Register high = HighRegisterFrom(in);
+
+ vixl32::SRegister out_s = LowSRegisterFrom(out);
+ vixl32::DRegister out_d = FromLowSToD(out_s);
+
+ vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
+ vixl32::DRegister temp_d = FromLowSToD(temp_s);
+
+ vixl32::SRegister constant_s = LowSRegisterFrom(locations->GetTemp(1));
+ vixl32::DRegister constant_d = FromLowSToD(constant_s);
+
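+        // The 64-bit input is decomposed as value == high * 2^32 + (uint32_t) low, so the
+        // result is built from two exact int-to-double conversions combined with a
+        // multiply-accumulate by the constant 2^32.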
+ // temp_d = int-to-double(high)
+ __ Vmov(temp_s, high);
+ __ Vcvt(F64, I32, temp_d, temp_s);
+ // constant_d = k2Pow32EncodingForDouble
+ __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
+ // out_d = unsigned-to-double(low)
+ __ Vmov(out_s, low);
+ __ Vcvt(F64, U32, out_d, out_s);
+ // out_d += temp_d * constant_d
+ __ Vmla(F64, out_d, temp_d, constant_d);
+ break;
+ }
+
+ case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-double' instruction.
+ __ Vcvt(F64, F32, FromLowSToD(LowSRegisterFrom(out)), InputSRegisterAt(conversion, 0));
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+      }
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
+ switch (add->GetResultType()) {
+ case Primitive::kPrimInt: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) {
+ LocationSummary* locations = add->GetLocations();
+ Location out = locations->Out();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+
+ switch (add->GetResultType()) {
+    case Primitive::kPrimInt: {
+      __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1));
+      break;
+    }
+
+ // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
+ case Primitive::kPrimLong: {
+ DCHECK(second.IsRegisterPair());
+ __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
+ __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
+ break;
+ }
+
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitSub(HSub* sub) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
+ switch (sub->GetResultType()) {
+ case Primitive::kPrimInt: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) {
+ LocationSummary* locations = sub->GetLocations();
+ Location out = locations->Out();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+ switch (sub->GetResultType()) {
+ case Primitive::kPrimInt: {
+ __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
+ break;
+ }
+
+ // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
+ case Primitive::kPrimLong: {
+ DCHECK(second.IsRegisterPair());
+ __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
+ __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
+ break;
+ }
+
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
+ switch (mul->GetResultType()) {
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
+ LocationSummary* locations = mul->GetLocations();
+ Location out = locations->Out();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+ switch (mul->GetResultType()) {
+ case Primitive::kPrimInt: {
+ __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
+ break;
+ }
+ case Primitive::kPrimLong: {
+ vixl32::Register out_hi = HighRegisterFrom(out);
+ vixl32::Register out_lo = LowRegisterFrom(out);
+ vixl32::Register in1_hi = HighRegisterFrom(first);
+ vixl32::Register in1_lo = LowRegisterFrom(first);
+ vixl32::Register in2_hi = HighRegisterFrom(second);
+ vixl32::Register in2_lo = LowRegisterFrom(second);
+
+      // Extra checks to protect against cases caused by the existence of the R1_R2 pair.
+      // The algorithm is wrong if out.hi is either in1.lo or in2.lo
+      // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2).
+ DCHECK_NE(out_hi.GetCode(), in1_lo.GetCode());
+ DCHECK_NE(out_hi.GetCode(), in2_lo.GetCode());
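+      // (out.hi is written by the Mla below before the Umull reads in1.lo and in2.lo,
+      // so it must not alias either of them.)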
+
+ // input: in1 - 64 bits, in2 - 64 bits
+ // output: out
+ // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
+ // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
+ // parts: out.lo = (in1.lo * in2.lo)[31:0]
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ // temp <- in1.lo * in2.hi
+ __ Mul(temp, in1_lo, in2_hi);
+ // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo
+ __ Mla(out_hi, in1_hi, in2_lo, temp);
+ // out.lo <- (in1.lo * in2.lo)[31:0];
+ __ Umull(out_lo, temp, in1_lo, in2_lo);
+ // out.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
+ __ Add(out_hi, out_hi, temp);
+ break;
+ }
+
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ Location second = instruction->GetLocations()->InAt(1);
+ DCHECK(second.IsConstant());
+
+ vixl32::Register out = OutputRegister(instruction);
+ vixl32::Register dividend = InputRegisterAt(instruction, 0);
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ DCHECK(imm == 1 || imm == -1);
+
+ if (instruction->IsRem()) {
+ __ Mov(out, 0);
+ } else {
+ if (imm == 1) {
+ __ Mov(out, dividend);
+ } else {
+ __ Rsb(out, dividend, 0);
+ }
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ vixl32::Register out = OutputRegister(instruction);
+ vixl32::Register dividend = InputRegisterAt(instruction, 0);
+ vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
+ int ctz_imm = CTZ(abs_imm);
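+  // Bias the dividend so that the arithmetic shift below rounds towards zero: temp ends up
+  // holding (abs_imm - 1) when the dividend is negative and 0 otherwise (e.g. for imm == 8,
+  // ctz_imm == 3 and temp is 7 for a negative dividend).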
+
+ if (ctz_imm == 1) {
+ __ Lsr(temp, dividend, 32 - ctz_imm);
+ } else {
+ __ Asr(temp, dividend, 31);
+ __ Lsr(temp, temp, 32 - ctz_imm);
+ }
+ __ Add(out, temp, dividend);
+
+ if (instruction->IsDiv()) {
+ __ Asr(out, out, ctz_imm);
+ if (imm < 0) {
+ __ Rsb(out, out, 0);
+ }
+ } else {
+ __ Ubfx(out, out, 0, ctz_imm);
+ __ Sub(out, out, temp);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ vixl32::Register out = OutputRegister(instruction);
+ vixl32::Register dividend = InputRegisterAt(instruction, 0);
+ vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
+ vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
+ int64_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+ int64_t magic;
+ int shift;
+ CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
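+  // The quotient is then hi32(dividend * magic), corrected by +/- dividend when the signs of
+  // imm and magic differ, shifted right by `shift`, plus one if the intermediate result is
+  // negative (the final Sub with the ASR #31 operand). This is the standard signed
+  // division-by-constant scheme from Hacker's Delight.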
+
+ __ Mov(temp1, magic);
+ __ Smull(temp2, temp1, dividend, temp1);
+
+ if (imm > 0 && magic < 0) {
+ __ Add(temp1, temp1, dividend);
+ } else if (imm < 0 && magic > 0) {
+ __ Sub(temp1, temp1, dividend);
+ }
+
+ if (shift != 0) {
+ __ Asr(temp1, temp1, shift);
+ }
+
+ if (instruction->IsDiv()) {
+ __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
+ } else {
+ __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
+ // TODO: Strength reduction for mls.
+ __ Mov(temp2, imm);
+ __ Mls(out, temp1, temp2, dividend);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
+ HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+ Location second = instruction->GetLocations()->InAt(1);
+ DCHECK(second.IsConstant());
+
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+ if (imm == 0) {
+    // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ DivRemByPowerOfTwo(instruction);
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
+ LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+ if (div->GetResultType() == Primitive::kPrimLong) {
+ // pLdiv runtime call.
+ call_kind = LocationSummary::kCallOnMainOnly;
+ } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
+    // sdiv will be replaced by another instruction sequence.
+ } else if (div->GetResultType() == Primitive::kPrimInt &&
+ !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ // pIdivmod runtime call.
+ call_kind = LocationSummary::kCallOnMainOnly;
+ }
+
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
+
+ switch (div->GetResultType()) {
+ case Primitive::kPrimInt: {
+ if (div->InputAt(1)->IsConstant()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ int32_t value = div->InputAt(1)->AsIntConstant()->GetValue();
+ if (value == 1 || value == 0 || value == -1) {
+ // No temp register required.
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ if (!IsPowerOfTwo(AbsOrMin(value))) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ }
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ } else {
+ TODO_VIXL32(FATAL);
+ }
+ break;
+ }
+ case Primitive::kPrimLong: {
+ TODO_VIXL32(FATAL);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected div type " << div->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
+ Location rhs = div->GetLocations()->InAt(1);
+
+ switch (div->GetResultType()) {
+ case Primitive::kPrimInt: {
+ if (rhs.IsConstant()) {
+ GenerateDivRemConstantIntegral(div);
+ } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
+ } else {
+ TODO_VIXL32(FATAL);
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ TODO_VIXL32(FATAL);
+ break;
+ }
+
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected div type " << div->GetResultType();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+ // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
+ LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+ if (instruction->HasUses()) {
+ locations->SetOut(Location::SameAsFirstInput());
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+ DivZeroCheckSlowPathARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) DivZeroCheckSlowPathARMVIXL(instruction);
+ codegen_->AddSlowPath(slow_path);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location value = locations->InAt(0);
+
+ switch (instruction->GetType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt: {
+ if (value.IsRegister()) {
+ __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
+ } else {
+ DCHECK(value.IsConstant()) << value;
+ if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+ __ B(slow_path->GetEntryLabel());
+ }
+ }
+ break;
+ }
+ case Primitive::kPrimLong: {
+ if (value.IsRegisterPair()) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
+ __ B(eq, slow_path->GetEntryLabel());
+ } else {
+ DCHECK(value.IsConstant()) << value;
+ if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+ __ B(slow_path->GetEntryLabel());
+ }
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HRor* ror) {
+ LocationSummary* locations = ror->GetLocations();
+ vixl32::Register in = InputRegisterAt(ror, 0);
+ Location rhs = locations->InAt(1);
+ vixl32::Register out = OutputRegister(ror);
+
+ if (rhs.IsConstant()) {
+    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
+    // so map all rotations to a positive equivalent in that range.
+    // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
+ uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
+ if (rot) {
+ // Rotate, mapping left rotations to right equivalents if necessary.
+ // (e.g. left by 2 bits == right by 30.)
+ __ Ror(out, in, rot);
+ } else if (!out.Is(in)) {
+ __ Mov(out, in);
+ }
+ } else {
+ __ Ror(out, in, RegisterFrom(rhs));
+ }
+}
+
+// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
+// rotates by swapping input regs (effectively rotating by the first 32-bits of
+// a larger rotation) or flipping direction (thus treating larger right/left
+// rotations as sub-word sized rotations in the other direction) as appropriate.
+void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
+ LocationSummary* locations = ror->GetLocations();
+ vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
+ vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
+ Location rhs = locations->InAt(1);
+ vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
+ vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
+
+ if (rhs.IsConstant()) {
+ uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
+    // Map all rotations to positive equivalents on the interval [0,63].
+ rot &= kMaxLongShiftDistance;
+ // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
+ // logic below to a simple pair of binary orr.
+ // (e.g. 34 bits == in_reg swap + 2 bits right.)
+ if (rot >= kArmBitsPerWord) {
+ rot -= kArmBitsPerWord;
+ std::swap(in_reg_hi, in_reg_lo);
+ }
+ // Rotate, or mov to out for zero or word size rotations.
+ if (rot != 0u) {
+ __ Lsr(out_reg_hi, in_reg_hi, rot);
+ __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
+ __ Lsr(out_reg_lo, in_reg_lo, rot);
+ __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
+ } else {
+ __ Mov(out_reg_lo, in_reg_lo);
+ __ Mov(out_reg_hi, in_reg_hi);
+ }
+ } else {
+ vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
+ vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
+ vixl32::Label end;
+ vixl32::Label shift_by_32_plus_shift_right;
+
+ __ And(shift_right, RegisterFrom(rhs), 0x1F);
+ __ Lsrs(shift_left, RegisterFrom(rhs), 6);
+    // TODO(VIXL): Check that the flags are kept once "vixl32::LeaveFlags" is enabled.
+ __ Rsb(shift_left, shift_right, kArmBitsPerWord);
+ __ B(cc, &shift_by_32_plus_shift_right);
+
+ // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+ // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+ __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+ __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+ __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
+ __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+ __ Lsr(shift_left, in_reg_hi, shift_right);
+ __ Add(out_reg_lo, out_reg_lo, shift_left);
+ __ B(&end);
+
+ __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
+ // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+ // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+ __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+ __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+ __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
+ __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+ __ Lsl(shift_right, in_reg_hi, shift_left);
+ __ Add(out_reg_lo, out_reg_lo, shift_right);
+
+ __ Bind(&end);
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+ switch (ror->GetResultType()) {
+ case Primitive::kPrimInt: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (ror->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
+ Primitive::Type type = ror->GetResultType();
+ switch (type) {
+ case Primitive::kPrimInt: {
+ HandleIntegerRotate(ror);
+ break;
+ }
+ case Primitive::kPrimLong: {
+ HandleLongRotate(ror);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
+ DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
+
+ switch (op->GetResultType()) {
+ case Primitive::kPrimInt: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (op->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Make the output overlap, as it will be used to hold the masked
+ // second input.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
+ break;
+ }
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (op->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+        // For simplicity, use kOutputOverlap even though we only require that low registers
+        // don't clash with high registers, which the register allocator currently guarantees.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
+ DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+ LocationSummary* locations = op->GetLocations();
+ Location out = locations->Out();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+
+ Primitive::Type type = op->GetResultType();
+ switch (type) {
+ case Primitive::kPrimInt: {
+ vixl32::Register out_reg = OutputRegister(op);
+ vixl32::Register first_reg = InputRegisterAt(op, 0);
+ if (second.IsRegister()) {
+ vixl32::Register second_reg = RegisterFrom(second);
+ // ARM doesn't mask the shift count so we need to do it ourselves.
+ __ And(out_reg, second_reg, kMaxIntShiftDistance);
+ if (op->IsShl()) {
+ __ Lsl(out_reg, first_reg, out_reg);
+ } else if (op->IsShr()) {
+ __ Asr(out_reg, first_reg, out_reg);
+ } else {
+ __ Lsr(out_reg, first_reg, out_reg);
+ }
+ } else {
+ int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t shift_value = cst & kMaxIntShiftDistance;
+ if (shift_value == 0) { // ARM does not support shifting with 0 immediate.
+ __ Mov(out_reg, first_reg);
+ } else if (op->IsShl()) {
+ __ Lsl(out_reg, first_reg, shift_value);
+ } else if (op->IsShr()) {
+ __ Asr(out_reg, first_reg, shift_value);
+ } else {
+ __ Lsr(out_reg, first_reg, shift_value);
+ }
+ }
+ break;
+ }
+ case Primitive::kPrimLong: {
+ vixl32::Register o_h = HighRegisterFrom(out);
+ vixl32::Register o_l = LowRegisterFrom(out);
+
+ vixl32::Register high = HighRegisterFrom(first);
+ vixl32::Register low = LowRegisterFrom(first);
+
+ if (second.IsRegister()) {
+ vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+
+ vixl32::Register second_reg = RegisterFrom(second);
+
+ if (op->IsShl()) {
+ __ And(o_l, second_reg, kMaxLongShiftDistance);
+ // Shift the high part
+ __ Lsl(o_h, high, o_l);
+          // Shift the low part and `or` what overflowed onto the high part.
+ __ Rsb(temp, o_l, kArmBitsPerWord);
+ __ Lsr(temp, low, temp);
+ __ Orr(o_h, o_h, temp);
+ // If the shift is > 32 bits, override the high part
+ __ Subs(temp, o_l, kArmBitsPerWord);
+ {
+ AssemblerAccurateScope guard(GetVIXLAssembler(),
+ 3 * kArmInstrMaxSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(pl);
+ __ lsl(pl, o_h, low, temp);
+ }
+ // Shift the low part
+ __ Lsl(o_l, low, o_l);
+ } else if (op->IsShr()) {
+ __ And(o_h, second_reg, kMaxLongShiftDistance);
+ // Shift the low part
+ __ Lsr(o_l, low, o_h);
+          // Shift the high part and `or` what underflowed onto the low part.
+ __ Rsb(temp, o_h, kArmBitsPerWord);
+ __ Lsl(temp, high, temp);
+ __ Orr(o_l, o_l, temp);
+ // If the shift is > 32 bits, override the low part
+ __ Subs(temp, o_h, kArmBitsPerWord);
+ {
+ AssemblerAccurateScope guard(GetVIXLAssembler(),
+ 3 * kArmInstrMaxSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(pl);
+ __ asr(pl, o_l, high, temp);
+ }
+ // Shift the high part
+ __ Asr(o_h, high, o_h);
+ } else {
+ __ And(o_h, second_reg, kMaxLongShiftDistance);
+          // Same as Shr except we use `Lsr`s and not `Asr`s.
+ __ Lsr(o_l, low, o_h);
+ __ Rsb(temp, o_h, kArmBitsPerWord);
+ __ Lsl(temp, high, temp);
+ __ Orr(o_l, o_l, temp);
+ __ Subs(temp, o_h, kArmBitsPerWord);
+ {
+ AssemblerAccurateScope guard(GetVIXLAssembler(),
+ 3 * kArmInstrMaxSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(pl);
+ __ lsr(pl, o_l, high, temp);
+ }
+ __ Lsr(o_h, high, o_h);
+ }
+ } else {
+ // Register allocator doesn't create partial overlap.
+ DCHECK(!o_l.Is(high));
+ DCHECK(!o_h.Is(low));
+ int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t shift_value = cst & kMaxLongShiftDistance;
+ if (shift_value > 32) {
+ if (op->IsShl()) {
+ __ Lsl(o_h, low, shift_value - 32);
+ __ Mov(o_l, 0);
+ } else if (op->IsShr()) {
+ __ Asr(o_l, high, shift_value - 32);
+ __ Asr(o_h, high, 31);
+ } else {
+ __ Lsr(o_l, high, shift_value - 32);
+ __ Mov(o_h, 0);
+ }
+ } else if (shift_value == 32) {
+ if (op->IsShl()) {
+ __ Mov(o_h, low);
+ __ Mov(o_l, 0);
+ } else if (op->IsShr()) {
+ __ Mov(o_l, high);
+ __ Asr(o_h, high, 31);
+ } else {
+ __ Mov(o_l, high);
+ __ Mov(o_h, 0);
+ }
+ } else if (shift_value == 1) {
+ if (op->IsShl()) {
+ __ Lsls(o_l, low, 1);
+ __ Adc(o_h, high, high);
+ } else if (op->IsShr()) {
+ __ Asrs(o_h, high, 1);
+ __ Rrx(o_l, low);
+ } else {
+ __ Lsrs(o_h, high, 1);
+ __ Rrx(o_l, low);
+ }
+ } else {
+ DCHECK(2 <= shift_value && shift_value < 32) << shift_value;
+ if (op->IsShl()) {
+ __ Lsl(o_h, high, shift_value);
+ __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
+ __ Lsl(o_l, low, shift_value);
+ } else if (op->IsShr()) {
+ __ Lsr(o_l, low, shift_value);
+ __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
+ __ Asr(o_h, high, shift_value);
+ } else {
+ __ Lsr(o_l, low, shift_value);
+ __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
+ __ Lsr(o_h, high, shift_value);
+ }
+ }
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
+ HandleShift(shl);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
+ HandleShift(shl);
+}
+
+void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
+ HandleShift(shr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
+ HandleShift(shr);
+}
+
+void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
+ HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
+ HandleShift(ushr);
+}
+
+void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ if (instruction->IsStringAlloc()) {
+ locations->AddTemp(LocationFrom(kMethodRegister));
+ } else {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+ }
+ locations->SetOut(LocationFrom(r0));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
+ if (instruction->IsStringAlloc()) {
+ // String is allocated through StringFactory. Call NewEmptyString entry point.
+ vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0));
+ MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
+ GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString));
+ GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value());
+ AssemblerAccurateScope aas(GetVIXLAssembler(),
+ kArmInstrMaxSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ blx(lr);
+ codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+ } else {
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetOut(LocationFrom(r0));
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2)));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ __ Mov(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
+}
+
+void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
+ if (location.IsStackSlot()) {
+ location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ } else if (location.IsDoubleStackSlot()) {
+ location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ }
+ locations->SetOut(location);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
+ HParameterValue* instruction ATTRIBUTE_UNUSED) {
+ // Nothing to do, the parameter is already at its location.
+}
+
+void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(LocationFrom(kMethodRegister));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
+ HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+ // Nothing to do, the method is already at its location.
+}
+
+void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
+ LocationSummary* locations = not_->GetLocations();
+ Location out = locations->Out();
+ Location in = locations->InAt(0);
+ switch (not_->GetResultType()) {
+ case Primitive::kPrimInt:
+ __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
+ break;
+
+ case Primitive::kPrimLong:
+ __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
+ __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
+ break;
+
+ default:
+ LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
+ switch (compare->InputAt(0)->GetType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Output overlaps because it is written before doing the low comparison.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
+ LocationSummary* locations = compare->GetLocations();
+ vixl32::Register out = OutputRegister(compare);
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+
+ vixl32::Label less, greater, done;
+ Primitive::Type type = compare->InputAt(0)->GetType();
+ vixl32::Condition less_cond = vixl32::Condition(kNone);
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
+ // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
+ __ Mov(out, 0);
+ __ Cmp(RegisterFrom(left), RegisterFrom(right)); // Signed compare.
+ less_cond = lt;
+ break;
+ }
+ case Primitive::kPrimLong: {
+ __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare.
+ __ B(lt, &less);
+ __ B(gt, &greater);
+ // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
+ __ Mov(out, 0);
+ __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare.
+ less_cond = lo;
+ break;
+ }
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ __ Mov(out, 0);
+ GenerateVcmp(compare);
+ // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected compare type " << type;
+ UNREACHABLE();
+ }
+
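+  // `out` is 0 at this point; branch to `done` on equality, to `less` when the
+  // less-than condition holds, and fall through to `greater` otherwise.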
+ __ B(eq, &done);
+ __ B(less_cond, &less);
+
+ __ Bind(&greater);
+ __ Mov(out, 1);
+ __ B(&done);
+
+ __ Bind(&less);
+ __ Mov(out, -1);
+
+ __ Bind(&done);
+}
+
+void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
+ locations->SetInAt(i, Location::Any());
+ }
+ locations->SetOut(Location::Any());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "Unreachable";
+}
+
+void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
+ // TODO (ported from quick): revisit ARM barrier kinds.
+ DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings.
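+  // Only store-store barriers can be relaxed to DMB ISHST; every other kind needs a full
+  // DMB ISH within the inner shareable domain.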
+ switch (kind) {
+ case MemBarrierKind::kAnyStore:
+ case MemBarrierKind::kLoadAny:
+ case MemBarrierKind::kAnyAny: {
+ flavor = DmbOptions::ISH;
+ break;
+ }
+ case MemBarrierKind::kStoreStore: {
+ flavor = DmbOptions::ISHST;
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected memory barrier " << kind;
+ }
+ __ Dmb(flavor);
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
+ uint32_t offset,
+ vixl32::Register out_lo,
+ vixl32::Register out_hi) {
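+  // A doubleword exclusive load (LDREXD) is single-copy atomic, so it is enough on its own
+  // for an atomic 64-bit load; no matching STREXD is required here.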
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ if (offset != 0) {
+ vixl32::Register temp = temps.Acquire();
+ __ Add(temp, addr, offset);
+ addr = temp;
+ }
+ __ Ldrexd(out_lo, out_hi, addr);
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
+ uint32_t offset,
+ vixl32::Register value_lo,
+ vixl32::Register value_hi,
+ vixl32::Register temp1,
+ vixl32::Register temp2,
+ HInstruction* instruction) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Label fail;
+ if (offset != 0) {
+ vixl32::Register temp = temps.Acquire();
+ __ Add(temp, addr, offset);
+ addr = temp;
+ }
+ __ Bind(&fail);
+  // We need a load followed by a store. (The address used in a STREX instruction must
+  // be the same as the address in the most recently executed LDREX instruction.)
+ __ Ldrexd(temp1, temp2, addr);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ Strexd(temp1, value_lo, value_hi, addr);
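+  // Strexd writes 0 to temp1 on success and 1 if the exclusive store failed; retry until
+  // the store-exclusive succeeds.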
+ __ Cbnz(temp1, &fail);
+}
+
+void LocationsBuilderARMVIXL::HandleFieldSet(
+ HInstruction* instruction, const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+
+ Primitive::Type field_type = field_info.GetFieldType();
+ if (Primitive::IsFloatingPointType(field_type)) {
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+
+ bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble;
+ bool generate_volatile = field_info.IsVolatile()
+ && is_wide
+ && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+ // Temporary registers for the write barrier.
+ // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
+ if (needs_write_barrier) {
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
+ locations->AddTemp(Location::RequiresRegister());
+ } else if (generate_volatile) {
+    // The ARM encoding has some additional constraints for ldrexd/strexd:
+ // - registers need to be consecutive
+ // - the first register should be even but not R14.
+ // We don't test for ARM yet, and the assertion makes sure that we
+ // revisit this if we ever enable ARM encoding.
+ DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
+
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ if (field_type == Primitive::kPrimDouble) {
+ // For doubles we need two more registers to copy the value.
+ locations->AddTemp(LocationFrom(r2));
+ locations->AddTemp(LocationFrom(r3));
+ }
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ bool value_can_be_null) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
+ LocationSummary* locations = instruction->GetLocations();
+ vixl32::Register base = InputRegisterAt(instruction, 0);
+ Location value = locations->InAt(1);
+
+ bool is_volatile = field_info.IsVolatile();
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+ Primitive::Type field_type = field_info.GetFieldType();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ }
+
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte: {
+ GetAssembler()->StoreToOffset(kStoreByte, RegisterFrom(value), base, offset);
+ break;
+ }
+
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar: {
+ GetAssembler()->StoreToOffset(kStoreHalfword, RegisterFrom(value), base, offset);
+ break;
+ }
+
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot: {
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ DCHECK_EQ(field_type, Primitive::kPrimNot);
+ vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+ __ Mov(temp, RegisterFrom(value));
+ GetAssembler()->PoisonHeapReference(temp);
+ GetAssembler()->StoreToOffset(kStoreWord, temp, base, offset);
+ } else {
+ GetAssembler()->StoreToOffset(kStoreWord, RegisterFrom(value), base, offset);
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ if (is_volatile && !atomic_ldrd_strd) {
+ GenerateWideAtomicStore(base,
+ offset,
+ LowRegisterFrom(value),
+ HighRegisterFrom(value),
+ RegisterFrom(locations->GetTemp(0)),
+ RegisterFrom(locations->GetTemp(1)),
+ instruction);
+ } else {
+ GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+ break;
+ }
+
+ case Primitive::kPrimFloat: {
+ GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ vixl32::DRegister value_reg = FromLowSToD(LowSRegisterFrom(value));
+ if (is_volatile && !atomic_ldrd_strd) {
+ vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
+ vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
+
+ __ Vmov(value_reg_lo, value_reg_hi, value_reg);
+
+ GenerateWideAtomicStore(base,
+ offset,
+ value_reg_lo,
+ value_reg_hi,
+ RegisterFrom(locations->GetTemp(2)),
+ RegisterFrom(locations->GetTemp(3)),
+ instruction);
+ } else {
+ GetAssembler()->StoreDToOffset(value_reg, base, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+ break;
+ }
+
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unreachable type " << field_type;
+ UNREACHABLE();
+ }
+
+ // Longs and doubles are handled in the switch.
+ if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+ vixl32::Register card = RegisterFrom(locations->GetTemp(1));
+ codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
+ }
+
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+}
+
+void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_field_get_with_read_barrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::RequiresRegister());
+
+ bool volatile_for_double = field_info.IsVolatile()
+ && (field_info.GetFieldType() == Primitive::kPrimDouble)
+ && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+ // The output overlaps in case of volatile long: we don't want the
+ // code generated by GenerateWideAtomicLoad to overwrite the
+ // object's location. Likewise, in the case of an object field get
+ // with read barriers enabled, we do not want the load to overwrite
+ // the object's location, as we need it to emit the read barrier.
+ bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) ||
+ object_field_get_with_read_barrier;
+
+ if (Primitive::IsFloatingPointType(instruction->GetType())) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetOut(Location::RequiresRegister(),
+ (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+ }
+ if (volatile_for_double) {
+    // The ARM encoding has some additional constraints for ldrexd/strexd:
+ // - registers need to be consecutive
+ // - the first register should be even but not R14.
+ // We don't test for ARM yet, and the assertion makes sure that we
+ // revisit this if we ever enable ARM encoding.
+ DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
+ DCHECK(Primitive::IsFloatingPointType(input->GetType())) << input->GetType();
+ if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
+ (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
+ return Location::ConstantLocation(input->AsConstant());
+ } else {
+ return Location::RequiresFpuRegister();
+ }
+}
+
+Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
+ Opcode opcode) {
+ DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+ if (constant->IsConstant() &&
+ CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
+ return Location::ConstantLocation(constant->AsConstant());
+ }
+ return Location::RequiresRegister();
+}
+
+bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst,
+ Opcode opcode) {
+ uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
+ if (Primitive::Is64BitType(input_cst->GetType())) {
+ Opcode high_opcode = opcode;
+ SetCc low_set_cc = kCcDontCare;
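+    // A 64-bit ADD/SUB is emitted as an ADDS/SUBS on the low word followed by an ADC/SBC on
+    // the high word, so both halves of the constant must be encodable for the corresponding
+    // opcodes.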
+ switch (opcode) {
+ case SUB:
+ // Flip the operation to an ADD.
+ value = -value;
+ opcode = ADD;
+ FALLTHROUGH_INTENDED;
+ case ADD:
+ if (Low32Bits(value) == 0u) {
+ return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare);
+ }
+ high_opcode = ADC;
+ low_set_cc = kCcSet;
+ break;
+ default:
+ break;
+ }
+ return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) &&
+ CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare);
+ } else {
+ return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
+ }
+}
+
+// TODO(VIXL): Replace `art::arm::SetCc` with `vixl32::FlagsUpdate` once the flags set
+// optimization is enabled.
+bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value,
+ Opcode opcode,
+ SetCc set_cc) {
+ ArmVIXLAssembler* assembler = codegen_->GetAssembler();
+ if (assembler->ShifterOperandCanHold(opcode, value, set_cc)) {
+ return true;
+ }
+ Opcode neg_opcode = kNoOperand;
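+  // If the value does not fit directly, try the complementary opcode with the bitwise-negated
+  // or arithmetically-negated value (e.g. AND rd, rn, #imm can become BIC rd, rn, #~imm).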
+ switch (opcode) {
+ case AND: neg_opcode = BIC; value = ~value; break;
+ case ORR: neg_opcode = ORN; value = ~value; break;
+ case ADD: neg_opcode = SUB; value = -value; break;
+ case ADC: neg_opcode = SBC; value = ~value; break;
+ case SUB: neg_opcode = ADD; value = -value; break;
+ case SBC: neg_opcode = ADC; value = ~value; break;
+ default:
+ return false;
+ }
+ return assembler->ShifterOperandCanHold(neg_opcode, value, set_cc);
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+ LocationSummary* locations = instruction->GetLocations();
+ vixl32::Register base = InputRegisterAt(instruction, 0);
+ Location out = locations->Out();
+ bool is_volatile = field_info.IsVolatile();
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+ Primitive::Type field_type = field_info.GetFieldType();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
+ GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out), base, offset);
+ break;
+
+ case Primitive::kPrimByte:
+ GetAssembler()->LoadFromOffset(kLoadSignedByte, RegisterFrom(out), base, offset);
+ break;
+
+ case Primitive::kPrimShort:
+ GetAssembler()->LoadFromOffset(kLoadSignedHalfword, RegisterFrom(out), base, offset);
+ break;
+
+ case Primitive::kPrimChar:
+ GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, RegisterFrom(out), base, offset);
+ break;
+
+ case Primitive::kPrimInt:
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
+ break;
+
+ case Primitive::kPrimNot: {
+ // /* HeapReference<Object> */ out = *(base + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ TODO_VIXL32(FATAL);
+ } else {
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
+ // TODO(VIXL): Scope to guarantee the position immediately after the load.
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, locations->InAt(0), offset);
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong:
+ if (is_volatile && !atomic_ldrd_strd) {
+ GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
+ } else {
+ GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
+ break;
+
+ case Primitive::kPrimDouble: {
+ vixl32::DRegister out_dreg = FromLowSToD(LowSRegisterFrom(out));
+ if (is_volatile && !atomic_ldrd_strd) {
+ vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
+ vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
+ GenerateWideAtomicLoad(base, offset, lo, hi);
+ // TODO(VIXL): Do we need to be immediately after the ldrexd instruction? If so we need a
+ // scope.
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ Vmov(out_dreg, lo, hi);
+ } else {
+ GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
+ // TODO(VIXL): Scope to guarantee the position immediately after the load.
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+ break;
+ }
+
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unreachable type " << field_type;
+ UNREACHABLE();
+ }
+
+ if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) {
+ // Potential implicit null checks, in the case of reference or
+ // double fields, are handled in the previous switch statement.
+ } else {
+ // Address cases other than reference and double that may require an implicit null check.
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (is_volatile) {
+ if (field_type == Primitive::kPrimNot) {
+ // Memory barriers, in the case of references, are also handled
+ // in the previous switch statement.
+ } else {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+}
+
+void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
+ // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
+ LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (instruction->HasUses()) {
+ locations->SetOut(Location::SameAsFirstInput());
+ }
+}
+
+void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
+ if (CanMoveNullCheckToUser(instruction)) {
+ return;
+ }
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ AssemblerAccurateScope aas(GetVIXLAssembler(),
+ kArmInstrMaxSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
+ RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
+ NullCheckSlowPathARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) NullCheckSlowPathARMVIXL(instruction);
+ AddSlowPath(slow_path);
+ __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
+ codegen_->GenerateNullCheck(instruction);
+}
+
+void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
+ uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
+ vixl32::Register obj = InputRegisterAt(instruction, 0);
+ vixl32::Register out = OutputRegister(instruction);
+ GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // TODO(VIXL): https://android-review.googlesource.com/#/c/272625/
+}
+
+void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
+ vixl32::Register card,
+ vixl32::Register object,
+ vixl32::Register value,
+ bool can_be_null) {
+ vixl32::Label is_null;
+ if (can_be_null) {
+ __ Cbz(value, &is_null);
+ }
+ GetAssembler()->LoadFromOffset(
+ kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
+ __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
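+  // The card table base is biased at creation so that its least significant byte equals the
+  // dirty card value; storing the low byte of `card` at `card + (object >> kCardShift)`
+  // therefore marks the card covering `object` as dirty.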
+ __ Strb(card, MemOperand(card, temp));
+ if (can_be_null) {
+ __ Bind(&is_null);
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
+ codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ and related.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
+ HBasicBlock* block = instruction->GetBlock();
+ if (block->GetLoopInformation() != nullptr) {
+ DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+ // The back edge will generate the suspend check.
+ return;
+ }
+ if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+ // The goto will generate the suspend check.
+ return;
+ }
+ GenerateSuspendCheck(instruction, nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
+ HBasicBlock* successor) {
+ SuspendCheckSlowPathARMVIXL* slow_path =
+ down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
+ if (slow_path == nullptr) {
+ slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARMVIXL(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
+
+ UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
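+  // Load the 16-bit thread flags; a non-zero value means a suspend or checkpoint request is
+  // pending, in which case we branch to the slow path.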
+ GetAssembler()->LoadFromOffset(
+ kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
+ if (successor == nullptr) {
+ __ Cbnz(temp, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetReturnLabel());
+ } else {
+ __ Cbz(temp, codegen_->GetLabelOf(successor));
+ __ B(slow_path->GetEntryLabel());
+ }
+}
+
+ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
+ return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
+ UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+ MoveOperands* move = moves_[index];
+ Location source = move->GetSource();
+ Location destination = move->GetDestination();
+
+ if (source.IsRegister()) {
+ if (destination.IsRegister()) {
+ __ Mov(RegisterFrom(destination), RegisterFrom(source));
+ } else if (destination.IsFpuRegister()) {
+ __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
+ } else {
+ DCHECK(destination.IsStackSlot());
+ GetAssembler()->StoreToOffset(kStoreWord,
+ RegisterFrom(source),
+ sp,
+ destination.GetStackIndex());
+ }
+ } else if (source.IsStackSlot()) {
+ if (destination.IsRegister()) {
+ GetAssembler()->LoadFromOffset(kLoadWord,
+ RegisterFrom(destination),
+ sp,
+ source.GetStackIndex());
+ } else if (destination.IsFpuRegister()) {
+ GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
+ } else {
+ DCHECK(destination.IsStackSlot());
+ vixl32::Register temp = temps.Acquire();
+ GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
+ GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
+ }
+ } else if (source.IsFpuRegister()) {
+ if (destination.IsRegister()) {
+ TODO_VIXL32(FATAL);
+ } else if (destination.IsFpuRegister()) {
+ __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
+ } else {
+ DCHECK(destination.IsStackSlot());
+ GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
+ }
+ } else if (source.IsDoubleStackSlot()) {
+ if (destination.IsDoubleStackSlot()) {
+ vixl32::DRegister temp = temps.AcquireD();
+ GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
+ GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
+ } else if (destination.IsRegisterPair()) {
+ DCHECK(ExpectedPairLayout(destination));
+ GetAssembler()->LoadFromOffset(
+ kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
+ } else {
+ DCHECK(destination.IsFpuRegisterPair()) << destination;
+ GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
+ }
+ } else if (source.IsRegisterPair()) {
+ if (destination.IsRegisterPair()) {
+ __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
+ __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
+ } else if (destination.IsFpuRegisterPair()) {
+ __ Vmov(FromLowSToD(LowSRegisterFrom(destination)),
+ LowRegisterFrom(source),
+ HighRegisterFrom(source));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ DCHECK(ExpectedPairLayout(source));
+ GetAssembler()->StoreToOffset(kStoreWordPair,
+ LowRegisterFrom(source),
+ sp,
+ destination.GetStackIndex());
+ }
+ } else if (source.IsFpuRegisterPair()) {
+ if (destination.IsRegisterPair()) {
+ TODO_VIXL32(FATAL);
+ } else if (destination.IsFpuRegisterPair()) {
+ __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
+ }
+ } else {
+ DCHECK(source.IsConstant()) << source;
+ HConstant* constant = source.GetConstant();
+ if (constant->IsIntConstant() || constant->IsNullConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(constant);
+ if (destination.IsRegister()) {
+ __ Mov(RegisterFrom(destination), value);
+ } else {
+ DCHECK(destination.IsStackSlot());
+ vixl32::Register temp = temps.Acquire();
+ __ Mov(temp, value);
+ GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
+ }
+ } else if (constant->IsLongConstant()) {
+ int64_t value = constant->AsLongConstant()->GetValue();
+ if (destination.IsRegisterPair()) {
+ __ Mov(LowRegisterFrom(destination), Low32Bits(value));
+ __ Mov(HighRegisterFrom(destination), High32Bits(value));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ vixl32::Register temp = temps.Acquire();
+ __ Mov(temp, Low32Bits(value));
+ GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
+ __ Mov(temp, High32Bits(value));
+ GetAssembler()->StoreToOffset(kStoreWord,
+ temp,
+ sp,
+ destination.GetHighStackIndex(kArmWordSize));
+ }
+ } else if (constant->IsDoubleConstant()) {
+ double value = constant->AsDoubleConstant()->GetValue();
+ if (destination.IsFpuRegisterPair()) {
+ __ Vmov(FromLowSToD(LowSRegisterFrom(destination)), value);
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ uint64_t int_value = bit_cast<uint64_t, double>(value);
+ vixl32::Register temp = temps.Acquire();
+ __ Mov(temp, Low32Bits(int_value));
+ GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
+ __ Mov(temp, High32Bits(int_value));
+ GetAssembler()->StoreToOffset(kStoreWord,
+ temp,
+ sp,
+ destination.GetHighStackIndex(kArmWordSize));
+ }
+ } else {
+ DCHECK(constant->IsFloatConstant()) << constant->DebugName();
+ float value = constant->AsFloatConstant()->GetValue();
+ if (destination.IsFpuRegister()) {
+ __ Vmov(SRegisterFrom(destination), value);
+ } else {
+ DCHECK(destination.IsStackSlot());
+ vixl32::Register temp = temps.Acquire();
+ __ Mov(temp, bit_cast<int32_t, float>(value));
+ GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
+ }
+ }
+ }
+}
+
+void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
+ UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ __ Mov(temp, reg);
+ GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
+ GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
+}
+
+void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
+ // TODO(VIXL32): Double check the performance of this implementation.
+ UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ vixl32::SRegister temp_s = temps.AcquireS();
+
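+  // Load slot `mem1` into the core temp and slot `mem2` into the S register temp, then store
+  // them back to the opposite slots.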
+ __ Ldr(temp, MemOperand(sp, mem1));
+ __ Vldr(temp_s, MemOperand(sp, mem2));
+ __ Str(temp, MemOperand(sp, mem2));
+ __ Vstr(temp_s, MemOperand(sp, mem1));
+}
+
+void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
+ MoveOperands* move = moves_[index];
+ Location source = move->GetSource();
+ Location destination = move->GetDestination();
+ UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+
+ if (source.IsRegister() && destination.IsRegister()) {
+ vixl32::Register temp = temps.Acquire();
+ DCHECK(!RegisterFrom(source).Is(temp));
+ DCHECK(!RegisterFrom(destination).Is(temp));
+ __ Mov(temp, RegisterFrom(destination));
+ __ Mov(RegisterFrom(destination), RegisterFrom(source));
+ __ Mov(RegisterFrom(source), temp);
+ } else if (source.IsRegister() && destination.IsStackSlot()) {
+ Exchange(RegisterFrom(source), destination.GetStackIndex());
+ } else if (source.IsStackSlot() && destination.IsRegister()) {
+ Exchange(RegisterFrom(destination), source.GetStackIndex());
+ } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
+ vixl32::DRegister temp = temps.AcquireD();
+ __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
+ __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
+ __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
+ __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
+ } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
+ vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
+ int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
+ DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
+ vixl32::DRegister temp = temps.AcquireD();
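+    // Save the register pair into the D temp, load the pair from the stack slot, then store
+    // the D temp into that slot.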
+ __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
+ GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
+ GetAssembler()->StoreDToOffset(temp, sp, mem);
+ } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
+ TODO_VIXL32(FATAL);
+ } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
+ vixl32::DRegister temp1 = temps.AcquireD();
+ vixl32::DRegister temp2 = temps.AcquireD();
+ __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
+ __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
+ __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
+ __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
+ } else {
+ LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
+ }
+}
+
+void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) {
+ TODO_VIXL32(FATAL);
+}
+
+void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) {
+ TODO_VIXL32(FATAL);
+}
+
+// Check if the desired_class_load_kind is supported. If it is, return it;
+// otherwise, return a fall-back kind that should be used instead.
+HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
+ HLoadClass::LoadKind desired_class_load_kind ATTRIBUTE_UNUSED) {
+ // TODO(VIXL): Implement optimized code paths.
+ return HLoadClass::LoadKind::kDexCacheViaMethod;
+}
+
+void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
+ if (cls->NeedsAccessCheck()) {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ CodeGenerator::CreateLoadClassLocationSummary(
+ cls,
+ LocationFrom(calling_convention.GetRegisterAt(0)),
+ LocationFrom(r0),
+ /* code_generator_supports_read_barrier */ true);
+ return;
+ }
+
+ // TODO(VIXL): read barrier code.
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+ if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+ load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
+ load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) {
+ LocationSummary* locations = cls->GetLocations();
+ if (cls->NeedsAccessCheck()) {
+ codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+ codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
+ CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+ return;
+ }
+
+ Location out_loc = locations->Out();
+ vixl32::Register out = OutputRegister(cls);
+
+ // TODO(VIXL): read barrier code.
+ bool generate_null_check = false;
+ switch (cls->GetLoadKind()) {
+ case HLoadClass::LoadKind::kReferrersClass: {
+ DCHECK(!cls->CanCallRuntime());
+ DCHECK(!cls->MustGenerateClinitCheck());
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ vixl32::Register current_method = InputRegisterAt(cls, 0);
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ current_method,
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ kEmitCompilerReadBarrier);
+ break;
+ }
+ case HLoadClass::LoadKind::kDexCacheViaMethod: {
+ // /* GcRoot<mirror::Class>[] */ out =
+ // current_method.ptr_sized_fields_->dex_cache_resolved_types_
+ vixl32::Register current_method = InputRegisterAt(cls, 0);
+ const int32_t resolved_types_offset =
+ ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value();
+ GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset);
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+ GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier);
+ generate_null_check = !cls->IsInDexCache();
+ break;
+ }
+ default:
+ TODO_VIXL32(FATAL);
+ }
+
+ if (generate_null_check || cls->MustGenerateClinitCheck()) {
+ DCHECK(cls->CanCallRuntime());
+ LoadClassSlowPathARMVIXL* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL(
+ cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ codegen_->AddSlowPath(slow_path);
+ if (generate_null_check) {
+ __ Cbz(out, slow_path->GetEntryLabel());
+ }
+ if (cls->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
+ }
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (check->HasUses()) {
+ locations->SetOut(Location::SameAsFirstInput());
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
+ // We assume the class is not null.
+ LoadClassSlowPathARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL(check->GetLoadClass(),
+ check,
+ check->GetDexPc(),
+ /* do_clinit */ true);
+ codegen_->AddSlowPath(slow_path);
+ GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
+ LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
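+  // Load the class status; if it is below kStatusInitialized the class is not yet initialized
+  // and we branch to the slow path.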
+ GetAssembler()->LoadFromOffset(kLoadWord,
+ temp,
+ class_reg,
+ mirror::Class::StatusOffset().Int32Value());
+ __ Cmp(temp, mirror::Class::kStatusInitialized);
+ __ B(lt, slow_path->GetEntryLabel());
+ // Even if the initialized flag is set, we may be in a situation where caches are not synced
+ // properly. Therefore, we do a memory fence.
+ __ Dmb(ISH);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+// Check if the desired_string_load_kind is supported. If it is, return it;
+// otherwise, return a fall-back kind that should be used instead.
+HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
+ HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
+ // TODO(VIXL): Implement optimized code paths. For now we always use the simpler fallback code.
+ return HLoadString::LoadKind::kDexCacheViaMethod;
+}
+
+void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
+ LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+ ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+
+ // TODO(VIXL): Implement optimized code paths.
+ // See InstructionCodeGeneratorARMVIXL::VisitLoadString.
+ HLoadString::LoadKind load_kind = load->GetLoadKind();
+ if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ // TODO(VIXL): Use InvokeRuntimeCallingConventionARMVIXL instead.
+ locations->SetOut(LocationFrom(r0));
+ } else {
+ locations->SetOut(Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) {
+ // TODO(VIXL): Implement optimized code paths.
+  // We implemented the simplest solution to get the first ART tests passing; the optimized
+  // path is deferred and should later be implemented using the ARM64 implementation as a
+  // reference. The same applies to LocationsBuilderARMVIXL::VisitLoadString.
+
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+ codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+}
+
+static int32_t GetExceptionTlsOffset() {
+ return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
+}
+
+void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
+ vixl32::Register out = OutputRegister(load);
+ GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
+}
+
+
+void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
+ new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ __ Mov(temp, 0);
+ GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
+}
+
+void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
+ codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
+void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
+ HandleBitwiseOperation(instruction, AND);
+}
+
+void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
+ HandleBitwiseOperation(instruction, ORR);
+}
+
+void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
+ HandleBitwiseOperation(instruction, EOR);
+}
+
+void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ DCHECK(instruction->GetResultType() == Primitive::kPrimInt
+ || instruction->GetResultType() == Primitive::kPrimLong);
+ // Note: GVN reorders commutative operations to have the constant on the right hand side.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
+ HandleBitwiseOperation(instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
+ HandleBitwiseOperation(instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
+ HandleBitwiseOperation(instruction);
+}
+
+// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
+void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
+ vixl32::Register first,
+ uint32_t value) {
+  // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
+ if (value == 0xffffffffu) {
+ if (!out.Is(first)) {
+ __ Mov(out, first);
+ }
+ return;
+ }
+ if (value == 0u) {
+ __ Mov(out, 0);
+ return;
+ }
+ if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
+ __ And(out, first, value);
+ } else {
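+    // `value` cannot be encoded as an AND immediate, but its complement can be encoded for BIC,
+    // which computes the same result.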
+ DCHECK(GetAssembler()->ShifterOperandCanHold(BIC, ~value));
+ __ Bic(out, first, ~value);
+ }
+}
+
+// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
+void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
+ vixl32::Register first,
+ uint32_t value) {
+  // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
+ if (value == 0u) {
+ if (!out.Is(first)) {
+ __ Mov(out, first);
+ }
+ return;
+ }
+ if (value == 0xffffffffu) {
+ __ Mvn(out, 0);
+ return;
+ }
+ if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
+ __ Orr(out, first, value);
+ } else {
+ DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
+ __ Orn(out, first, ~value);
+ }
+}
+
+// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
+void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
+ vixl32::Register first,
+ uint32_t value) {
+  // Optimize special case for individual halves of `xor-long` (`xor` is simplified earlier).
+ if (value == 0u) {
+ if (!out.Is(first)) {
+ __ Mov(out, first);
+ }
+ return;
+ }
+ __ Eor(out, first, value);
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+ Location out = locations->Out();
+
+ if (second.IsConstant()) {
+ uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+ uint32_t value_low = Low32Bits(value);
+ if (instruction->GetResultType() == Primitive::kPrimInt) {
+ vixl32::Register first_reg = InputRegisterAt(instruction, 0);
+ vixl32::Register out_reg = OutputRegister(instruction);
+ if (instruction->IsAnd()) {
+ GenerateAndConst(out_reg, first_reg, value_low);
+ } else if (instruction->IsOr()) {
+ GenerateOrrConst(out_reg, first_reg, value_low);
+ } else {
+ DCHECK(instruction->IsXor());
+ GenerateEorConst(out_reg, first_reg, value_low);
+ }
+ } else {
+ DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+ uint32_t value_high = High32Bits(value);
+ vixl32::Register first_low = LowRegisterFrom(first);
+ vixl32::Register first_high = HighRegisterFrom(first);
+ vixl32::Register out_low = LowRegisterFrom(out);
+ vixl32::Register out_high = HighRegisterFrom(out);
+ if (instruction->IsAnd()) {
+ GenerateAndConst(out_low, first_low, value_low);
+ GenerateAndConst(out_high, first_high, value_high);
+ } else if (instruction->IsOr()) {
+ GenerateOrrConst(out_low, first_low, value_low);
+ GenerateOrrConst(out_high, first_high, value_high);
+ } else {
+ DCHECK(instruction->IsXor());
+ GenerateEorConst(out_low, first_low, value_low);
+ GenerateEorConst(out_high, first_high, value_high);
+ }
+ }
+ return;
+ }
+
+ if (instruction->GetResultType() == Primitive::kPrimInt) {
+ vixl32::Register first_reg = InputRegisterAt(instruction, 0);
+ vixl32::Register second_reg = InputRegisterAt(instruction, 1);
+ vixl32::Register out_reg = OutputRegister(instruction);
+ if (instruction->IsAnd()) {
+ __ And(out_reg, first_reg, second_reg);
+ } else if (instruction->IsOr()) {
+ __ Orr(out_reg, first_reg, second_reg);
+ } else {
+ DCHECK(instruction->IsXor());
+ __ Eor(out_reg, first_reg, second_reg);
+ }
+ } else {
+ DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+ vixl32::Register first_low = LowRegisterFrom(first);
+ vixl32::Register first_high = HighRegisterFrom(first);
+ vixl32::Register second_low = LowRegisterFrom(second);
+ vixl32::Register second_high = HighRegisterFrom(second);
+ vixl32::Register out_low = LowRegisterFrom(out);
+ vixl32::Register out_high = HighRegisterFrom(out);
+ if (instruction->IsAnd()) {
+ __ And(out_low, first_low, second_low);
+ __ And(out_high, first_high, second_high);
+ } else if (instruction->IsOr()) {
+ __ Orr(out_low, first_low, second_low);
+ __ Orr(out_high, first_high, second_high);
+ } else {
+ DCHECK(instruction->IsXor());
+ __ Eor(out_low, first_low, second_low);
+ __ Eor(out_high, first_high, second_high);
+ }
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
+ HInstruction* instruction ATTRIBUTE_UNUSED,
+ Location root,
+ vixl32::Register obj,
+ uint32_t offset,
+ bool requires_read_barrier) {
+ vixl32::Register root_reg = RegisterFrom(root);
+ if (requires_read_barrier) {
+ TODO_VIXL32(FATAL);
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
+ }
+}
+
+void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED,
+ Location out,
+ Location ref ATTRIBUTE_UNUSED,
+ Location obj ATTRIBUTE_UNUSED,
+ uint32_t offset ATTRIBUTE_UNUSED,
+ Location index ATTRIBUTE_UNUSED) {
+ if (kEmitCompilerReadBarrier) {
+ DCHECK(!kUseBakerReadBarrier);
+ TODO_VIXL32(FATAL);
+ } else if (kPoisonHeapReferences) {
+ GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
+ }
+}
+
+// Check if the desired_dispatch_info is supported. If it is, return it;
+// otherwise, return a fall-back info that should be used instead.
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info ATTRIBUTE_UNUSED,
+ HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+ // TODO(VIXL): Implement optimized code paths.
+ return {
+ HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+ HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+ 0u,
+ 0u
+ };
+}
+
+vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
+ HInvokeStaticOrDirect* invoke, vixl32::Register temp) {
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+ Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ if (!invoke->GetLocations()->Intrinsified()) {
+ return RegisterFrom(location);
+ }
+ // For intrinsics we allow any location, so it may be on the stack.
+ if (!location.IsRegister()) {
+ GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, location.GetStackIndex());
+ return temp;
+ }
+ // For register locations, check if the register was saved. If so, get it from the stack.
+ // Note: There is a chance that the register was saved but not overwritten, so we could
+ // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+ SlowPathCode* slow_path = GetCurrentSlowPath();
+ DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
+ if (slow_path->IsCoreRegisterSaved(RegisterFrom(location).GetCode())) {
+ int stack_offset = slow_path->GetStackOffsetOfCoreRegister(RegisterFrom(location).GetCode());
+ GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, stack_offset);
+ return temp;
+ }
+ return RegisterFrom(location);
+}
+
+void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
+ HInvokeStaticOrDirect* invoke, Location temp) {
+ Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
+ vixl32::Register temp_reg = RegisterFrom(temp);
+
+ switch (invoke->GetMethodLoadKind()) {
+ case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
+ uint32_t offset =
+ GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
+ // temp = thread->string_init_entrypoint
+ GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, tr, offset);
+ break;
+ }
+ case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
+ Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ vixl32::Register method_reg;
+ if (current_method.IsRegister()) {
+ method_reg = RegisterFrom(current_method);
+ } else {
+ TODO_VIXL32(FATAL);
+ }
+ // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
+ GetAssembler()->LoadFromOffset(
+ kLoadWord,
+ temp_reg,
+ method_reg,
+ ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
+ // temp = temp[index_in_cache];
+ // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+ uint32_t index_in_cache = invoke->GetDexMethodIndex();
+ GetAssembler()->LoadFromOffset(
+ kLoadWord, temp_reg, temp_reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
+ break;
+ }
+ default:
+ TODO_VIXL32(FATAL);
+ }
+
+ // TODO(VIXL): Support `CodePtrLocation` values other than `kCallArtMethod`.
+ if (invoke->GetCodePtrLocation() != HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod) {
+ TODO_VIXL32(FATAL);
+ }
+
+ // LR = callee_method->entry_point_from_quick_compiled_code_
+ GetAssembler()->LoadFromOffset(
+ kLoadWord,
+ lr,
+ RegisterFrom(callee_method),
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+ // LR()
+ __ Blx(lr);
+
+ DCHECK(!IsLeafMethod());
+}
+
+void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+ vixl32::Register temp = RegisterFrom(temp_location);
+ uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
+
+ // Use the calling convention instead of the location of the receiver, as
+ // intrinsics may have put the receiver in a different register. In the intrinsics
+ // slow path, the arguments have been moved to the right place, so here we are
+ // guaranteed that the receiver is the first register of the calling convention.
+ InvokeDexCallingConventionARMVIXL calling_convention;
+ vixl32::Register receiver = calling_convention.GetRegisterAt(0);
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ // /* HeapReference<Class> */ temp = receiver->klass_
+ GetAssembler()->LoadFromOffset(kLoadWord, temp, receiver, class_offset);
+ MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // future versions of the collector may not keep it accessible).
+ GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+ // temp = temp->GetMethodAt(method_offset);
+ uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArmPointerSize).Int32Value();
+ GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
+ // LR = temp->GetEntryPoint();
+ GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
+ // LR();
+ __ Blx(lr);
+}
+
+// Copy the result of a call into the given target.
+void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
+ Primitive::Type type ATTRIBUTE_UNUSED) {
+ TODO_VIXL32(FATAL);
+}
+
+#undef __
+#undef QUICK_ENTRY_POINT
+#undef TODO_VIXL32
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
new file mode 100644
index 0000000000..02bf960e18
--- /dev/null
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -0,0 +1,566 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
+
+#include "code_generator_arm.h"
+#include "utils/arm/assembler_arm_vixl.h"
+
+// TODO(VIXL): make vixl clean wrt -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch32/constants-aarch32.h"
+#include "aarch32/instructions-aarch32.h"
+#include "aarch32/macro-assembler-aarch32.h"
+#pragma GCC diagnostic pop
+
+// True if VIXL32 should be used for codegen on ARM.
+#ifdef ART_USE_VIXL_ARM_BACKEND
+static constexpr bool kArmUseVIXL32 = true;
+#else
+static constexpr bool kArmUseVIXL32 = false;
+#endif
+
+namespace art {
+namespace arm {
+
+static const vixl::aarch32::Register kParameterCoreRegistersVIXL[] = {
+ vixl::aarch32::r1,
+ vixl::aarch32::r2,
+ vixl::aarch32::r3
+};
+static const size_t kParameterCoreRegistersLengthVIXL = arraysize(kParameterCoreRegistersVIXL);
+static const vixl::aarch32::SRegister kParameterFpuRegistersVIXL[] = {
+ vixl::aarch32::s0,
+ vixl::aarch32::s1,
+ vixl::aarch32::s2,
+ vixl::aarch32::s3,
+ vixl::aarch32::s4,
+ vixl::aarch32::s5,
+ vixl::aarch32::s6,
+ vixl::aarch32::s7,
+ vixl::aarch32::s8,
+ vixl::aarch32::s9,
+ vixl::aarch32::s10,
+ vixl::aarch32::s11,
+ vixl::aarch32::s12,
+ vixl::aarch32::s13,
+ vixl::aarch32::s14,
+ vixl::aarch32::s15
+};
+static const size_t kParameterFpuRegistersLengthVIXL = arraysize(kParameterFpuRegistersVIXL);
+
+static const vixl::aarch32::Register kMethodRegister = vixl::aarch32::r0;
+
+static const vixl::aarch32::Register kCoreAlwaysSpillRegister = vixl::aarch32::r5;
+
+// Callee saves core registers r5, r6, r7, r8, r10, r11, and lr.
+static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::RegisterList::Union(
+ vixl::aarch32::RegisterList(vixl::aarch32::r5,
+ vixl::aarch32::r6,
+ vixl::aarch32::r7,
+ vixl::aarch32::r8),
+ vixl::aarch32::RegisterList(vixl::aarch32::r10,
+ vixl::aarch32::r11,
+ vixl::aarch32::lr));
+
+// Callee saves FP registers s16 to s31 inclusive.
+static const vixl::aarch32::SRegisterList kFpuCalleeSaves =
+ vixl::aarch32::SRegisterList(vixl::aarch32::s16, 16);
+
+static const vixl::aarch32::Register kRuntimeParameterCoreRegistersVIXL[] = {
+ vixl::aarch32::r0,
+ vixl::aarch32::r1,
+ vixl::aarch32::r2,
+ vixl::aarch32::r3
+};
+static const size_t kRuntimeParameterCoreRegistersLengthVIXL =
+    arraysize(kRuntimeParameterCoreRegistersVIXL);
+static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = {
+ vixl::aarch32::s0,
+ vixl::aarch32::s1,
+ vixl::aarch32::s2,
+ vixl::aarch32::s3
+};
+static const size_t kRuntimeParameterFpuRegistersLengthVIXL =
+    arraysize(kRuntimeParameterFpuRegistersVIXL);
+
+class LoadClassSlowPathARMVIXL;
+
+#define FOR_EACH_IMPLEMENTED_INSTRUCTION(M) \
+ M(Above) \
+ M(AboveOrEqual) \
+ M(Add) \
+ M(And) \
+ M(ArrayLength) \
+ M(Below) \
+ M(BelowOrEqual) \
+ M(ClearException) \
+ M(ClinitCheck) \
+ M(Compare) \
+ M(CurrentMethod) \
+ M(Div) \
+ M(DivZeroCheck) \
+ M(DoubleConstant) \
+ M(Equal) \
+ M(Exit) \
+ M(FloatConstant) \
+ M(Goto) \
+ M(GreaterThan) \
+ M(GreaterThanOrEqual) \
+ M(If) \
+ M(InstanceFieldGet) \
+ M(InstanceFieldSet) \
+ M(IntConstant) \
+ M(InvokeStaticOrDirect) \
+ M(InvokeVirtual) \
+ M(LessThan) \
+ M(LessThanOrEqual) \
+ M(LoadClass) \
+ M(LoadException) \
+ M(LoadString) \
+ M(LongConstant) \
+ M(MemoryBarrier) \
+ M(Mul) \
+ M(Neg) \
+ M(NewArray) \
+ M(NewInstance) \
+ M(Not) \
+ M(NotEqual) \
+ M(NullCheck) \
+ M(NullConstant) \
+ M(Or) \
+ M(ParallelMove) \
+ M(ParameterValue) \
+ M(Phi) \
+ M(Return) \
+ M(ReturnVoid) \
+ M(Ror) \
+ M(Select) \
+ M(Shl) \
+ M(Shr) \
+ M(StaticFieldGet) \
+ M(Sub) \
+ M(SuspendCheck) \
+ M(Throw) \
+ M(TryBoundary) \
+ M(TypeConversion) \
+ M(UShr) \
+ M(Xor) \
+
+// TODO: Remove once the VIXL32 backend is implemented completely.
+#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \
+ M(ArrayGet) \
+ M(ArraySet) \
+ M(BooleanNot) \
+ M(BoundsCheck) \
+ M(BoundType) \
+ M(CheckCast) \
+ M(ClassTableGet) \
+ M(Deoptimize) \
+ M(InstanceOf) \
+ M(InvokeInterface) \
+ M(InvokeUnresolved) \
+ M(MonitorOperation) \
+ M(NativeDebugInfo) \
+ M(PackedSwitch) \
+ M(Rem) \
+ M(StaticFieldSet) \
+ M(UnresolvedInstanceFieldGet) \
+ M(UnresolvedInstanceFieldSet) \
+ M(UnresolvedStaticFieldGet) \
+ M(UnresolvedStaticFieldSet) \
+
+class CodeGeneratorARMVIXL;
+
+class InvokeRuntimeCallingConventionARMVIXL
+ : public CallingConvention<vixl::aarch32::Register, vixl::aarch32::SRegister> {
+ public:
+ InvokeRuntimeCallingConventionARMVIXL()
+ : CallingConvention(kRuntimeParameterCoreRegistersVIXL,
+ kRuntimeParameterCoreRegistersLengthVIXL,
+ kRuntimeParameterFpuRegistersVIXL,
+ kRuntimeParameterFpuRegistersLengthVIXL,
+ kArmPointerSize) {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConventionARMVIXL);
+};
+
+class InvokeDexCallingConventionARMVIXL
+ : public CallingConvention<vixl::aarch32::Register, vixl::aarch32::SRegister> {
+ public:
+ InvokeDexCallingConventionARMVIXL()
+ : CallingConvention(kParameterCoreRegistersVIXL,
+ kParameterCoreRegistersLengthVIXL,
+ kParameterFpuRegistersVIXL,
+ kParameterFpuRegistersLengthVIXL,
+ kArmPointerSize) {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionARMVIXL);
+};
+
+class SlowPathCodeARMVIXL : public SlowPathCode {
+ public:
+ explicit SlowPathCodeARMVIXL(HInstruction* instruction)
+ : SlowPathCode(instruction), entry_label_(), exit_label_() {}
+
+ vixl::aarch32::Label* GetEntryLabel() { return &entry_label_; }
+ vixl::aarch32::Label* GetExitLabel() { return &exit_label_; }
+
+ void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
+ void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
+
+ private:
+ vixl::aarch32::Label entry_label_;
+ vixl::aarch32::Label exit_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARMVIXL);
+};
+
+class ParallelMoveResolverARMVIXL : public ParallelMoveResolverWithSwap {
+ public:
+ ParallelMoveResolverARMVIXL(ArenaAllocator* allocator, CodeGeneratorARMVIXL* codegen)
+ : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
+
+ void EmitMove(size_t index) OVERRIDE;
+ void EmitSwap(size_t index) OVERRIDE;
+ void SpillScratch(int reg) OVERRIDE;
+ void RestoreScratch(int reg) OVERRIDE;
+
+ ArmVIXLAssembler* GetAssembler() const;
+
+ private:
+ void Exchange(vixl32::Register reg, int mem);
+ void Exchange(int mem1, int mem2);
+
+ CodeGeneratorARMVIXL* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARMVIXL);
+};
+
+#define DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR(Name) \
+ void Visit##Name(H##Name*) OVERRIDE;
+
+#define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR(Name) \
+ void Visit##Name(H##Name* instr) OVERRIDE { \
+    VisitUnimplementedInstruction(instr); }
+
+class LocationsBuilderARMVIXL : public HGraphVisitor {
+ public:
+ LocationsBuilderARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen)
+ : HGraphVisitor(graph), codegen_(codegen) {}
+
+ FOR_EACH_IMPLEMENTED_INSTRUCTION(DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR)
+
+ FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR)
+
+ private:
+  void VisitUnimplementedInstruction(HInstruction* instruction) {
+ LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName();
+ }
+
+ void HandleInvoke(HInvoke* invoke);
+ void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+ void HandleCondition(HCondition* condition);
+ void HandleIntegerRotate(LocationSummary* locations);
+ void HandleLongRotate(LocationSummary* locations);
+ void HandleShift(HBinaryOperation* operation);
+ void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ Location ArithmeticZeroOrFpuRegister(HInstruction* input);
+ Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
+ bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
+ bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare);
+
+ CodeGeneratorARMVIXL* const codegen_;
+ InvokeDexCallingConventionVisitorARM parameter_visitor_;
+
+ DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARMVIXL);
+};
+
+class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
+ public:
+ InstructionCodeGeneratorARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen);
+
+ FOR_EACH_IMPLEMENTED_INSTRUCTION(DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR)
+
+ FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR)
+
+ ArmVIXLAssembler* GetAssembler() const { return assembler_; }
+ vixl::aarch32::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
+
+ private:
+  void VisitUnimplementedInstruction(HInstruction* instruction) {
+ LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName();
+ }
+
+ // Generate code for the given suspend check. If not null, `successor`
+ // is the block to branch to if the suspend check is not needed, and after
+ // the suspend call.
+ void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
+ void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path,
+ vixl32::Register class_reg);
+ void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
+ void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
+ void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
+ void HandleBitwiseOperation(HBinaryOperation* operation);
+ void HandleCondition(HCondition* condition);
+ void HandleIntegerRotate(HRor* ror);
+ void HandleLongRotate(HRor* ror);
+ void HandleShift(HBinaryOperation* operation);
+
+ void GenerateWideAtomicStore(vixl::aarch32::Register addr,
+ uint32_t offset,
+ vixl::aarch32::Register value_lo,
+ vixl::aarch32::Register value_hi,
+ vixl::aarch32::Register temp1,
+ vixl::aarch32::Register temp2,
+ HInstruction* instruction);
+ void GenerateWideAtomicLoad(vixl::aarch32::Register addr,
+ uint32_t offset,
+ vixl::aarch32::Register out_lo,
+ vixl::aarch32::Register out_hi);
+
+ void HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ bool value_can_be_null);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers if `requires_read_barrier` is true.
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ vixl::aarch32::Register obj,
+ uint32_t offset,
+ bool requires_read_barrier);
+ void GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
+ vixl::aarch32::Label* true_target,
+ vixl::aarch32::Label* false_target);
+ void GenerateCompareTestAndBranch(HCondition* condition,
+ vixl::aarch32::Label* true_target,
+ vixl::aarch32::Label* false_target);
+ void GenerateVcmp(HInstruction* instruction);
+ void GenerateFPJumps(HCondition* cond,
+ vixl::aarch32::Label* true_label,
+ vixl::aarch32::Label* false_label);
+ void GenerateLongComparesAndJumps(HCondition* cond,
+ vixl::aarch32::Label* true_label,
+ vixl::aarch32::Label* false_label);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+ void GenerateDivRemConstantIntegral(HBinaryOperation* instruction);
+
+ ArmVIXLAssembler* const assembler_;
+ CodeGeneratorARMVIXL* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorARMVIXL);
+};
+
+class CodeGeneratorARMVIXL : public CodeGenerator {
+ public:
+ CodeGeneratorARMVIXL(HGraph* graph,
+ const ArmInstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options,
+ OptimizingCompilerStats* stats = nullptr);
+
+ virtual ~CodeGeneratorARMVIXL() {}
+
+ void Initialize() OVERRIDE {
+ block_labels_.resize(GetGraph()->GetBlocks().size());
+ }
+
+ void GenerateFrameEntry() OVERRIDE;
+ void GenerateFrameExit() OVERRIDE;
+
+ void Bind(HBasicBlock* block) OVERRIDE;
+
+ vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) {
+ block = FirstNonEmptyBlock(block);
+ return &(block_labels_[block->GetBlockId()]);
+ }
+
+ void MoveConstant(Location destination, int32_t value) OVERRIDE;
+ void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
+ void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+
+ ArmVIXLAssembler* GetAssembler() OVERRIDE { return &assembler_; }
+
+ const ArmVIXLAssembler& GetAssembler() const OVERRIDE { return assembler_; }
+
+ vixl::aarch32::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
+
+ size_t GetWordSize() const OVERRIDE { return kArmWordSize; }
+
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; }
+
+ uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+ vixl::aarch32::Label* block_entry_label = GetLabelOf(block);
+ DCHECK(block_entry_label->IsBound());
+ return block_entry_label->GetLocation();
+ }
+
+ HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
+
+ HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
+
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+ void Finalize(CodeAllocator* allocator) OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
+
+ void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+
+ InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; }
+
+ // Helper method to move a 32-bit value between two locations.
+ void Move32(Location destination, Location source);
+
+ const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; }
+
+ vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; }
+
+  // Saves the register on the stack. Returns the size taken on the stack.
+ size_t SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
+ uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
+ UNIMPLEMENTED(INFO) << "TODO: SaveCoreRegister";
+ return 0;
+ }
+
+  // Restores the register from the stack. Returns the size taken on the stack.
+ size_t RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
+ uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
+ UNIMPLEMENTED(INFO) << "TODO: RestoreCoreRegister";
+ return 0;
+ }
+
+ size_t SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
+ uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
+ UNIMPLEMENTED(INFO) << "TODO: SaveFloatingPointRegister";
+ return 0;
+ }
+
+ size_t RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
+ uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
+ UNIMPLEMENTED(INFO) << "TODO: RestoreFloatingPointRegister";
+ return 0;
+ }
+
+ bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
+ return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
+ }
+
+ void ComputeSpillMask() OVERRIDE;
+
+ void GenerateImplicitNullCheck(HNullCheck* null_check) OVERRIDE;
+ void GenerateExplicitNullCheck(HNullCheck* null_check) OVERRIDE;
+
+ ParallelMoveResolver* GetMoveResolver() OVERRIDE {
+ return &move_resolver_;
+ }
+
+ // Generate code to invoke a runtime entry point.
+ void InvokeRuntime(QuickEntrypointEnum entrypoint,
+ HInstruction* instruction,
+ uint32_t dex_pc,
+ SlowPathCode* slow_path = nullptr) OVERRIDE;
+
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
+ void GenerateInvokeRuntime(int32_t entry_point_offset);
+
+ // Emit a write barrier.
+ void MarkGCCard(vixl::aarch32::Register temp,
+ vixl::aarch32::Register card,
+ vixl::aarch32::Register object,
+ vixl::aarch32::Register value,
+ bool can_be_null);
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+  // Check if the desired_string_load_kind is supported. If it is, return it;
+  // otherwise, return a fall-back kind that should be used instead.
+ HLoadString::LoadKind GetSupportedLoadStringKind(
+ HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
+  // Check if the desired_class_load_kind is supported. If it is, return it;
+  // otherwise, return a fall-back kind that should be used instead.
+ HLoadClass::LoadKind GetSupportedLoadClassKind(
+ HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
+  // Check if the desired_dispatch_info is supported. If it is, return it;
+  // otherwise, return a fall-back info that should be used instead.
+ HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+ HInvokeStaticOrDirect* invoke) OVERRIDE;
+
+ void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
+ void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+
+ void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
+
+ void GenerateNop() OVERRIDE;
+
+ private:
+ vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+ vixl::aarch32::Register temp);
+
+ // Labels for each block that will be compiled.
+ // We use a deque so that the `vixl::aarch32::Label` objects do not move in memory.
+ ArenaDeque<vixl::aarch32::Label> block_labels_; // Indexed by block id.
+ vixl::aarch32::Label frame_entry_label_;
+
+ LocationsBuilderARMVIXL location_builder_;
+ InstructionCodeGeneratorARMVIXL instruction_visitor_;
+ ParallelMoveResolverARMVIXL move_resolver_;
+
+ ArmVIXLAssembler assembler_;
+ const ArmInstructionSetFeatures& isa_features_;
+
+ DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL);
+};
+
+#undef FOR_EACH_IMPLEMENTED_INSTRUCTION
+#undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION
+#undef DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR
+#undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR
+
+
+} // namespace arm
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8dd82ef9cb..f4a804f70c 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -20,6 +20,7 @@
#include "arch/mips/instruction_set_features_mips.h"
#include "art_method.h"
#include "code_generator_utils.h"
+#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
@@ -145,8 +146,8 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type)
return MipsReturnLocation(type);
}
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value()
class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
@@ -170,14 +171,10 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
locations->InAt(1),
Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
Primitive::kPrimInt);
- uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
- ? QUICK_ENTRY_POINT(pThrowStringBounds)
- : QUICK_ENTRY_POINT(pThrowArrayBounds);
- mips_codegen->InvokeRuntime(entry_point_offset,
- instruction_,
- instruction_->GetDexPc(),
- this,
- IsDirectEntrypoint(kQuickThrowArrayBounds));
+ QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
+ ? kQuickThrowStringBounds
+ : kQuickThrowArrayBounds;
+ mips_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
@@ -197,15 +194,7 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
- if (instruction_->CanThrowIntoCatchBlock()) {
- // Live registers will be restored in the catch block if caught.
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- }
- mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
- instruction_,
- instruction_->GetDexPc(),
- this,
- IsDirectEntrypoint(kQuickThrowDivZero));
+ mips_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
@@ -237,12 +226,9 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
InvokeRuntimeCallingConvention calling_convention;
__ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
- int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
- : QUICK_ENTRY_POINT(pInitializeType);
- bool direct = do_clinit_ ? IsDirectEntrypoint(kQuickInitializeStaticStorage)
- : IsDirectEntrypoint(kQuickInitializeType);
-
- mips_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this, direct);
+ QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
+ : kQuickInitializeType;
+ mips_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
if (do_clinit_) {
CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
} else {
@@ -293,13 +279,10 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
- const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+ HLoadString* load = instruction_->AsLoadString();
+ const uint32_t string_index = load->GetStringIndex();
__ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
- mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
- instruction_,
- instruction_->GetDexPc(),
- this,
- IsDirectEntrypoint(kQuickResolveString));
+ mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
Primitive::Type type = instruction_->GetType();
mips_codegen->MoveLocation(locations->Out(),
@@ -307,6 +290,19 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
type);
RestoreLiveRegisters(codegen, locations);
+
+ // Store the resolved String to the BSS entry.
+ // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
+ // .bss entry address in the fast path, so that we can avoid another calculation here.
+ bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+ Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+ DCHECK_NE(out, AT);
+ CodeGeneratorMIPS::PcRelativePatchInfo* info =
+ mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+ mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
+ __ StoreToOffset(kStoreWord, out, TMP, 0);
+
__ B(GetExitLabel());
}
@@ -327,11 +323,10 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
- mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
+ mips_codegen->InvokeRuntime(kQuickThrowNullPointer,
instruction_,
instruction_->GetDexPc(),
- this,
- IsDirectEntrypoint(kQuickThrowNullPointer));
+ this);
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
@@ -351,14 +346,8 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
- instruction_,
- instruction_->GetDexPc(),
- this,
- IsDirectEntrypoint(kQuickTestSuspend));
+ mips_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -409,11 +398,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
- mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
- instruction_,
- dex_pc,
- this,
- IsDirectEntrypoint(kQuickInstanceofNonTrivial));
+ mips_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
CheckEntrypointTypes<
kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
@@ -421,11 +406,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
} else {
DCHECK(instruction_->IsCheckCast());
- mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
- instruction_,
- dex_pc,
- this,
- IsDirectEntrypoint(kQuickCheckCast));
+ mips_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
@@ -447,12 +428,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
- instruction_,
- instruction_->GetDexPc(),
- this,
- IsDirectEntrypoint(kQuickDeoptimize));
+ mips_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -503,8 +479,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
}
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value()
void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
@@ -699,16 +675,17 @@ void CodeGeneratorMIPS::ComputeSpillMask() {
if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) {
core_spill_mask_ |= (1 << ZERO);
}
+}
+
+bool CodeGeneratorMIPS::HasAllocatedCalleeSaveRegisters() const {
// If RA is clobbered by PC-relative operations on R2 and it's the only spilled register
- // (this can happen in leaf methods), artificially spill the ZERO register in order to
- // force explicit saving and restoring of RA. RA isn't saved/restored when it's the only
- // spilled register.
+ // (this can happen in leaf methods), force CodeGenerator::InitializeCodeGeneration()
+ // into the path that creates a stack frame so that RA can be explicitly saved and restored.
+ // RA can't otherwise be saved/restored when it's the only spilled register.
// TODO: Can this be improved? It causes creation of a stack frame (while RA might be
// saved in an unused temporary register) and saving of RA and the current method pointer
// in the frame.
- if (clobbered_ra_ && core_spill_mask_ == (1u << RA) && fpu_spill_mask_ == 0) {
- core_spill_mask_ |= (1 << ZERO);
- }
+ return CodeGenerator::HasAllocatedCalleeSaveRegisters() || clobbered_ra_;
}
static dwarf::Reg DWARFReg(Register reg) {
@@ -731,6 +708,9 @@ void CodeGeneratorMIPS::GenerateFrameEntry() {
}
if (HasEmptyFrame()) {
+ CHECK_EQ(fpu_spill_mask_, 0u);
+ CHECK_EQ(core_spill_mask_, 1u << RA);
+ CHECK(!clobbered_ra_);
return;
}
@@ -763,8 +743,12 @@ void CodeGeneratorMIPS::GenerateFrameEntry() {
// TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
}
- // Store the current method pointer.
- __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+ }
}
void CodeGeneratorMIPS::GenerateFrameExit() {
@@ -794,12 +778,24 @@ void CodeGeneratorMIPS::GenerateFrameExit() {
// TODO: __ cfi().Restore(DWARFReg(reg));
}
- __ DecreaseFrameSize(GetFrameSize());
+ size_t frame_size = GetFrameSize();
+ // Adjust the stack pointer in the delay slot if doing so doesn't break CFI.
+ bool exchange = IsInt<16>(static_cast<int32_t>(frame_size));
+ bool reordering = __ SetReorder(false);
+ if (exchange) {
+ __ Jr(RA);
+ __ DecreaseFrameSize(frame_size); // Single instruction in delay slot.
+ } else {
+ __ DecreaseFrameSize(frame_size);
+ __ Jr(RA);
+ __ Nop(); // In delay slot.
+ }
+ __ SetReorder(reordering);
+ } else {
+ __ Jr(RA);
+ __ NopIfNoReordering();
}
- __ Jr(RA);
- __ Nop();
-
__ cfi().RestoreState();
__ cfi().DefCFAOffset(GetFrameSize());
}
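The frame-exit rewrite above tries to hide the stack adjustment in the `jr $ra` delay slot whenever it can be encoded as a single `addiu`. A rough sketch of the decision and the two instruction shapes it is expected to produce (illustrative only, not the MipsAssembler output):

#include <cstdint>

// Sketch of the frame-exit choice: a signed 16-bit frame size lets the single
// "addiu $sp, $sp, frame_size" ride in the jr $ra delay slot; anything larger
// needs a multi-instruction adjustment before the jump, with a nop in the slot.
bool AdjustmentFitsDelaySlot(uint32_t frame_size) {
  int32_t signed_size = static_cast<int32_t>(frame_size);
  return signed_size >= -0x8000 && signed_size <= 0x7FFF;
  // Fits:     jr    $ra
  //           addiu $sp, $sp, frame_size   // executed in the delay slot
  // Too big:  <multi-instruction $sp adjustment>
  //           jr    $ra
  //           nop                          // delay slot left empty
}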
@@ -922,7 +918,7 @@ void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) {
} else {
DCHECK(destination.IsStackSlot())
<< "Cannot move " << c->DebugName() << " to " << destination;
- __ StoreConst32ToOffset(value, SP, destination.GetStackIndex(), TMP);
+ __ StoreConstToOffset(kStoreWord, value, SP, destination.GetStackIndex(), TMP);
}
} else if (c->IsLongConstant()) {
// Move 64 bit constant.
@@ -934,7 +930,7 @@ void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) {
} else {
DCHECK(destination.IsDoubleStackSlot())
<< "Cannot move " << c->DebugName() << " to " << destination;
- __ StoreConst64ToOffset(value, SP, destination.GetStackIndex(), TMP);
+ __ StoreConstToOffset(kStoreDoubleword, value, SP, destination.GetStackIndex(), TMP);
}
} else if (c->IsFloatConstant()) {
// Move 32 bit float constant.
@@ -944,7 +940,7 @@ void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) {
} else {
DCHECK(destination.IsStackSlot())
<< "Cannot move " << c->DebugName() << " to " << destination;
- __ StoreConst32ToOffset(value, SP, destination.GetStackIndex(), TMP);
+ __ StoreConstToOffset(kStoreWord, value, SP, destination.GetStackIndex(), TMP);
}
} else {
// Move 64 bit double constant.
@@ -956,7 +952,7 @@ void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) {
} else {
DCHECK(destination.IsDoubleStackSlot())
<< "Cannot move " << c->DebugName() << " to " << destination;
- __ StoreConst64ToOffset(value, SP, destination.GetStackIndex(), TMP);
+ __ StoreConstToOffset(kStoreDoubleword, value, SP, destination.GetStackIndex(), TMP);
}
}
}
@@ -978,6 +974,24 @@ void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* lo
}
}
+template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches(
+ const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<LinkerPatch>* linker_patches) {
+ for (const PcRelativePatchInfo& info : infos) {
+ const DexFile& dex_file = info.target_dex_file;
+ size_t offset_or_index = info.offset_or_index;
+ DCHECK(info.high_label.IsBound());
+ uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+ // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+ // the assembler's base label used for PC-relative addressing.
+ uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+ ? __ GetLabelLocation(&info.pc_rel_label)
+ : __ GetPcRelBaseLabelLocation();
+ linker_patches->push_back(Factory(high_offset, &dex_file, pc_rel_offset, offset_or_index));
+ }
+}
+
void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1008,48 +1022,17 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
target_method.dex_file,
target_method.dex_method_index));
}
- for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
- const DexFile& dex_file = info.target_dex_file;
- size_t base_element_offset = info.offset_or_index;
- DCHECK(info.high_label.IsBound());
- uint32_t high_offset = __ GetLabelLocation(&info.high_label);
- DCHECK(info.pc_rel_label.IsBound());
- uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label);
- linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset,
- &dex_file,
- pc_rel_offset,
- base_element_offset));
- }
- for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
- const DexFile& dex_file = info.target_dex_file;
- size_t string_index = info.offset_or_index;
- DCHECK(info.high_label.IsBound());
- uint32_t high_offset = __ GetLabelLocation(&info.high_label);
- // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
- // the assembler's base label used for PC-relative literals.
- uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
- ? __ GetLabelLocation(&info.pc_rel_label)
- : __ GetPcRelBaseLabelLocation();
- linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset,
- &dex_file,
- pc_rel_offset,
- string_index));
- }
- for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
- const DexFile& dex_file = info.target_dex_file;
- size_t type_index = info.offset_or_index;
- DCHECK(info.high_label.IsBound());
- uint32_t high_offset = __ GetLabelLocation(&info.high_label);
- // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
- // the assembler's base label used for PC-relative literals.
- uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
- ? __ GetLabelLocation(&info.pc_rel_label)
- : __ GetPcRelBaseLabelLocation();
- linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset,
- &dex_file,
- pc_rel_offset,
- type_index));
+ EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
+ linker_patches);
+ if (!GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ linker_patches);
+ } else {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
+ linker_patches);
}
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+ linker_patches);
for (const auto& entry : boot_image_string_patches_) {
const StringReference& target_string = entry.first;
Literal* literal = entry.second;
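The new `EmitPcRelativeLinkerPatches` template above folds three near-identical loops into one by taking the patch factory as a non-type template parameter. A stripped-down sketch of the same pattern with hypothetical stand-in types (not the ART `LinkerPatch` API):

#include <cstddef>
#include <vector>

// Hypothetical stand-ins for illustration only.
struct Patch { std::size_t literal_offset; std::size_t target; };
Patch MakeStringPatch(std::size_t offset, std::size_t index) { return {offset, index}; }
Patch MakeTypePatch(std::size_t offset, std::size_t index) { return {offset, index}; }

// The factory is a compile-time parameter, so one loop body serves every
// patch kind and the call sites stay as terse as the originals.
template <Patch (*Factory)(std::size_t, std::size_t)>
void EmitAll(const std::vector<std::size_t>& offsets, std::vector<Patch>* out) {
  for (std::size_t i = 0; i < offsets.size(); ++i) {
    out->push_back(Factory(offsets[i], i));
  }
}

int main() {
  std::vector<std::size_t> offsets = {0x10, 0x40};
  std::vector<Patch> patches;
  EmitAll<MakeStringPatch>(offsets, &patches);  // mirrors EmitPcRelativeLinkerPatches<...>
  EmitAll<MakeTypePatch>(offsets, &patches);
  return patches.size() == 4 ? 0 : 1;
}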
@@ -1139,6 +1122,36 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address)
return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
}
+void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder(
+ PcRelativePatchInfo* info, Register out, Register base) {
+ bool reordering = __ SetReorder(false);
+ if (GetInstructionSetFeatures().IsR6()) {
+ DCHECK_EQ(base, ZERO);
+ __ Bind(&info->high_label);
+ __ Bind(&info->pc_rel_label);
+ // Add a 32-bit offset to PC.
+ __ Auipc(out, /* placeholder */ 0x1234);
+ __ Addiu(out, out, /* placeholder */ 0x5678);
+ } else {
+ // If base is ZERO, emit NAL to obtain the actual base.
+ if (base == ZERO) {
+ // Generate a dummy PC-relative call to obtain PC.
+ __ Nal();
+ }
+ __ Bind(&info->high_label);
+ __ Lui(out, /* placeholder */ 0x1234);
+ // If we emitted the NAL, bind the pc_rel_label; otherwise base is a register holding
+ // the HMipsComputeBaseMethodAddress, which has its own label stored in MipsAssembler.

+ if (base == ZERO) {
+ __ Bind(&info->pc_rel_label);
+ }
+ __ Ori(out, out, /* placeholder */ 0x5678);
+ // Add a 32-bit offset to PC.
+ __ Addu(out, out, (base == ZERO) ? RA : base);
+ }
+ __ SetReorder(reordering);
+}
+
void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
MipsLabel done;
Register card = AT;
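`EmitPcRelativeAddressPlaceholder` above emits 0x1234/0x5678 as stand-in immediates; the linker later replaces them with the high and low halves of the real 32-bit offset. A hedged sketch of the split arithmetic (illustrative only, not the actual relocation code): `ori` zero-extends its immediate, so the R2 `lui`/`ori` pair combines the halves directly, while `addiu` sign-extends, so the R6 `auipc`/`addiu` pair needs the usual +0x8000 carry adjustment in the high half.

#include <cassert>
#include <cstdint>

// Illustrative split of a 32-bit offset into 16-bit immediates.
void SplitOffset(uint32_t offset, uint16_t* hi_for_ori, uint16_t* hi_for_addiu, uint16_t* lo) {
  *lo = static_cast<uint16_t>(offset & 0xFFFFu);
  *hi_for_ori = static_cast<uint16_t>(offset >> 16);               // lui+ori: plain split
  *hi_for_addiu = static_cast<uint16_t>((offset + 0x8000u) >> 16); // auipc/addiu: carry-adjusted
}

int main() {
  uint16_t hi_ori, hi_addiu, lo;
  SplitOffset(0x0001FFF0u, &hi_ori, &hi_addiu, &lo);
  // lui+ori reassembly (ori zero-extends the low half).
  assert(((static_cast<uint32_t>(hi_ori) << 16) | lo) == 0x0001FFF0u);
  // auipc/addiu reassembly (the low half is sign-extended; the addition wraps
  // mod 2^32, matching 32-bit addu behavior).
  uint32_t lo_sign_extended = (lo & 0x8000u) ? (0xFFFF0000u | lo) : lo;
  assert(((static_cast<uint32_t>(hi_addiu) << 16) + lo_sign_extended) == 0x0001FFF0u);
  return 0;
}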
@@ -1155,9 +1168,6 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
}
void CodeGeneratorMIPS::SetupBlockedRegisters() const {
- // Don't allocate the dalvik style register pair passing.
- blocked_register_pairs_[A1_A2] = true;
-
// ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
blocked_core_registers_[ZERO] = true;
blocked_core_registers_[K0] = true;
@@ -1192,19 +1202,6 @@ void CodeGeneratorMIPS::SetupBlockedRegisters() const {
blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
}
}
-
- UpdateBlockedPairRegisters();
-}
-
-void CodeGeneratorMIPS::UpdateBlockedPairRegisters() const {
- for (int i = 0; i < kNumberOfRegisterPairs; i++) {
- MipsManagedRegister current =
- MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
- if (blocked_core_registers_[current.AsRegisterPairLow()]
- || blocked_core_registers_[current.AsRegisterPairHigh()]) {
- blocked_register_pairs_[i] = true;
- }
- }
}
size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
@@ -1235,27 +1232,17 @@ void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg)
stream << FRegister(reg);
}
-void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(),
- instruction,
- dex_pc,
- slow_path,
- IsDirectEntrypoint(entrypoint));
-}
-
constexpr size_t kMipsDirectEntrypointRuntimeOffset = 16;
-void CodeGeneratorMIPS::InvokeRuntime(int32_t entry_point_offset,
+void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path,
- bool is_direct_entrypoint) {
- __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+ bool reordering = __ SetReorder(false);
+ __ LoadFromOffset(kLoadWord, T9, TR, GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value());
__ Jalr(T9);
- if (is_direct_entrypoint) {
+ if (IsDirectEntrypoint(entrypoint)) {
// Reserve argument space on stack (for $a0-$a3) for
// entrypoints that directly reference native implementations.
// Called function may use this space to store $a0-$a3 regs.
@@ -1264,7 +1251,10 @@ void CodeGeneratorMIPS::InvokeRuntime(int32_t entry_point_offset,
} else {
__ Nop(); // In delay slot.
}
- RecordPcInfo(instruction, dex_pc, slow_path);
+ __ SetReorder(reordering);
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
}
void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path,
@@ -1835,11 +1825,19 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
}
}
+auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) {
+ auto null_checker = [this, instruction]() {
+ this->codegen_->MaybeRecordImplicitNullCheck(instruction);
+ };
+ return null_checker;
+}
+
void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
Register obj = locations->InAt(0).AsRegister<Register>();
Location index = locations->InAt(1);
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction);
Primitive::Type type = instruction->GetType();
switch (type) {
@@ -1848,10 +1846,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker);
} else {
__ Addu(TMP, obj, index.AsRegister<Register>());
- __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker);
}
break;
}
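`GetImplicitNullChecker` above wraps `MaybeRecordImplicitNullCheck` in a lambda so the load/store helpers can invoke it immediately after emitting the instruction that may fault, even when the helper expands to several instructions. A toy sketch of the same callback shape (hypothetical assembler, not the ART `MipsAssembler` API):

#include <cstdint>
#include <iostream>

// Toy assembler: the load helper may need extra instructions to materialize a
// large offset, and the callback fires right after the potentially-faulting
// load so the recorded PC matches that instruction.
struct ToyAssembler {
  std::uint32_t pc = 0;
  void Emit(const char* what) { std::cout << pc << ": " << what << "\n"; pc += 4; }
  template <typename NullChecker>
  void LoadFromOffset(std::int32_t offset, NullChecker&& null_checker) {
    if (offset > 0x7FFF) {
      Emit("lui/addu   ; materialize the large offset first");
    }
    Emit("lw          ; the instruction that can fault on null");
    null_checker();  // e.g. record a stack map for the load just emitted
  }
};

int main() {
  ToyAssembler assembler;
  auto null_checker = [&]() { std::cout << "  record implicit null check here\n"; };
  assembler.LoadFromOffset(0x10, null_checker);
  assembler.LoadFromOffset(0x12345, null_checker);
  return 0;
}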
@@ -1861,10 +1859,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker);
} else {
__ Addu(TMP, obj, index.AsRegister<Register>());
- __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1874,11 +1872,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
__ Addu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1888,11 +1886,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
__ Addu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1904,11 +1902,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadFromOffset(kLoadWord, out, obj, offset);
+ __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
__ Addu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadWord, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1918,11 +1916,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadFromOffset(kLoadDoubleword, out, obj, offset);
+ __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
__ Addu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1932,11 +1930,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadSFromOffset(out, obj, offset);
+ __ LoadSFromOffset(out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
__ Addu(TMP, obj, TMP);
- __ LoadSFromOffset(out, TMP, data_offset);
+ __ LoadSFromOffset(out, TMP, data_offset, null_checker);
}
break;
}
@@ -1946,11 +1944,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadDFromOffset(out, obj, offset);
+ __ LoadDFromOffset(out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
__ Addu(TMP, obj, TMP);
- __ LoadDFromOffset(out, TMP, data_offset);
+ __ LoadDFromOffset(out, TMP, data_offset, null_checker);
}
break;
}
@@ -1959,7 +1957,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
}
void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) {
@@ -1977,6 +1974,25 @@ void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
+Location LocationsBuilderMIPS::RegisterOrZeroConstant(HInstruction* instruction) {
+ return (instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern())
+ ? Location::ConstantLocation(instruction->AsConstant())
+ : Location::RequiresRegister();
+}
+
+Location LocationsBuilderMIPS::FpuRegisterOrConstantForStore(HInstruction* instruction) {
+ // We can store 0.0 directly (from the ZERO register) without loading it into an FPU register.
+ // We can store a non-zero float or double constant without first loading it into the FPU,
+ // but we should only prefer this if the constant has a single use.
+ if (instruction->IsConstant() &&
+ (instruction->AsConstant()->IsZeroBitPattern() ||
+ instruction->GetUses().HasExactlyOneElement())) {
+ return Location::ConstantLocation(instruction->AsConstant());
+ }
+ // Otherwise require an FPU register for the constant.
+ return Location::RequiresFpuRegister();
+}
+
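The zero-bit-pattern tests above are what make it safe to store the integer ZERO register in place of a floating-point constant: +0.0f and +0.0 are all zero bits, whereas -0.0 is not. A small check of that fact:

#include <cassert>
#include <cstdint>
#include <cstring>

// bit_cast stand-in for illustration; ART has its own helper.
template <typename To, typename From>
To BitCast(From from) {
  static_assert(sizeof(To) == sizeof(From), "size mismatch");
  To to;
  std::memcpy(&to, &from, sizeof(To));
  return to;
}

int main() {
  assert(BitCast<std::uint32_t>(0.0f) == 0u);                     // +0.0f: all zero bits
  assert(BitCast<std::uint64_t>(0.0) == 0u);                      // +0.0:  all zero bits
  assert(BitCast<std::uint64_t>(-0.0) == 0x8000000000000000ull);  // -0.0:  sign bit set
  return 0;
}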
void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
bool needs_runtime_call = instruction->NeedsTypeCheck();
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
@@ -1991,9 +2007,9 @@ void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
- locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
} else {
- locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
}
}
}
@@ -2002,23 +2018,29 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
LocationSummary* locations = instruction->GetLocations();
Register obj = locations->InAt(0).AsRegister<Register>();
Location index = locations->InAt(1);
+ Location value_location = locations->InAt(2);
Primitive::Type value_type = instruction->GetComponentType();
bool needs_runtime_call = locations->WillCall();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ auto null_checker = GetImplicitNullChecker(instruction);
+ Register base_reg = index.IsConstant() ? obj : TMP;
switch (value_type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ StoreToOffset(kStoreByte, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1;
} else {
- __ Addu(TMP, obj, index.AsRegister<Register>());
- __ StoreToOffset(kStoreByte, value, TMP, data_offset);
+ __ Addu(base_reg, obj, index.AsRegister<Register>());
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreByte, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ Register value = value_location.AsRegister<Register>();
+ __ StoreToOffset(kStoreByte, value, base_reg, data_offset, null_checker);
}
break;
}
@@ -2026,15 +2048,18 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimShort:
case Primitive::kPrimChar: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ StoreToOffset(kStoreHalfword, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
- __ Addu(TMP, obj, TMP);
- __ StoreToOffset(kStoreHalfword, value, TMP, data_offset);
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_2);
+ __ Addu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreHalfword, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ Register value = value_location.AsRegister<Register>();
+ __ StoreToOffset(kStoreHalfword, value, base_reg, data_offset, null_checker);
}
break;
}
@@ -2043,29 +2068,27 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimNot: {
if (!needs_runtime_call) {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreToOffset(kStoreWord, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- DCHECK(index.IsRegister()) << index;
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
- __ Addu(TMP, obj, TMP);
- __ StoreToOffset(kStoreWord, value, TMP, data_offset);
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(base_reg, obj, base_reg);
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- if (needs_write_barrier) {
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- codegen_->MarkGCCard(obj, value);
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ DCHECK(!needs_write_barrier);
+ } else {
+ Register value = value_location.AsRegister<Register>();
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ if (needs_write_barrier) {
+ DCHECK_EQ(value_type, Primitive::kPrimNot);
+ codegen_->MarkGCCard(obj, value);
+ }
}
} else {
DCHECK_EQ(value_type, Primitive::kPrimNot);
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
- instruction,
- instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickAputObject));
+ codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
}
break;
@@ -2073,47 +2096,54 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimLong: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegisterPairLow<Register>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ StoreToOffset(kStoreDoubleword, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
- __ Addu(TMP, obj, TMP);
- __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset);
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_8);
+ __ Addu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ Register value = value_location.AsRegisterPairLow<Register>();
+ __ StoreToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker);
}
break;
}
case Primitive::kPrimFloat: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
- FRegister value = locations->InAt(2).AsFpuRegister<FRegister>();
- DCHECK(locations->InAt(2).IsFpuRegister());
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreSToOffset(value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
- __ Addu(TMP, obj, TMP);
- __ StoreSToOffset(value, TMP, data_offset);
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ FRegister value = value_location.AsFpuRegister<FRegister>();
+ __ StoreSToOffset(value, base_reg, data_offset, null_checker);
}
break;
}
case Primitive::kPrimDouble: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
- FRegister value = locations->InAt(2).AsFpuRegister<FRegister>();
- DCHECK(locations->InAt(2).IsFpuRegister());
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ StoreDToOffset(value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
- __ Addu(TMP, obj, TMP);
- __ StoreDToOffset(value, TMP, data_offset);
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_8);
+ __ Addu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ FRegister value = value_location.AsFpuRegister<FRegister>();
+ __ StoreDToOffset(value, base_reg, data_offset, null_checker);
}
break;
}
@@ -2122,23 +2152,16 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
-
- // Ints and objects are handled in the switch.
- if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
}
void LocationsBuilderMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -2216,6 +2239,11 @@ void LocationsBuilderMIPS::VisitCompare(HCompare* compare) {
case Primitive::kPrimShort:
case Primitive::kPrimChar:
case Primitive::kPrimInt:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -2404,13 +2432,8 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- // TODO: don't use branches.
- GenerateFpCompareAndBranch(instruction->GetCondition(),
- instruction->IsGtBias(),
- type,
- locations,
- &true_label);
- break;
+ GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations);
+ return;
}
// Convert the branches into the result.
@@ -2636,11 +2659,7 @@ void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) {
GenerateDivRemIntegral(instruction);
break;
case Primitive::kPrimLong: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv),
- instruction,
- instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickLdiv));
+ codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
break;
}
@@ -2662,14 +2681,8 @@ void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) {
}
void LocationsBuilderMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) {
@@ -2799,19 +2812,36 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompare(IfCondition cond,
switch (cond) {
case kCondEQ:
case kCondNE:
- if (use_imm && IsUint<16>(rhs_imm)) {
- __ Xori(dst, lhs, rhs_imm);
- } else {
- if (use_imm) {
- rhs_reg = TMP;
- __ LoadConst32(rhs_reg, rhs_imm);
+ if (use_imm && IsInt<16>(-rhs_imm)) {
+ if (rhs_imm == 0) {
+ if (cond == kCondEQ) {
+ __ Sltiu(dst, lhs, 1);
+ } else {
+ __ Sltu(dst, ZERO, lhs);
+ }
+ } else {
+ __ Addiu(dst, lhs, -rhs_imm);
+ if (cond == kCondEQ) {
+ __ Sltiu(dst, dst, 1);
+ } else {
+ __ Sltu(dst, ZERO, dst);
+ }
}
- __ Xor(dst, lhs, rhs_reg);
- }
- if (cond == kCondEQ) {
- __ Sltiu(dst, dst, 1);
} else {
- __ Sltu(dst, ZERO, dst);
+ if (use_imm && IsUint<16>(rhs_imm)) {
+ __ Xori(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst32(rhs_reg, rhs_imm);
+ }
+ __ Xor(dst, lhs, rhs_reg);
+ }
+ if (cond == kCondEQ) {
+ __ Sltiu(dst, dst, 1);
+ } else {
+ __ Sltu(dst, ZERO, dst);
+ }
}
break;
@@ -2911,13 +2941,111 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompare(IfCondition cond,
}
}
+bool InstructionCodeGeneratorMIPS::MaterializeIntCompare(IfCondition cond,
+ LocationSummary* input_locations,
+ Register dst) {
+ Register lhs = input_locations->InAt(0).AsRegister<Register>();
+ Location rhs_location = input_locations->InAt(1);
+ Register rhs_reg = ZERO;
+ int64_t rhs_imm = 0;
+ bool use_imm = rhs_location.IsConstant();
+ if (use_imm) {
+ rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+ } else {
+ rhs_reg = rhs_location.AsRegister<Register>();
+ }
+
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ if (use_imm && IsInt<16>(-rhs_imm)) {
+ __ Addiu(dst, lhs, -rhs_imm);
+ } else if (use_imm && IsUint<16>(rhs_imm)) {
+ __ Xori(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst32(rhs_reg, rhs_imm);
+ }
+ __ Xor(dst, lhs, rhs_reg);
+ }
+ return (cond == kCondEQ);
+
+ case kCondLT:
+ case kCondGE:
+ if (use_imm && IsInt<16>(rhs_imm)) {
+ __ Slti(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst32(rhs_reg, rhs_imm);
+ }
+ __ Slt(dst, lhs, rhs_reg);
+ }
+ return (cond == kCondGE);
+
+ case kCondLE:
+ case kCondGT:
+ if (use_imm && IsInt<16>(rhs_imm + 1)) {
+ // Simulate lhs <= rhs via lhs < rhs + 1.
+ __ Slti(dst, lhs, rhs_imm + 1);
+ return (cond == kCondGT);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst32(rhs_reg, rhs_imm);
+ }
+ __ Slt(dst, rhs_reg, lhs);
+ return (cond == kCondLE);
+ }
+
+ case kCondB:
+ case kCondAE:
+ if (use_imm && IsInt<16>(rhs_imm)) {
+ // Sltiu sign-extends its 16-bit immediate operand before
+ // the comparison and thus lets us compare directly with
+ // unsigned values in the ranges [0, 0x7fff] and
+ // [0xffff8000, 0xffffffff].
+ __ Sltiu(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst32(rhs_reg, rhs_imm);
+ }
+ __ Sltu(dst, lhs, rhs_reg);
+ }
+ return (cond == kCondAE);
+
+ case kCondBE:
+ case kCondA:
+ if (use_imm && (rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) {
+ // Simulate lhs <= rhs via lhs < rhs + 1.
+ // Note that this only works if rhs + 1 does not overflow
+ // to 0, hence the check above.
+ // Sltiu sign-extends its 16-bit immediate operand before
+ // the comparison and thus lets us compare directly with
+ // unsigned values in the ranges [0, 0x7fff] and
+ // [0xffff8000, 0xffffffff].
+ __ Sltiu(dst, lhs, rhs_imm + 1);
+ return (cond == kCondA);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst32(rhs_reg, rhs_imm);
+ }
+ __ Sltu(dst, rhs_reg, lhs);
+ return (cond == kCondBE);
+ }
+ }
+}
+
void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond,
LocationSummary* locations,
MipsLabel* label) {
Register lhs = locations->InAt(0).AsRegister<Register>();
Location rhs_location = locations->InAt(1);
Register rhs_reg = ZERO;
- int32_t rhs_imm = 0;
+ int64_t rhs_imm = 0;
bool use_imm = rhs_location.IsConstant();
if (use_imm) {
rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
@@ -2954,42 +3082,136 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond,
break;
}
} else {
- if (use_imm) {
- // TODO: more efficient comparison with 16-bit constants without loading them into TMP.
- rhs_reg = TMP;
- __ LoadConst32(rhs_reg, rhs_imm);
- }
- switch (cond) {
- case kCondEQ:
- __ Beq(lhs, rhs_reg, label);
- break;
- case kCondNE:
- __ Bne(lhs, rhs_reg, label);
- break;
- case kCondLT:
- __ Blt(lhs, rhs_reg, label);
- break;
- case kCondGE:
- __ Bge(lhs, rhs_reg, label);
- break;
- case kCondLE:
- __ Bge(rhs_reg, lhs, label);
- break;
- case kCondGT:
- __ Blt(rhs_reg, lhs, label);
- break;
- case kCondB:
- __ Bltu(lhs, rhs_reg, label);
- break;
- case kCondAE:
- __ Bgeu(lhs, rhs_reg, label);
- break;
- case kCondBE:
- __ Bgeu(rhs_reg, lhs, label);
- break;
- case kCondA:
- __ Bltu(rhs_reg, lhs, label);
- break;
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ if (isR6 || !use_imm) {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst32(rhs_reg, rhs_imm);
+ }
+ switch (cond) {
+ case kCondEQ:
+ __ Beq(lhs, rhs_reg, label);
+ break;
+ case kCondNE:
+ __ Bne(lhs, rhs_reg, label);
+ break;
+ case kCondLT:
+ __ Blt(lhs, rhs_reg, label);
+ break;
+ case kCondGE:
+ __ Bge(lhs, rhs_reg, label);
+ break;
+ case kCondLE:
+ __ Bge(rhs_reg, lhs, label);
+ break;
+ case kCondGT:
+ __ Blt(rhs_reg, lhs, label);
+ break;
+ case kCondB:
+ __ Bltu(lhs, rhs_reg, label);
+ break;
+ case kCondAE:
+ __ Bgeu(lhs, rhs_reg, label);
+ break;
+ case kCondBE:
+ __ Bgeu(rhs_reg, lhs, label);
+ break;
+ case kCondA:
+ __ Bltu(rhs_reg, lhs, label);
+ break;
+ }
+ } else {
+ // Special cases for more efficient comparison with constants on R2.
+ switch (cond) {
+ case kCondEQ:
+ __ LoadConst32(TMP, rhs_imm);
+ __ Beq(lhs, TMP, label);
+ break;
+ case kCondNE:
+ __ LoadConst32(TMP, rhs_imm);
+ __ Bne(lhs, TMP, label);
+ break;
+ case kCondLT:
+ if (IsInt<16>(rhs_imm)) {
+ __ Slti(TMP, lhs, rhs_imm);
+ __ Bnez(TMP, label);
+ } else {
+ __ LoadConst32(TMP, rhs_imm);
+ __ Blt(lhs, TMP, label);
+ }
+ break;
+ case kCondGE:
+ if (IsInt<16>(rhs_imm)) {
+ __ Slti(TMP, lhs, rhs_imm);
+ __ Beqz(TMP, label);
+ } else {
+ __ LoadConst32(TMP, rhs_imm);
+ __ Bge(lhs, TMP, label);
+ }
+ break;
+ case kCondLE:
+ if (IsInt<16>(rhs_imm + 1)) {
+ // Simulate lhs <= rhs via lhs < rhs + 1.
+ __ Slti(TMP, lhs, rhs_imm + 1);
+ __ Bnez(TMP, label);
+ } else {
+ __ LoadConst32(TMP, rhs_imm);
+ __ Bge(TMP, lhs, label);
+ }
+ break;
+ case kCondGT:
+ if (IsInt<16>(rhs_imm + 1)) {
+ // Simulate lhs > rhs via !(lhs < rhs + 1).
+ __ Slti(TMP, lhs, rhs_imm + 1);
+ __ Beqz(TMP, label);
+ } else {
+ __ LoadConst32(TMP, rhs_imm);
+ __ Blt(TMP, lhs, label);
+ }
+ break;
+ case kCondB:
+ if (IsInt<16>(rhs_imm)) {
+ __ Sltiu(TMP, lhs, rhs_imm);
+ __ Bnez(TMP, label);
+ } else {
+ __ LoadConst32(TMP, rhs_imm);
+ __ Bltu(lhs, TMP, label);
+ }
+ break;
+ case kCondAE:
+ if (IsInt<16>(rhs_imm)) {
+ __ Sltiu(TMP, lhs, rhs_imm);
+ __ Beqz(TMP, label);
+ } else {
+ __ LoadConst32(TMP, rhs_imm);
+ __ Bgeu(lhs, TMP, label);
+ }
+ break;
+ case kCondBE:
+ if ((rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) {
+ // Simulate lhs <= rhs via lhs < rhs + 1.
+ // Note that this only works if rhs + 1 does not overflow
+ // to 0, hence the check above.
+ __ Sltiu(TMP, lhs, rhs_imm + 1);
+ __ Bnez(TMP, label);
+ } else {
+ __ LoadConst32(TMP, rhs_imm);
+ __ Bgeu(TMP, lhs, label);
+ }
+ break;
+ case kCondA:
+ if ((rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) {
+ // Simulate lhs > rhs via !(lhs < rhs + 1).
+ // Note that this only works if rhs + 1 does not overflow
+ // to 0, hence the check above.
+ __ Sltiu(TMP, lhs, rhs_imm + 1);
+ __ Beqz(TMP, label);
+ } else {
+ __ LoadConst32(TMP, rhs_imm);
+ __ Bltu(TMP, lhs, label);
+ }
+ break;
+ }
}
}
}
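The R2 special cases above lean on two identities: a signed or unsigned `lhs <= imm` can be tested as `lhs < imm + 1` (as long as `imm + 1` does not wrap), and `sltiu` sign-extends its 16-bit immediate, so the directly encodable unsigned comparands are [0, 0x7fff] and [0xffff8000, 0xffffffff]. A quick check of both facts (illustrative, outside the code generator):

#include <cassert>
#include <cstdint>

int main() {
  // lhs <= imm  <=>  lhs < imm + 1, provided imm + 1 does not overflow.
  for (std::int64_t imm : {-5LL, 0LL, 7LL, 0x7FFELL}) {
    for (std::int64_t lhs : {imm - 1, imm, imm + 1}) {
      assert((lhs <= imm) == (lhs < imm + 1));
    }
  }
  // The same identity for unsigned values, excluding imm == 0xFFFFFFFF where +1 wraps to 0.
  std::uint32_t imm = 0x7FFFu;
  for (std::uint32_t lhs : {imm - 1, imm, imm + 1}) {
    assert((lhs <= imm) == (lhs < imm + 1u));
  }
  // Sign-extension of the 16-bit immediate 0x8000 (i.e. -32768), as sltiu sees it.
  assert(static_cast<std::uint32_t>(std::int32_t{-32768}) == 0xFFFF8000u);
  return 0;
}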
@@ -3207,6 +3429,414 @@ void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond
}
}
+void InstructionCodeGeneratorMIPS::GenerateFpCompare(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* locations) {
+ Register dst = locations->Out().AsRegister<Register>();
+ FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ if (type == Primitive::kPrimFloat) {
+ if (isR6) {
+ switch (cond) {
+ case kCondEQ:
+ __ CmpEqS(FTMP, lhs, rhs);
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ case kCondNE:
+ __ CmpEqS(FTMP, lhs, rhs);
+ __ Mfc1(dst, FTMP);
+ __ Addiu(dst, dst, 1);
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ __ CmpLtS(FTMP, lhs, rhs);
+ } else {
+ __ CmpUltS(FTMP, lhs, rhs);
+ }
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ __ CmpLeS(FTMP, lhs, rhs);
+ } else {
+ __ CmpUleS(FTMP, lhs, rhs);
+ }
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ __ CmpUltS(FTMP, rhs, lhs);
+ } else {
+ __ CmpLtS(FTMP, rhs, lhs);
+ }
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ __ CmpUleS(FTMP, rhs, lhs);
+ } else {
+ __ CmpLeS(FTMP, rhs, lhs);
+ }
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+ UNREACHABLE();
+ }
+ } else {
+ switch (cond) {
+ case kCondEQ:
+ __ CeqS(0, lhs, rhs);
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ case kCondNE:
+ __ CeqS(0, lhs, rhs);
+ __ LoadConst32(dst, 1);
+ __ Movt(dst, ZERO, 0);
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ __ ColtS(0, lhs, rhs);
+ } else {
+ __ CultS(0, lhs, rhs);
+ }
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ __ ColeS(0, lhs, rhs);
+ } else {
+ __ CuleS(0, lhs, rhs);
+ }
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ __ CultS(0, rhs, lhs);
+ } else {
+ __ ColtS(0, rhs, lhs);
+ }
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ __ CuleS(0, rhs, lhs);
+ } else {
+ __ ColeS(0, rhs, lhs);
+ }
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+ UNREACHABLE();
+ }
+ }
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ if (isR6) {
+ switch (cond) {
+ case kCondEQ:
+ __ CmpEqD(FTMP, lhs, rhs);
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ case kCondNE:
+ __ CmpEqD(FTMP, lhs, rhs);
+ __ Mfc1(dst, FTMP);
+ __ Addiu(dst, dst, 1);
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ __ CmpLtD(FTMP, lhs, rhs);
+ } else {
+ __ CmpUltD(FTMP, lhs, rhs);
+ }
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ __ CmpLeD(FTMP, lhs, rhs);
+ } else {
+ __ CmpUleD(FTMP, lhs, rhs);
+ }
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ __ CmpUltD(FTMP, rhs, lhs);
+ } else {
+ __ CmpLtD(FTMP, rhs, lhs);
+ }
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ __ CmpUleD(FTMP, rhs, lhs);
+ } else {
+ __ CmpLeD(FTMP, rhs, lhs);
+ }
+ __ Mfc1(dst, FTMP);
+ __ Andi(dst, dst, 1);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+ UNREACHABLE();
+ }
+ } else {
+ switch (cond) {
+ case kCondEQ:
+ __ CeqD(0, lhs, rhs);
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ case kCondNE:
+ __ CeqD(0, lhs, rhs);
+ __ LoadConst32(dst, 1);
+ __ Movt(dst, ZERO, 0);
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ __ ColtD(0, lhs, rhs);
+ } else {
+ __ CultD(0, lhs, rhs);
+ }
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ __ ColeD(0, lhs, rhs);
+ } else {
+ __ CuleD(0, lhs, rhs);
+ }
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ __ CultD(0, rhs, lhs);
+ } else {
+ __ ColtD(0, rhs, lhs);
+ }
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ __ CuleD(0, rhs, lhs);
+ } else {
+ __ ColeD(0, rhs, lhs);
+ }
+ __ LoadConst32(dst, 1);
+ __ Movf(dst, ZERO, 0);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+ UNREACHABLE();
+ }
+ }
+ }
+}
+
+bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR2(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* input_locations,
+ int cc) {
+ FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>();
+ CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+ if (type == Primitive::kPrimFloat) {
+ switch (cond) {
+ case kCondEQ:
+ __ CeqS(cc, lhs, rhs);
+ return false;
+ case kCondNE:
+ __ CeqS(cc, lhs, rhs);
+ return true;
+ case kCondLT:
+ if (gt_bias) {
+ __ ColtS(cc, lhs, rhs);
+ } else {
+ __ CultS(cc, lhs, rhs);
+ }
+ return false;
+ case kCondLE:
+ if (gt_bias) {
+ __ ColeS(cc, lhs, rhs);
+ } else {
+ __ CuleS(cc, lhs, rhs);
+ }
+ return false;
+ case kCondGT:
+ if (gt_bias) {
+ __ CultS(cc, rhs, lhs);
+ } else {
+ __ ColtS(cc, rhs, lhs);
+ }
+ return false;
+ case kCondGE:
+ if (gt_bias) {
+ __ CuleS(cc, rhs, lhs);
+ } else {
+ __ ColeS(cc, rhs, lhs);
+ }
+ return false;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
+ }
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ switch (cond) {
+ case kCondEQ:
+ __ CeqD(cc, lhs, rhs);
+ return false;
+ case kCondNE:
+ __ CeqD(cc, lhs, rhs);
+ return true;
+ case kCondLT:
+ if (gt_bias) {
+ __ ColtD(cc, lhs, rhs);
+ } else {
+ __ CultD(cc, lhs, rhs);
+ }
+ return false;
+ case kCondLE:
+ if (gt_bias) {
+ __ ColeD(cc, lhs, rhs);
+ } else {
+ __ CuleD(cc, lhs, rhs);
+ }
+ return false;
+ case kCondGT:
+ if (gt_bias) {
+ __ CultD(cc, rhs, lhs);
+ } else {
+ __ ColtD(cc, rhs, lhs);
+ }
+ return false;
+ case kCondGE:
+ if (gt_bias) {
+ __ CuleD(cc, rhs, lhs);
+ } else {
+ __ ColeD(cc, rhs, lhs);
+ }
+ return false;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
+ }
+ }
+}
+
+bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR6(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* input_locations,
+ FRegister dst) {
+ FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>();
+ CHECK(codegen_->GetInstructionSetFeatures().IsR6());
+ if (type == Primitive::kPrimFloat) {
+ switch (cond) {
+ case kCondEQ:
+ __ CmpEqS(dst, lhs, rhs);
+ return false;
+ case kCondNE:
+ __ CmpEqS(dst, lhs, rhs);
+ return true;
+ case kCondLT:
+ if (gt_bias) {
+ __ CmpLtS(dst, lhs, rhs);
+ } else {
+ __ CmpUltS(dst, lhs, rhs);
+ }
+ return false;
+ case kCondLE:
+ if (gt_bias) {
+ __ CmpLeS(dst, lhs, rhs);
+ } else {
+ __ CmpUleS(dst, lhs, rhs);
+ }
+ return false;
+ case kCondGT:
+ if (gt_bias) {
+ __ CmpUltS(dst, rhs, lhs);
+ } else {
+ __ CmpLtS(dst, rhs, lhs);
+ }
+ return false;
+ case kCondGE:
+ if (gt_bias) {
+ __ CmpUleS(dst, rhs, lhs);
+ } else {
+ __ CmpLeS(dst, rhs, lhs);
+ }
+ return false;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
+ }
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ switch (cond) {
+ case kCondEQ:
+ __ CmpEqD(dst, lhs, rhs);
+ return false;
+ case kCondNE:
+ __ CmpEqD(dst, lhs, rhs);
+ return true;
+ case kCondLT:
+ if (gt_bias) {
+ __ CmpLtD(dst, lhs, rhs);
+ } else {
+ __ CmpUltD(dst, lhs, rhs);
+ }
+ return false;
+ case kCondLE:
+ if (gt_bias) {
+ __ CmpLeD(dst, lhs, rhs);
+ } else {
+ __ CmpUleD(dst, lhs, rhs);
+ }
+ return false;
+ case kCondGT:
+ if (gt_bias) {
+ __ CmpUltD(dst, rhs, lhs);
+ } else {
+ __ CmpLtD(dst, rhs, lhs);
+ }
+ return false;
+ case kCondGE:
+ if (gt_bias) {
+ __ CmpUleD(dst, rhs, lhs);
+ } else {
+ __ CmpLeD(dst, rhs, lhs);
+ }
+ return false;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
+ }
+ }
+}
+
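The R6 sequences above rely on the MIPS32r6 compare instructions (`cmp.eq.s`, `cmp.lt.d`, etc.) writing an all-ones mask to the destination FPR when the condition holds and all zeros otherwise. After `mfc1`, the low word is therefore -1 or 0, which is why the two tiny follow-ups work: `andi dst, dst, 1` materializes the condition as 1/0, and `addiu dst, dst, 1` materializes its negation (used for kCondNE). A worked check of that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  const std::int32_t mask_true = -1;   // 0xFFFFFFFF moved from the FPU via mfc1
  const std::int32_t mask_false = 0;
  // andi dst, dst, 1: the condition itself.
  assert((mask_true & 1) == 1);
  assert((mask_false & 1) == 0);
  // addiu dst, dst, 1: -1 -> 0 and 0 -> 1, i.e. the negated condition (kCondNE).
  assert(mask_true + 1 == 0);
  assert(mask_false + 1 == 1);
  return 0;
}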
void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
bool gt_bias,
Primitive::Type type,
@@ -3260,6 +3890,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
break;
default:
LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
}
} else {
switch (cond) {
@@ -3305,6 +3936,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
break;
default:
LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
}
}
} else {
@@ -3353,6 +3985,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
break;
default:
LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
}
} else {
switch (cond) {
@@ -3398,6 +4031,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
break;
default:
LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
}
}
}
@@ -3499,6 +4133,7 @@ void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) {
void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -3513,30 +4148,562 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
/* false_target */ nullptr);
}
-void LocationsBuilderMIPS::VisitSelect(HSelect* select) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
- if (Primitive::IsFloatingPointType(select->GetType())) {
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+// This function returns true if a conditional move can be generated for HSelect.
+// Otherwise it returns false and HSelect must be implemented in terms of conditional
+// branches and regular moves.
+//
+// If `locations_to_set` isn't nullptr, its inputs and outputs are set for HSelect.
+//
+// While determining feasibility of a conditional move and setting inputs/outputs
+// are two distinct tasks, this function does both because they share quite a bit
+// of common logic.
+static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* locations_to_set) {
+ bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition());
+ HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ HCondition* condition = cond->AsCondition();
+
+ Primitive::Type cond_type = materialized ? Primitive::kPrimInt : condition->InputAt(0)->GetType();
+ Primitive::Type dst_type = select->GetType();
+
+ HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
+ HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
+ bool is_true_value_zero_constant =
+ (cst_true_value != nullptr && cst_true_value->IsZeroBitPattern());
+ bool is_false_value_zero_constant =
+ (cst_false_value != nullptr && cst_false_value->IsZeroBitPattern());
+
+ bool can_move_conditionally = false;
+ bool use_const_for_false_in = false;
+ bool use_const_for_true_in = false;
+
+ if (!cond->IsConstant()) {
+ switch (cond_type) {
+ default:
+ switch (dst_type) {
+ default:
+ // Moving int on int condition.
+ if (is_r6) {
+ if (is_true_value_zero_constant) {
+ // seleqz out_reg, false_reg, cond_reg
+ can_move_conditionally = true;
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // selnez out_reg, true_reg, cond_reg
+ can_move_conditionally = true;
+ use_const_for_false_in = true;
+ } else if (materialized) {
+            // Unmaterialized int conditions are deliberately not materialized here,
+            // to keep the instruction count low.
+ // selnez AT, true_reg, cond_reg
+ // seleqz TMP, false_reg, cond_reg
+ // or out_reg, AT, TMP
+ can_move_conditionally = true;
+ }
+ } else {
+ // movn out_reg, true_reg/ZERO, cond_reg
+ can_move_conditionally = true;
+ use_const_for_true_in = is_true_value_zero_constant;
+ }
+ break;
+ case Primitive::kPrimLong:
+ // Moving long on int condition.
+ if (is_r6) {
+ if (is_true_value_zero_constant) {
+ // seleqz out_reg_lo, false_reg_lo, cond_reg
+ // seleqz out_reg_hi, false_reg_hi, cond_reg
+ can_move_conditionally = true;
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // selnez out_reg_lo, true_reg_lo, cond_reg
+ // selnez out_reg_hi, true_reg_hi, cond_reg
+ can_move_conditionally = true;
+ use_const_for_false_in = true;
+ }
+ // Other long conditional moves would generate 6+ instructions,
+ // which is too many.
+ } else {
+ // movn out_reg_lo, true_reg_lo/ZERO, cond_reg
+ // movn out_reg_hi, true_reg_hi/ZERO, cond_reg
+ can_move_conditionally = true;
+ use_const_for_true_in = is_true_value_zero_constant;
+ }
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ // Moving float/double on int condition.
+ if (is_r6) {
+ if (materialized) {
+              // Unmaterialized int conditions are deliberately not materialized here,
+              // to keep the instruction count low.
+ can_move_conditionally = true;
+ if (is_true_value_zero_constant) {
+ // sltu TMP, ZERO, cond_reg
+ // mtc1 TMP, temp_cond_reg
+ // seleqz.fmt out_reg, false_reg, temp_cond_reg
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // sltu TMP, ZERO, cond_reg
+ // mtc1 TMP, temp_cond_reg
+ // selnez.fmt out_reg, true_reg, temp_cond_reg
+ use_const_for_false_in = true;
+ } else {
+ // sltu TMP, ZERO, cond_reg
+ // mtc1 TMP, temp_cond_reg
+ // sel.fmt temp_cond_reg, false_reg, true_reg
+ // mov.fmt out_reg, temp_cond_reg
+ }
+ }
+ } else {
+ // movn.fmt out_reg, true_reg, cond_reg
+ can_move_conditionally = true;
+ }
+ break;
+ }
+ break;
+ case Primitive::kPrimLong:
+        // Long comparisons are not materialized for now;
+        // conditional branches are used instead.
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ switch (dst_type) {
+ default:
+ // Moving int on float/double condition.
+ if (is_r6) {
+ if (is_true_value_zero_constant) {
+ // mfc1 TMP, temp_cond_reg
+ // seleqz out_reg, false_reg, TMP
+ can_move_conditionally = true;
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // mfc1 TMP, temp_cond_reg
+ // selnez out_reg, true_reg, TMP
+ can_move_conditionally = true;
+ use_const_for_false_in = true;
+ } else {
+ // mfc1 TMP, temp_cond_reg
+ // selnez AT, true_reg, TMP
+ // seleqz TMP, false_reg, TMP
+ // or out_reg, AT, TMP
+ can_move_conditionally = true;
+ }
+ } else {
+ // movt out_reg, true_reg/ZERO, cc
+ can_move_conditionally = true;
+ use_const_for_true_in = is_true_value_zero_constant;
+ }
+ break;
+ case Primitive::kPrimLong:
+ // Moving long on float/double condition.
+ if (is_r6) {
+ if (is_true_value_zero_constant) {
+ // mfc1 TMP, temp_cond_reg
+ // seleqz out_reg_lo, false_reg_lo, TMP
+ // seleqz out_reg_hi, false_reg_hi, TMP
+ can_move_conditionally = true;
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // mfc1 TMP, temp_cond_reg
+ // selnez out_reg_lo, true_reg_lo, TMP
+ // selnez out_reg_hi, true_reg_hi, TMP
+ can_move_conditionally = true;
+ use_const_for_false_in = true;
+ }
+ // Other long conditional moves would generate 6+ instructions,
+ // which is too many.
+ } else {
+ // movt out_reg_lo, true_reg_lo/ZERO, cc
+ // movt out_reg_hi, true_reg_hi/ZERO, cc
+ can_move_conditionally = true;
+ use_const_for_true_in = is_true_value_zero_constant;
+ }
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ // Moving float/double on float/double condition.
+ if (is_r6) {
+ can_move_conditionally = true;
+ if (is_true_value_zero_constant) {
+ // seleqz.fmt out_reg, false_reg, temp_cond_reg
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // selnez.fmt out_reg, true_reg, temp_cond_reg
+ use_const_for_false_in = true;
+ } else {
+ // sel.fmt temp_cond_reg, false_reg, true_reg
+ // mov.fmt out_reg, temp_cond_reg
+ }
+ } else {
+ // movt.fmt out_reg, true_reg, cc
+ can_move_conditionally = true;
+ }
+ break;
+ }
+ break;
+ }
+ }
+
+ if (can_move_conditionally) {
+ DCHECK(!use_const_for_false_in || !use_const_for_true_in);
} else {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ DCHECK(!use_const_for_false_in);
+ DCHECK(!use_const_for_true_in);
}
- if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
- locations->SetInAt(2, Location::RequiresRegister());
+
+ if (locations_to_set != nullptr) {
+ if (use_const_for_false_in) {
+ locations_to_set->SetInAt(0, Location::ConstantLocation(cst_false_value));
+ } else {
+ locations_to_set->SetInAt(0,
+ Primitive::IsFloatingPointType(dst_type)
+ ? Location::RequiresFpuRegister()
+ : Location::RequiresRegister());
+ }
+ if (use_const_for_true_in) {
+ locations_to_set->SetInAt(1, Location::ConstantLocation(cst_true_value));
+ } else {
+ locations_to_set->SetInAt(1,
+ Primitive::IsFloatingPointType(dst_type)
+ ? Location::RequiresFpuRegister()
+ : Location::RequiresRegister());
+ }
+ if (materialized) {
+ locations_to_set->SetInAt(2, Location::RequiresRegister());
+ }
+    // Unlike on R2, on R6 we don't require the output of a conditional
+    // move to be the same as the first input.
+ bool is_out_same_as_first_in = !can_move_conditionally || !is_r6;
+ if (is_out_same_as_first_in) {
+ locations_to_set->SetOut(Location::SameAsFirstInput());
+ } else {
+ locations_to_set->SetOut(Primitive::IsFloatingPointType(dst_type)
+ ? Location::RequiresFpuRegister()
+ : Location::RequiresRegister());
+ }
}
- locations->SetOut(Location::SameAsFirstInput());
+
+ return can_move_conditionally;
}
-void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) {
+void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) {
+ LocationSummary* locations = select->GetLocations();
+ Location dst = locations->Out();
+ Location src = locations->InAt(1);
+ Register src_reg = ZERO;
+ Register src_reg_high = ZERO;
+ HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ Register cond_reg = TMP;
+ int cond_cc = 0;
+ Primitive::Type cond_type = Primitive::kPrimInt;
+ bool cond_inverted = false;
+ Primitive::Type dst_type = select->GetType();
+
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
+ cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>();
+ } else {
+ HCondition* condition = cond->AsCondition();
+ LocationSummary* cond_locations = cond->GetLocations();
+ IfCondition if_cond = condition->GetCondition();
+ cond_type = condition->InputAt(0)->GetType();
+ switch (cond_type) {
+ default:
+ DCHECK_NE(cond_type, Primitive::kPrimLong);
+ cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ cond_inverted = MaterializeFpCompareR2(if_cond,
+ condition->IsGtBias(),
+ cond_type,
+ cond_locations,
+ cond_cc);
+ break;
+ }
+ }
+
+ DCHECK(dst.Equals(locations->InAt(0)));
+ if (src.IsRegister()) {
+ src_reg = src.AsRegister<Register>();
+ } else if (src.IsRegisterPair()) {
+ src_reg = src.AsRegisterPairLow<Register>();
+ src_reg_high = src.AsRegisterPairHigh<Register>();
+ } else if (src.IsConstant()) {
+ DCHECK(src.GetConstant()->IsZeroBitPattern());
+ }
+
+ switch (cond_type) {
+ default:
+ switch (dst_type) {
+ default:
+ if (cond_inverted) {
+ __ Movz(dst.AsRegister<Register>(), src_reg, cond_reg);
+ } else {
+ __ Movn(dst.AsRegister<Register>(), src_reg, cond_reg);
+ }
+ break;
+ case Primitive::kPrimLong:
+ if (cond_inverted) {
+ __ Movz(dst.AsRegisterPairLow<Register>(), src_reg, cond_reg);
+ __ Movz(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg);
+ } else {
+ __ Movn(dst.AsRegisterPairLow<Register>(), src_reg, cond_reg);
+ __ Movn(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg);
+ }
+ break;
+ case Primitive::kPrimFloat:
+ if (cond_inverted) {
+ __ MovzS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg);
+ } else {
+ __ MovnS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg);
+ }
+ break;
+ case Primitive::kPrimDouble:
+ if (cond_inverted) {
+ __ MovzD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg);
+ } else {
+ __ MovnD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg);
+ }
+ break;
+ }
+ break;
+ case Primitive::kPrimLong:
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ switch (dst_type) {
+ default:
+ if (cond_inverted) {
+ __ Movf(dst.AsRegister<Register>(), src_reg, cond_cc);
+ } else {
+ __ Movt(dst.AsRegister<Register>(), src_reg, cond_cc);
+ }
+ break;
+ case Primitive::kPrimLong:
+ if (cond_inverted) {
+ __ Movf(dst.AsRegisterPairLow<Register>(), src_reg, cond_cc);
+ __ Movf(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc);
+ } else {
+ __ Movt(dst.AsRegisterPairLow<Register>(), src_reg, cond_cc);
+ __ Movt(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc);
+ }
+ break;
+ case Primitive::kPrimFloat:
+ if (cond_inverted) {
+ __ MovfS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc);
+ } else {
+ __ MovtS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc);
+ }
+ break;
+ case Primitive::kPrimDouble:
+ if (cond_inverted) {
+ __ MovfD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc);
+ } else {
+ __ MovtD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc);
+ }
+ break;
+ }
+ break;
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) {
LocationSummary* locations = select->GetLocations();
- MipsLabel false_target;
- GenerateTestAndBranch(select,
- /* condition_input_index */ 2,
- /* true_target */ nullptr,
- &false_target);
- codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
- __ Bind(&false_target);
+ Location dst = locations->Out();
+ Location false_src = locations->InAt(0);
+ Location true_src = locations->InAt(1);
+ HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ Register cond_reg = TMP;
+ FRegister fcond_reg = FTMP;
+ Primitive::Type cond_type = Primitive::kPrimInt;
+ bool cond_inverted = false;
+ Primitive::Type dst_type = select->GetType();
+
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
+ cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>();
+ } else {
+ HCondition* condition = cond->AsCondition();
+ LocationSummary* cond_locations = cond->GetLocations();
+ IfCondition if_cond = condition->GetCondition();
+ cond_type = condition->InputAt(0)->GetType();
+ switch (cond_type) {
+ default:
+ DCHECK_NE(cond_type, Primitive::kPrimLong);
+ cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ cond_inverted = MaterializeFpCompareR6(if_cond,
+ condition->IsGtBias(),
+ cond_type,
+ cond_locations,
+ fcond_reg);
+ break;
+ }
+ }
+
+ if (true_src.IsConstant()) {
+ DCHECK(true_src.GetConstant()->IsZeroBitPattern());
+ }
+ if (false_src.IsConstant()) {
+ DCHECK(false_src.GetConstant()->IsZeroBitPattern());
+ }
+
+ switch (dst_type) {
+ default:
+ if (Primitive::IsFloatingPointType(cond_type)) {
+ __ Mfc1(cond_reg, fcond_reg);
+ }
+ if (true_src.IsConstant()) {
+ if (cond_inverted) {
+ __ Selnez(dst.AsRegister<Register>(), false_src.AsRegister<Register>(), cond_reg);
+ } else {
+ __ Seleqz(dst.AsRegister<Register>(), false_src.AsRegister<Register>(), cond_reg);
+ }
+ } else if (false_src.IsConstant()) {
+ if (cond_inverted) {
+ __ Seleqz(dst.AsRegister<Register>(), true_src.AsRegister<Register>(), cond_reg);
+ } else {
+ __ Selnez(dst.AsRegister<Register>(), true_src.AsRegister<Register>(), cond_reg);
+ }
+ } else {
+ DCHECK_NE(cond_reg, AT);
+ if (cond_inverted) {
+ __ Seleqz(AT, true_src.AsRegister<Register>(), cond_reg);
+ __ Selnez(TMP, false_src.AsRegister<Register>(), cond_reg);
+ } else {
+ __ Selnez(AT, true_src.AsRegister<Register>(), cond_reg);
+ __ Seleqz(TMP, false_src.AsRegister<Register>(), cond_reg);
+ }
+ __ Or(dst.AsRegister<Register>(), AT, TMP);
+ }
+ break;
+ case Primitive::kPrimLong: {
+ if (Primitive::IsFloatingPointType(cond_type)) {
+ __ Mfc1(cond_reg, fcond_reg);
+ }
+ Register dst_lo = dst.AsRegisterPairLow<Register>();
+ Register dst_hi = dst.AsRegisterPairHigh<Register>();
+ if (true_src.IsConstant()) {
+ Register src_lo = false_src.AsRegisterPairLow<Register>();
+ Register src_hi = false_src.AsRegisterPairHigh<Register>();
+ if (cond_inverted) {
+ __ Selnez(dst_lo, src_lo, cond_reg);
+ __ Selnez(dst_hi, src_hi, cond_reg);
+ } else {
+ __ Seleqz(dst_lo, src_lo, cond_reg);
+ __ Seleqz(dst_hi, src_hi, cond_reg);
+ }
+ } else {
+ DCHECK(false_src.IsConstant());
+ Register src_lo = true_src.AsRegisterPairLow<Register>();
+ Register src_hi = true_src.AsRegisterPairHigh<Register>();
+ if (cond_inverted) {
+ __ Seleqz(dst_lo, src_lo, cond_reg);
+ __ Seleqz(dst_hi, src_hi, cond_reg);
+ } else {
+ __ Selnez(dst_lo, src_lo, cond_reg);
+ __ Selnez(dst_hi, src_hi, cond_reg);
+ }
+ }
+ break;
+ }
+ case Primitive::kPrimFloat: {
+ if (!Primitive::IsFloatingPointType(cond_type)) {
+ // sel*.fmt tests bit 0 of the condition register, account for that.
+ __ Sltu(TMP, ZERO, cond_reg);
+ __ Mtc1(TMP, fcond_reg);
+ }
+ FRegister dst_reg = dst.AsFpuRegister<FRegister>();
+ if (true_src.IsConstant()) {
+ FRegister src_reg = false_src.AsFpuRegister<FRegister>();
+ if (cond_inverted) {
+ __ SelnezS(dst_reg, src_reg, fcond_reg);
+ } else {
+ __ SeleqzS(dst_reg, src_reg, fcond_reg);
+ }
+ } else if (false_src.IsConstant()) {
+ FRegister src_reg = true_src.AsFpuRegister<FRegister>();
+ if (cond_inverted) {
+ __ SeleqzS(dst_reg, src_reg, fcond_reg);
+ } else {
+ __ SelnezS(dst_reg, src_reg, fcond_reg);
+ }
+ } else {
+ if (cond_inverted) {
+ __ SelS(fcond_reg,
+ true_src.AsFpuRegister<FRegister>(),
+ false_src.AsFpuRegister<FRegister>());
+ } else {
+ __ SelS(fcond_reg,
+ false_src.AsFpuRegister<FRegister>(),
+ true_src.AsFpuRegister<FRegister>());
+ }
+ __ MovS(dst_reg, fcond_reg);
+ }
+ break;
+ }
+ case Primitive::kPrimDouble: {
+ if (!Primitive::IsFloatingPointType(cond_type)) {
+ // sel*.fmt tests bit 0 of the condition register, account for that.
+ __ Sltu(TMP, ZERO, cond_reg);
+ __ Mtc1(TMP, fcond_reg);
+ }
+ FRegister dst_reg = dst.AsFpuRegister<FRegister>();
+ if (true_src.IsConstant()) {
+ FRegister src_reg = false_src.AsFpuRegister<FRegister>();
+ if (cond_inverted) {
+ __ SelnezD(dst_reg, src_reg, fcond_reg);
+ } else {
+ __ SeleqzD(dst_reg, src_reg, fcond_reg);
+ }
+ } else if (false_src.IsConstant()) {
+ FRegister src_reg = true_src.AsFpuRegister<FRegister>();
+ if (cond_inverted) {
+ __ SeleqzD(dst_reg, src_reg, fcond_reg);
+ } else {
+ __ SelnezD(dst_reg, src_reg, fcond_reg);
+ }
+ } else {
+ if (cond_inverted) {
+ __ SelD(fcond_reg,
+ true_src.AsFpuRegister<FRegister>(),
+ false_src.AsFpuRegister<FRegister>());
+ } else {
+ __ SelD(fcond_reg,
+ false_src.AsFpuRegister<FRegister>(),
+ true_src.AsFpuRegister<FRegister>());
+ }
+ __ MovD(dst_reg, fcond_reg);
+ }
+ break;
+ }
+ }
+}
+
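Editorial sketch (not part of the patch): the kind of sequence the two helpers above
are expected to emit for a simple integer select such as `x = (c != 0) ? y : z`,
assuming the condition `c` is already materialized in a register:

    // R2 (GenConditionalMoveR2): the output register starts out holding the
    // false value z (the output is SameAsFirstInput), then:
    //   movn   out, y, c          // if (c != 0) out = y; otherwise out keeps z
    //
    // R6 (GenConditionalMoveR6): the output may be a separate register:
    //   selnez AT,  y, c          // AT  = (c != 0) ? y : 0
    //   seleqz TMP, z, c          // TMP = (c == 0) ? z : 0
    //   or     out, AT, TMP       // out = (c != 0) ? y : z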
+void LocationsBuilderMIPS::VisitSelect(HSelect* select) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+ CanMoveConditionally(select, codegen_->GetInstructionSetFeatures().IsR6(), locations);
+}
+
+void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) {
+ bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
+ if (CanMoveConditionally(select, is_r6, /* locations_to_set */ nullptr)) {
+ if (is_r6) {
+ GenConditionalMoveR6(select);
+ } else {
+ GenConditionalMoveR2(select);
+ }
+ } else {
+ LocationSummary* locations = select->GetLocations();
+ MipsLabel false_target;
+ GenerateTestAndBranch(select,
+ /* condition_input_index */ 2,
+ /* true_target */ nullptr,
+ &false_target);
+ codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+ __ Bind(&false_target);
+ }
}
void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -3591,6 +4758,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
LoadOperandType load_type = kLoadUnsignedByte;
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ auto null_checker = GetImplicitNullChecker(instruction);
switch (type) {
case Primitive::kPrimBoolean:
@@ -3625,11 +4793,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
// Do implicit Null check
__ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Load),
- instruction,
- dex_pc,
- nullptr,
- IsDirectEntrypoint(kQuickA64Load));
+ codegen_->InvokeRuntime(kQuickA64Load, instruction, dex_pc);
CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
if (type == Primitive::kPrimDouble) {
// FP results are returned in core registers. Need to move them.
@@ -3656,34 +4820,20 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
if (type == Primitive::kPrimLong) {
DCHECK(locations->Out().IsRegisterPair());
dst = locations->Out().AsRegisterPairLow<Register>();
- Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
- if (obj == dst) {
- __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ LoadFromOffset(kLoadWord, dst, obj, offset);
- } else {
- __ LoadFromOffset(kLoadWord, dst, obj, offset);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
- }
} else {
DCHECK(locations->Out().IsRegister());
dst = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(load_type, dst, obj, offset);
}
+ __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
} else {
DCHECK(locations->Out().IsFpuRegister());
FRegister dst = locations->Out().AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
- __ LoadSFromOffset(dst, obj, offset);
+ __ LoadSFromOffset(dst, obj, offset, null_checker);
} else {
- __ LoadDFromOffset(dst, obj, offset);
+ __ LoadDFromOffset(dst, obj, offset, null_checker);
}
}
- // Longs are handled earlier.
- if (type != Primitive::kPrimLong) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
}
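Editorial note (assumption, not established by this hunk): the `null_checker`
obtained from GetImplicitNullChecker() and passed to the load/store helpers above
is presumably a small callable that records the implicit null check once the first
potentially-faulting access has been emitted, roughly:

    // Hypothetical shape of the helper; the exact name and form are an assumption.
    auto null_checker = [this, instruction]() {
      codegen_->MaybeRecordImplicitNullCheck(instruction);
    };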
if (is_volatile) {
@@ -3715,9 +4865,9 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field
}
} else {
if (Primitive::IsFloatingPointType(field_type)) {
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1)));
} else {
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1)));
}
}
}
@@ -3728,9 +4878,11 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
Primitive::Type type = field_info.GetFieldType();
LocationSummary* locations = instruction->GetLocations();
Register obj = locations->InAt(0).AsRegister<Register>();
+ Location value_location = locations->InAt(1);
StoreOperandType store_type = kStoreByte;
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ auto null_checker = GetImplicitNullChecker(instruction);
switch (type) {
case Primitive::kPrimBoolean:
@@ -3767,69 +4919,56 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
if (type == Primitive::kPrimDouble) {
// Pass FP parameters in core registers.
- Location in = locations->InAt(1);
- if (in.IsFpuRegister()) {
- __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), in.AsFpuRegister<FRegister>());
+ if (value_location.IsFpuRegister()) {
+ __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
+ value_location.AsFpuRegister<FRegister>());
__ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
- in.AsFpuRegister<FRegister>());
- } else if (in.IsDoubleStackSlot()) {
+ value_location.AsFpuRegister<FRegister>());
+ } else if (value_location.IsDoubleStackSlot()) {
__ LoadFromOffset(kLoadWord,
locations->GetTemp(1).AsRegister<Register>(),
SP,
- in.GetStackIndex());
+ value_location.GetStackIndex());
__ LoadFromOffset(kLoadWord,
locations->GetTemp(2).AsRegister<Register>(),
SP,
- in.GetStackIndex() + 4);
+ value_location.GetStackIndex() + 4);
} else {
- DCHECK(in.IsConstant());
- DCHECK(in.GetConstant()->IsDoubleConstant());
- int64_t value = bit_cast<int64_t, double>(in.GetConstant()->AsDoubleConstant()->GetValue());
+ DCHECK(value_location.IsConstant());
+ DCHECK(value_location.GetConstant()->IsDoubleConstant());
+ int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
__ LoadConst64(locations->GetTemp(2).AsRegister<Register>(),
locations->GetTemp(1).AsRegister<Register>(),
value);
}
}
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
- instruction,
- dex_pc,
- nullptr,
- IsDirectEntrypoint(kQuickA64Store));
+ codegen_->InvokeRuntime(kQuickA64Store, instruction, dex_pc);
CheckEntrypointTypes<kQuickA64Store, void, volatile int64_t *, int64_t>();
} else {
- if (!Primitive::IsFloatingPointType(type)) {
+ if (value_location.IsConstant()) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker);
+ } else if (!Primitive::IsFloatingPointType(type)) {
Register src;
if (type == Primitive::kPrimLong) {
- DCHECK(locations->InAt(1).IsRegisterPair());
- src = locations->InAt(1).AsRegisterPairLow<Register>();
- Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>();
- __ StoreToOffset(kStoreWord, src, obj, offset);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize);
+ src = value_location.AsRegisterPairLow<Register>();
} else {
- DCHECK(locations->InAt(1).IsRegister());
- src = locations->InAt(1).AsRegister<Register>();
- __ StoreToOffset(store_type, src, obj, offset);
+ src = value_location.AsRegister<Register>();
}
+ __ StoreToOffset(store_type, src, obj, offset, null_checker);
} else {
- DCHECK(locations->InAt(1).IsFpuRegister());
- FRegister src = locations->InAt(1).AsFpuRegister<FRegister>();
+ FRegister src = value_location.AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
- __ StoreSToOffset(src, obj, offset);
+ __ StoreSToOffset(src, obj, offset, null_checker);
} else {
- __ StoreDToOffset(src, obj, offset);
+ __ StoreDToOffset(src, obj, offset, null_checker);
}
}
- // Longs are handled earlier.
- if (type != Primitive::kPrimLong) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
}
// TODO: memory barriers?
if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
- DCHECK(locations->InAt(1).IsRegister());
- Register src = locations->InAt(1).AsRegister<Register>();
+ Register src = value_location.AsRegister<Register>();
codegen_->MarkGCCard(obj, src);
}
@@ -3973,7 +5112,7 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke
__ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value());
// T9();
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
@@ -4042,6 +5181,8 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
}
// We disable PC-relative load when there is an irreducible loop, as the optimization
// is incompatible with it.
+ // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+ // with irreducible loops.
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
bool fallback_load = has_irreducible_loops;
switch (desired_string_load_kind) {
@@ -4057,10 +5198,8 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
DCHECK(Runtime::Current()->UseJitCompilation());
fallback_load = false;
break;
- case HLoadString::LoadKind::kDexCachePcRelative:
+ case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
- // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
- // with irreducible loops.
break;
case HLoadString::LoadKind::kDexCacheViaMethod:
fallback_load = false;
@@ -4140,7 +5279,7 @@ Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticO
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method ATTRIBUTE_UNUSED) {
+ HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
// We disable PC-relative load when there is an irreducible loop, as the optimization
// is incompatible with it.
@@ -4209,13 +5348,16 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
}
switch (method_load_kind) {
- case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+ case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
// temp = thread->string_init_entrypoint
+ uint32_t offset =
+ GetThreadOffset<kMipsPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
__ LoadFromOffset(kLoadWord,
temp.AsRegister<Register>(),
TR,
- invoke->GetStringInitOffset());
+ offset);
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
@@ -4274,7 +5416,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
// T9 prepared above for better instruction scheduling.
// T9()
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
break;
case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
// TODO: Implement this type.
@@ -4290,7 +5432,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
kMipsPointerSize).Int32Value());
// T9()
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
break;
}
DCHECK(!IsLeafMethod());
@@ -4314,8 +5456,13 @@ void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDire
}
void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
- LocationSummary* locations = invoke->GetLocations();
- Location receiver = locations->InAt(0);
+ // Use the calling convention instead of the location of the receiver, as
+ // intrinsics may have put the receiver in a different register. In the intrinsics
+ // slow path, the arguments have been moved to the right place, so here we are
+ // guaranteed that the receiver is the first register of the calling convention.
+ InvokeDexCallingConvention calling_convention;
+ Register receiver = calling_convention.GetRegisterAt(0);
+
Register temp = temp_location.AsRegister<Register>();
size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
invoke->GetVTableIndex(), kMipsPointerSize).SizeValue();
@@ -4323,8 +5470,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem
Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
// temp = object->GetClass();
- DCHECK(receiver.IsRegister());
- __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+ __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetMethodAt(method_offset);
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
@@ -4332,7 +5478,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem
__ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value());
// T9();
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
}
void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4386,11 +5532,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
LocationSummary* locations = cls->GetLocations();
if (cls->NeedsAccessCheck()) {
codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
- cls,
- cls->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess));
+ codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
@@ -4441,21 +5583,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
DCHECK(!kEmitCompilerReadBarrier);
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
- if (isR6) {
- __ Bind(&info->high_label);
- __ Bind(&info->pc_rel_label);
- // Add a 32-bit offset to PC.
- __ Auipc(out, /* placeholder */ 0x1234);
- __ Addiu(out, out, /* placeholder */ 0x5678);
- } else {
- __ Bind(&info->high_label);
- __ Lui(out, /* placeholder */ 0x1234);
- // We do not bind info->pc_rel_label here, we'll use the assembler's label
- // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
- __ Ori(out, out, /* placeholder */ 0x5678);
- // Add a 32-bit offset to PC.
- __ Addu(out, out, base_or_current_method_reg);
- }
+ codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
@@ -4544,7 +5672,9 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT
void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
- ? LocationSummary::kCallOnSlowPath
+ ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
+ ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kCallOnSlowPath)
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
@@ -4553,12 +5683,12 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBssEntry:
if (codegen_->GetInstructionSetFeatures().IsR6()) {
break;
}
FALLTHROUGH_INTENDED;
// We need an extra register for PC-relative dex cache accesses.
- case HLoadString::LoadKind::kDexCachePcRelative:
case HLoadString::LoadKind::kDexCacheViaMethod:
locations->SetInAt(0, Location::RequiresRegister());
break;
@@ -4580,13 +5710,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBssEntry:
base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
break;
- // We need an extra register for PC-relative dex cache accesses.
- case HLoadString::LoadKind::kDexCachePcRelative:
- case HLoadString::LoadKind::kDexCacheViaMethod:
- base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
- break;
default:
base_or_current_method_reg = ZERO;
break;
@@ -4602,23 +5728,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
return; // No dex cache slow path.
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
- if (isR6) {
- __ Bind(&info->high_label);
- __ Bind(&info->pc_rel_label);
- // Add a 32-bit offset to PC.
- __ Auipc(out, /* placeholder */ 0x1234);
- __ Addiu(out, out, /* placeholder */ 0x5678);
- } else {
- __ Bind(&info->high_label);
- __ Lui(out, /* placeholder */ 0x1234);
- // We do not bind info->pc_rel_label here, we'll use the assembler's label
- // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
- __ Ori(out, out, /* placeholder */ 0x5678);
- // Add a 32-bit offset to PC.
- __ Addu(out, out, base_or_current_method_reg);
- }
+ codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBootImageAddress: {
@@ -4630,52 +5743,28 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
codegen_->DeduplicateBootImageAddressLiteral(address));
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
- DCHECK_ALIGNED(load->GetAddress(), 4u);
- int16_t offset = Low16Bits(address);
- uint32_t base_address = address - offset; // This accounts for offset sign extension.
- __ Lui(out, High16Bits(base_address));
- // /* GcRoot<mirror::String> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(load, out_loc, out, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- HMipsDexCacheArraysBase* base = load->InputAt(0)->AsMipsDexCacheArraysBase();
- int32_t offset =
- load->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
- // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset)
- GenerateGcRootFieldLoad(load, out_loc, base_or_current_method_reg, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(load,
- out_loc,
- base_or_current_method_reg,
- ArtMethod::DeclaringClassOffset().Int32Value());
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(load,
- out_loc,
- out,
- CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- break;
+ case HLoadString::LoadKind::kBssEntry: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ CodeGeneratorMIPS::PcRelativePatchInfo* info =
+ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+ __ LoadFromOffset(kLoadWord, out, out, 0);
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
+ codegen_->AddSlowPath(slow_path);
+ __ Beqz(out, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
}
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
- codegen_->AddSlowPath(slow_path);
- __ Beqz(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+ codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
}
void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) {
@@ -4696,18 +5785,10 @@ void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction)
void InstructionCodeGeneratorMIPS::VisitMonitorOperation(HMonitorOperation* instruction) {
if (instruction->IsEnter()) {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLockObject),
- instruction,
- instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickLockObject));
+ codegen_->InvokeRuntime(kQuickLockObject, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
} else {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pUnlockObject),
- instruction,
- instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickUnlockObject));
+ codegen_->InvokeRuntime(kQuickUnlockObject, instruction, instruction->GetDexPc());
}
CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
}
@@ -4882,12 +5963,7 @@ void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
__ Lw(current_method_register, SP, kCurrentMethodStackOffset);
// Move an uint16_t value to a register.
__ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
- codegen_->InvokeRuntime(
- GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(),
- instruction,
- instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickAllocArrayWithAccessCheck));
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
void*, uint32_t, int32_t, ArtMethod*>();
}
@@ -4913,15 +5989,10 @@ void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
__ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
__ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
} else {
- codegen_->InvokeRuntime(
- GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(),
- instruction,
- instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickAllocObjectWithAccessCheck));
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
}
}
@@ -4973,14 +6044,8 @@ void InstructionCodeGeneratorMIPS::VisitBooleanNot(HBooleanNot* instruction) {
}
void LocationsBuilderMIPS::VisitNullCheck(HNullCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::RequiresRegister());
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void CodeGeneratorMIPS::GenerateImplicitNullCheck(HNullCheck* instruction) {
@@ -5106,27 +6171,17 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
GenerateDivRemIntegral(instruction);
break;
case Primitive::kPrimLong: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
- instruction,
- instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickLmod));
+ codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
break;
}
case Primitive::kPrimFloat: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf),
- instruction, instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickFmodf));
+ codegen_->InvokeRuntime(kQuickFmodf, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickFmodf, float, float, float>();
break;
}
case Primitive::kPrimDouble: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod),
- instruction, instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickFmod));
+ codegen_->InvokeRuntime(kQuickFmod, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickFmod, double, double, double>();
break;
}
@@ -5282,7 +6337,9 @@ void InstructionCodeGeneratorMIPS::VisitUnresolvedStaticFieldSet(
}
void LocationsBuilderMIPS::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5307,11 +6364,7 @@ void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) {
}
void InstructionCodeGeneratorMIPS::VisitThrow(HThrow* instruction) {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
- instruction,
- instruction->GetDexPc(),
- nullptr,
- IsDirectEntrypoint(kQuickDeliverException));
+ codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
@@ -5432,15 +6485,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
__ Cvtdl(dst, FTMP);
}
} else {
- int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
- : QUICK_ENTRY_POINT(pL2d);
- bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
- : IsDirectEntrypoint(kQuickL2d);
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr,
- direct);
+ QuickEntrypointEnum entrypoint = (result_type == Primitive::kPrimFloat) ? kQuickL2f
+ : kQuickL2d;
+ codegen_->InvokeRuntime(entrypoint, conversion, conversion->GetDexPc());
if (result_type == Primitive::kPrimFloat) {
CheckEntrypointTypes<kQuickL2f, float, int64_t>();
} else {
@@ -5533,11 +6580,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
__ Bind(&done);
} else {
- int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
- : QUICK_ENTRY_POINT(pD2l);
- bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
- : IsDirectEntrypoint(kQuickD2l);
- codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct);
+ QuickEntrypointEnum entrypoint = (input_type == Primitive::kPrimFloat) ? kQuickF2l
+ : kQuickD2l;
+ codegen_->InvokeRuntime(entrypoint, conversion, conversion->GetDexPc());
if (input_type == Primitive::kPrimFloat) {
CheckEntrypointTypes<kQuickF2l, int64_t, float>();
} else {
@@ -5742,13 +6787,11 @@ void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
locations->SetInAt(0, Location::RequiresRegister());
}
-void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
- int32_t lower_bound = switch_instr->GetStartValue();
- int32_t num_entries = switch_instr->GetNumEntries();
- LocationSummary* locations = switch_instr->GetLocations();
- Register value_reg = locations->InAt(0).AsRegister<Register>();
- HBasicBlock* default_block = switch_instr->GetDefaultBlock();
-
+void InstructionCodeGeneratorMIPS::GenPackedSwitchWithCompares(Register value_reg,
+ int32_t lower_bound,
+ uint32_t num_entries,
+ HBasicBlock* switch_block,
+ HBasicBlock* default_block) {
// Create a set of compare/jumps.
Register temp_reg = TMP;
__ Addiu32(temp_reg, value_reg, -lower_bound);
@@ -5757,7 +6800,7 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr
// this case, index >= num_entries must be true. So that we can save one branch instruction.
__ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
- const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+ const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
// Jump to successors[0] if value == lower_bound.
__ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
int32_t last_index = 0;
@@ -5775,11 +6818,107 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr
}
// And the default for any other value.
- if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+ if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
__ B(codegen_->GetLabelOf(default_block));
}
}
+void InstructionCodeGeneratorMIPS::GenTableBasedPackedSwitch(Register value_reg,
+ Register constant_area,
+ int32_t lower_bound,
+ uint32_t num_entries,
+ HBasicBlock* switch_block,
+ HBasicBlock* default_block) {
+ // Create a jump table.
+ std::vector<MipsLabel*> labels(num_entries);
+ const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+ for (uint32_t i = 0; i < num_entries; i++) {
+ labels[i] = codegen_->GetLabelOf(successors[i]);
+ }
+ JumpTable* table = __ CreateJumpTable(std::move(labels));
+
+ // Is the value in range?
+ __ Addiu32(TMP, value_reg, -lower_bound);
+ if (IsInt<16>(static_cast<int32_t>(num_entries))) {
+ __ Sltiu(AT, TMP, num_entries);
+ __ Beqz(AT, codegen_->GetLabelOf(default_block));
+ } else {
+ __ LoadConst32(AT, num_entries);
+ __ Bgeu(TMP, AT, codegen_->GetLabelOf(default_block));
+ }
+
+ // We are in the range of the table.
+ // Load the target address from the jump table, indexing by the value.
+ __ LoadLabelAddress(AT, constant_area, table->GetLabel());
+ __ Sll(TMP, TMP, 2);
+ __ Addu(TMP, TMP, AT);
+ __ Lw(TMP, TMP, 0);
+ // Compute the absolute target address by adding the table start address
+ // (the table contains offsets to targets relative to its start).
+ __ Addu(TMP, TMP, AT);
+ // And jump.
+ __ Jr(TMP);
+ __ NopIfNoReordering();
+}
+
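Editorial sketch (not part of the patch): the table-based dispatch emitted above
amounts to the following address arithmetic, with `index = value - lower_bound`
already range-checked against `num_entries`:

    // int32_t entry = jump_table[index];       // 32-bit offset relative to the table start
    // target        = table_address + entry;   // absolute address of the case block
    // jump to target                           // __ Jr(TMP)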
+void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+ int32_t lower_bound = switch_instr->GetStartValue();
+ uint32_t num_entries = switch_instr->GetNumEntries();
+ LocationSummary* locations = switch_instr->GetLocations();
+ Register value_reg = locations->InAt(0).AsRegister<Register>();
+ HBasicBlock* switch_block = switch_instr->GetBlock();
+ HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+ if (codegen_->GetInstructionSetFeatures().IsR6() &&
+ num_entries > kPackedSwitchJumpTableThreshold) {
+ // R6 uses PC-relative addressing to access the jump table.
+    // R2, on the other hand, requires an HMipsComputeBaseMethodAddress input to
+    // access the jump table; this is implemented by changing HPackedSwitch to
+    // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress.
+ // See VisitMipsPackedSwitch() for the table-based implementation on R2.
+ GenTableBasedPackedSwitch(value_reg,
+ ZERO,
+ lower_bound,
+ num_entries,
+ switch_block,
+ default_block);
+ } else {
+ GenPackedSwitchWithCompares(value_reg,
+ lower_bound,
+ num_entries,
+ switch_block,
+ default_block);
+ }
+}
+
+void LocationsBuilderMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ // Constant area pointer (HMipsComputeBaseMethodAddress).
+ locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) {
+ int32_t lower_bound = switch_instr->GetStartValue();
+ uint32_t num_entries = switch_instr->GetNumEntries();
+ LocationSummary* locations = switch_instr->GetLocations();
+ Register value_reg = locations->InAt(0).AsRegister<Register>();
+ Register constant_area = locations->InAt(1).AsRegister<Register>();
+ HBasicBlock* switch_block = switch_instr->GetBlock();
+ HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+ // This is an R2-only path. HPackedSwitch has been changed to
+ // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress
+ // required to address the jump table relative to PC.
+ GenTableBasedPackedSwitch(value_reg,
+ constant_area,
+ lower_bound,
+ num_entries,
+ switch_block,
+ default_block);
+}
+
void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress(
HMipsComputeBaseMethodAddress* insn) {
LocationSummary* locations =
@@ -5813,24 +6952,8 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra
Register reg = base->GetLocations()->Out().AsRegister<Register>();
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
-
- if (codegen_->GetInstructionSetFeatures().IsR6()) {
- __ Bind(&info->high_label);
- __ Bind(&info->pc_rel_label);
- // Add a 32-bit offset to PC.
- __ Auipc(reg, /* placeholder */ 0x1234);
- __ Addiu(reg, reg, /* placeholder */ 0x5678);
- } else {
- // Generate a dummy PC-relative call to obtain PC.
- __ Nal();
- __ Bind(&info->high_label);
- __ Lui(reg, /* placeholder */ 0x1234);
- __ Bind(&info->pc_rel_label);
- __ Ori(reg, reg, /* placeholder */ 0x5678);
- // Add a 32-bit offset to PC.
- __ Addu(reg, reg, RA);
- // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()?
- }
+ // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL.
+ codegen_->EmitPcRelativeAddressPlaceholder(info, reg, ZERO);
}
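Editorial note (inferred from the removed code above, not stated by the patch): the
new EmitPcRelativeAddressPlaceholder() helper presumably centralizes the two inline
placeholder sequences deleted in this and the earlier hunks:

    // R6:                              // R2 (base == ZERO forces emitting NAL):
    //   auipc reg, 0x1234              //   nal
    //   addiu reg, reg, 0x5678         //   lui  reg, 0x1234
    //                                  //   ori  reg, reg, 0x5678
    //                                  //   addu reg, reg, RA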
void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 63a0345c1c..e132819c24 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -191,6 +191,8 @@ class LocationsBuilderMIPS : public HGraphVisitor {
void HandleShift(HBinaryOperation* operation);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ Location RegisterOrZeroConstant(HInstruction* instruction);
+ Location FpuRegisterOrConstantForStore(HInstruction* instruction);
InvokeDexCallingConventionVisitorMIPS parameter_visitor_;
@@ -218,6 +220,14 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
MipsAssembler* GetAssembler() const { return assembler_; }
+ // Compare-and-jump packed switch generates approx. 3 + 2.5 * N 32-bit
+ // instructions for N cases.
+ // Table-based packed switch generates approx. 11 32-bit instructions
+ // and N 32-bit data words for N cases.
+ // At N = 6 they come out as 18 and 17 32-bit words respectively.
+ // We switch to the table-based method starting with 7 cases.
+ static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+
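Editorial note (not part of the patch): plugging the approximations above into the
check `num_entries > kPackedSwitchJumpTableThreshold` used by VisitPackedSwitch():

    // N = 6: compare/jump ~ 3 + 2.5*6 = 18 words,   table ~ 11 + 6 = 17 words
    // N = 7: compare/jump ~ 3 + 2.5*7 = 20.5 words, table ~ 11 + 7 = 18 words
    // so the table-based form kicks in from 7 cases onwards, as the comment states.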
private:
void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -237,12 +247,38 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
Register obj,
uint32_t offset);
void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
+ // When the function returns `false` it means that the condition holds if `dst` is non-zero
+ // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
+ // `dst` are exchanged.
+ bool MaterializeIntCompare(IfCondition cond,
+ LocationSummary* input_locations,
+ Register dst);
void GenerateIntCompareAndBranch(IfCondition cond,
LocationSummary* locations,
MipsLabel* label);
void GenerateLongCompareAndBranch(IfCondition cond,
LocationSummary* locations,
MipsLabel* label);
+ void GenerateFpCompare(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* locations);
+ // When the function returns `false` it means that the condition holds if the condition
+ // code flag `cc` is non-zero and doesn't hold if `cc` is zero. If it returns `true`,
+ // the roles of zero and non-zero values of the `cc` flag are exchanged.
+ bool MaterializeFpCompareR2(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* input_locations,
+ int cc);
+ // When the function returns `false` it means that the condition holds if `dst` is non-zero
+ // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
+ // `dst` are exchanged.
+ bool MaterializeFpCompareR6(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* input_locations,
+ FRegister dst);
void GenerateFpCompareAndBranch(IfCondition cond,
bool gt_bias,
Primitive::Type type,
@@ -257,6 +293,20 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ auto GetImplicitNullChecker(HInstruction* instruction);
+ void GenPackedSwitchWithCompares(Register value_reg,
+ int32_t lower_bound,
+ uint32_t num_entries,
+ HBasicBlock* switch_block,
+ HBasicBlock* default_block);
+ void GenTableBasedPackedSwitch(Register value_reg,
+ Register constant_area,
+ int32_t lower_bound,
+ uint32_t num_entries,
+ HBasicBlock* switch_block,
+ HBasicBlock* default_block);
+ void GenConditionalMoveR2(HSelect* select);
+ void GenConditionalMoveR6(HSelect* select);
MipsAssembler* const assembler_;
CodeGeneratorMIPS* const codegen_;
@@ -273,6 +323,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
virtual ~CodeGeneratorMIPS() {}
void ComputeSpillMask() OVERRIDE;
+ bool HasAllocatedCalleeSaveRegisters() const OVERRIDE;
void GenerateFrameEntry() OVERRIDE;
void GenerateFrameExit() OVERRIDE;
@@ -304,10 +355,10 @@ class CodeGeneratorMIPS : public CodeGenerator {
void SetupBlockedRegisters() const OVERRIDE;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
- size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
- size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
void ClobberRA() {
clobbered_ra_ = true;
}
@@ -315,9 +366,6 @@ class CodeGeneratorMIPS : public CodeGenerator {
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
- // Blocks all register pairs made out of blocked core registers.
- void UpdateBlockedPairRegisters() const;
-
InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips; }
const MipsInstructionSetFeatures& GetInstructionSetFeatures() const {
@@ -338,7 +386,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
- void MoveConstant(Location destination, int32_t value);
+ void MoveConstant(Location destination, int32_t value) OVERRIDE;
void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
@@ -346,17 +394,11 @@ class CodeGeneratorMIPS : public CodeGenerator {
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path) OVERRIDE;
-
- void InvokeRuntime(int32_t offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path,
- bool is_direct_entrypoint);
+ SlowPathCode* slow_path = nullptr) OVERRIDE;
ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
- bool NeedsTwoRegisters(Primitive::Type type) const {
+ bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
return type == Primitive::kPrimLong;
}
@@ -374,7 +416,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method) OVERRIDE;
+ HInvokeStaticOrDirect* invoke) OVERRIDE;
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
@@ -384,9 +426,9 @@ class CodeGeneratorMIPS : public CodeGenerator {
UNIMPLEMENTED(FATAL) << "Not implemented on MIPS";
}
- void GenerateNop();
- void GenerateImplicitNullCheck(HNullCheck* instruction);
- void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateNop() OVERRIDE;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
// The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
// and boot image strings. The only difference is the interpretation of the offset_or_index.
@@ -414,6 +456,8 @@ class CodeGeneratorMIPS : public CodeGenerator {
Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
+ void EmitPcRelativeAddressPlaceholder(PcRelativePatchInfo* info, Register out, Register base);
+
private:
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
@@ -434,6 +478,10 @@ class CodeGeneratorMIPS : public CodeGenerator {
uint32_t offset_or_index,
ArenaDeque<PcRelativePatchInfo>* patches);
+ template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<LinkerPatch>* linker_patches);
+
// Labels for each block that will be compiled.
MipsLabel* block_labels_;
MipsLabel frame_entry_label_;
@@ -452,7 +500,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
// Deduplication map for boot string literals for kBootImageLinkTimeAddress.
BootStringToLiteralMap boot_image_string_patches_;
- // PC-relative String patch info.
+ // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
// Deduplication map for boot type literals for kBootImageLinkTimeAddress.
BootTypeToLiteralMap boot_image_type_patches_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3472830379..010bf24232 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -102,8 +102,8 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type)
return Mips64ReturnLocation(type);
}
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value()
class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
@@ -127,13 +127,10 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
locations->InAt(1),
Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
Primitive::kPrimInt);
- uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
- ? QUICK_ENTRY_POINT(pThrowStringBounds)
- : QUICK_ENTRY_POINT(pThrowArrayBounds);
- mips64_codegen->InvokeRuntime(entry_point_offset,
- instruction_,
- instruction_->GetDexPc(),
- this);
+ QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
+ ? kQuickThrowStringBounds
+ : kQuickThrowArrayBounds;
+ mips64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
@@ -153,14 +150,7 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
- if (instruction_->CanThrowIntoCatchBlock()) {
- // Live registers will be restored in the catch block if caught.
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- }
- mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ mips64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
@@ -191,9 +181,9 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 {
InvokeRuntimeCallingConvention calling_convention;
__ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
- int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
- : QUICK_ENTRY_POINT(pInitializeType);
- mips64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
+ QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
+ : kQuickInitializeType;
+ mips64_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
if (do_clinit_) {
CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
} else {
@@ -246,7 +236,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
InvokeRuntimeCallingConvention calling_convention;
const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
__ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
- mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
+ mips64_codegen->InvokeRuntime(kQuickResolveString,
instruction_,
instruction_->GetDexPc(),
this);
@@ -277,7 +267,7 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
- mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
+ mips64_codegen->InvokeRuntime(kQuickThrowNullPointer,
instruction_,
instruction_->GetDexPc(),
this);
@@ -300,13 +290,8 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ mips64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ Bc(GetReturnLabel());
} else {
@@ -357,10 +342,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
- mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
- instruction_,
- dex_pc,
- this);
+ mips64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
CheckEntrypointTypes<
kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
@@ -368,7 +350,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
} else {
DCHECK(instruction_->IsCheckCast());
- mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
+ mips64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
@@ -390,11 +372,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ mips64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -429,8 +407,8 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
}
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value()
void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
@@ -578,9 +556,14 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() {
__ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
- static_assert(IsInt<16>(kCurrentMethodStackOffset),
- "kCurrentMethodStackOffset must fit into int16_t");
- __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ static_assert(IsInt<16>(kCurrentMethodStackOffset),
+ "kCurrentMethodStackOffset must fit into int16_t");
+ __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+ }
}
void CodeGeneratorMIPS64::GenerateFrameExit() {
@@ -961,25 +944,20 @@ void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int re
}
void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value(),
- instruction,
- dex_pc,
- slow_path);
-}
-
-void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- ValidateInvokeRuntime(instruction, slow_path);
+ ValidateInvokeRuntime(entrypoint, instruction, slow_path);
// TODO: anything related to T9/GP/GOT/PIC/.so's?
- __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
+ __ LoadFromOffset(kLoadDoubleword,
+ T9,
+ TR,
+ GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value());
__ Jalr(T9);
__ Nop();
- RecordPcInfo(instruction, dex_pc, slow_path);
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
}
void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path,
@@ -1516,10 +1494,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
}
} else {
DCHECK_EQ(value_type, Primitive::kPrimNot);
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
}
break;
@@ -1584,15 +1559,13 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
}
void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -2136,14 +2109,8 @@ void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) {
}
void LocationsBuilderMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
@@ -2656,6 +2623,7 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) {
void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -3009,7 +2977,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method ATTRIBUTE_UNUSED) {
+ HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
switch (desired_dispatch_info.method_load_kind) {
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
@@ -3043,13 +3011,16 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
- case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+ case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
// temp = thread->string_init_entrypoint
+ uint32_t offset =
+ GetThreadOffset<kMips64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
__ LoadFromOffset(kLoadDoubleword,
temp.AsRegister<GpuRegister>(),
TR,
- invoke->GetStringInitOffset());
+ offset);
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
@@ -3189,10 +3160,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
LocationSummary* locations = cls->GetLocations();
if (cls->NeedsAccessCheck()) {
codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
- cls,
- cls->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
@@ -3263,22 +3231,11 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
}
void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
- LocationSummary* locations = load->GetLocations();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
- GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
- __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
- ArtMethod::DeclaringClassOffset().Int32Value());
- __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- __ LoadFromOffset(
- kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- // TODO: We will need a read barrier here.
-
- if (!load->IsInDexCache()) {
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
- codegen_->AddSlowPath(slow_path);
- __ Beqzc(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+  // TODO: Re-add the compiler code to do the string dex cache lookup.
+ SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ Bc(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) {
@@ -3298,12 +3255,9 @@ void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instructio
}
void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) {
- codegen_->InvokeRuntime(instruction->IsEnter()
- ? QUICK_ENTRY_POINT(pLockObject)
- : QUICK_ENTRY_POINT(pUnlockObject),
+ codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
instruction,
- instruction->GetDexPc(),
- nullptr);
+ instruction->GetDexPc());
if (instruction->IsEnter()) {
CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
} else {
@@ -3431,10 +3385,7 @@ void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
LocationSummary* locations = instruction->GetLocations();
// Move a uint16_t value to a register.
__ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
}
@@ -3463,10 +3414,7 @@ void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction)
__ Nop();
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
} else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
}
}
@@ -3509,14 +3457,8 @@ void InstructionCodeGeneratorMIPS64::VisitBooleanNot(HBooleanNot* instruction) {
}
void LocationsBuilderMIPS64::VisitNullCheck(HNullCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::RequiresRegister());
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void CodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) {
@@ -3637,9 +3579,8 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
- int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
- : QUICK_ENTRY_POINT(pFmod);
- codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr);
+ QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod;
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
if (type == Primitive::kPrimFloat) {
CheckEntrypointTypes<kQuickFmodf, float, float, float>();
} else {
@@ -3795,7 +3736,9 @@ void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet(
}
void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -3820,10 +3763,7 @@ void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitThrow(HThrow* instruction) {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 197f86b22b..690eccb7d8 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -285,10 +285,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
void SetupBlockedRegisters() const OVERRIDE;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
- size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
- size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -323,16 +323,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path) OVERRIDE;
-
- void InvokeRuntime(int32_t offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path);
+ SlowPathCode* slow_path = nullptr) OVERRIDE;
ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
- bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const { return false; }
+ bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; }
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
@@ -348,7 +343,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method) OVERRIDE;
+ HInvokeStaticOrDirect* invoke) OVERRIDE;
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
@@ -358,9 +353,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64";
}
- void GenerateNop();
- void GenerateImplicitNullCheck(HNullCheck* instruction);
- void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateNop() OVERRIDE;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
private:
// Labels for each block that will be compiled.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a2fa24542c..efd33c7025 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -47,8 +47,8 @@ static constexpr int kC2ConditionMask = 0x400;
static constexpr int kFakeReturnRegister = Register(8);
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
class NullCheckSlowPathX86 : public SlowPathCode {
@@ -62,7 +62,7 @@ class NullCheckSlowPathX86 : public SlowPathCode {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
+ x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
instruction_,
instruction_->GetDexPc(),
this);
@@ -84,14 +84,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- if (instruction_->CanThrowIntoCatchBlock()) {
- // Live registers will be restored in the catch block if caught.
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- }
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
@@ -157,6 +150,9 @@ class BoundsCheckSlowPathX86 : public SlowPathCode {
length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
}
__ movl(length_loc.AsRegister<Register>(), array_len);
+ if (mirror::kUseStringCompression) {
+ __ andl(length_loc.AsRegister<Register>(), Immediate(INT32_MAX));
+ }
}
x86_codegen->EmitParallelMoves(
locations->InAt(0),
@@ -165,13 +161,10 @@ class BoundsCheckSlowPathX86 : public SlowPathCode {
length_loc,
Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
Primitive::kPrimInt);
- uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
- ? QUICK_ENTRY_POINT(pThrowStringBounds)
- : QUICK_ENTRY_POINT(pThrowArrayBounds);
- x86_codegen->InvokeRuntime(entry_point_offset,
- instruction_,
- instruction_->GetDexPc(),
- this);
+ QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
+ ? kQuickThrowStringBounds
+ : kQuickThrowArrayBounds;
+ x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
@@ -192,13 +185,8 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -239,14 +227,18 @@ class LoadStringSlowPathX86 : public SlowPathCode {
InvokeRuntimeCallingConvention calling_convention;
const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
__ movl(calling_convention.GetRegisterAt(0), Immediate(string_index));
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
RestoreLiveRegisters(codegen, locations);
+ // Store the resolved String to the BSS entry.
+ Register method_address = locations->InAt(0).AsRegister<Register>();
+ __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset),
+ locations->Out().AsRegister<Register>());
+ Label* fixup_label = x86_codegen->NewStringBssEntryPatch(instruction_->AsLoadString());
+ __ Bind(fixup_label);
+
__ jmp(GetExitLabel());
}
@@ -274,8 +266,8 @@ class LoadClassSlowPathX86 : public SlowPathCode {
InvokeRuntimeCallingConvention calling_convention;
__ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex()));
- x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
- : QUICK_ENTRY_POINT(pInitializeType),
+ x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage
+ : kQuickInitializeType,
at_, dex_pc_, this);
if (do_clinit_) {
CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
@@ -344,7 +336,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
+ x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
instruction_,
instruction_->GetDexPc(),
this);
@@ -352,10 +344,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
@@ -386,11 +375,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -429,10 +414,7 @@ class ArraySetSlowPathX86 : public SlowPathCode {
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
@@ -444,11 +426,25 @@ class ArraySetSlowPathX86 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
};
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj)
- : SlowPathCode(instruction), obj_(obj) {
+ ReadBarrierMarkSlowPathX86(HInstruction* instruction,
+ Location ref,
+ bool unpoison_ref_before_marking)
+ : SlowPathCode(instruction),
+ ref_(ref),
+ unpoison_ref_before_marking_(unpoison_ref_before_marking) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -456,54 +452,228 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- Register reg = obj_.AsRegister<Register>();
+ Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
instruction_->IsLoadClass() ||
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
+ if (unpoison_ref_before_marking_) {
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+ }
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
- DCHECK_NE(reg, ESP);
- DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+ DCHECK_NE(ref_reg, ESP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
// "Compact" slow path, saving two moves.
//
// Instead of using the standard runtime calling convention (input
// and output in EAX):
//
- // EAX <- obj
+ // EAX <- ref
// EAX <- ReadBarrierMark(EAX)
- // obj <- EAX
+ // ref <- EAX
//
- // we just use rX (the register holding `obj`) as input and output
+ // we just use rX (the register containing `ref`) as input and output
// of a dedicated entrypoint:
//
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg);
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
// This runtime call does not require a stack map.
x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
}
private:
- const Location obj_;
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // Should the reference in `ref_` be unpoisoned prior to marking it?
+ const bool unpoison_ref_before_marking_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
};
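The "compact" calling convention used above relies on there being one ReadBarrierMarkRegX entrypoint per core register, so the offset into the thread's entrypoint table can be derived from the register number alone. Below is a minimal sketch of that offset computation; the consecutive, one-pointer-apart layout and the base-offset parameter are assumptions made for illustration, not a quotation of the runtime's actual tables.

// Illustrative sketch: derive the thread-local offset of the marking
// entrypoint for core register `reg`, assuming the ReadBarrierMarkRegX
// entrypoints are laid out consecutively, one pointer apart.
#include <cstddef>
#include <cstdint>
#include <cstdio>

int32_t ReadBarrierMarkEntryPointOffset(size_t base_offset,   // offset of the Reg00 entrypoint (hypothetical)
                                        size_t pointer_size,  // 4 on x86
                                        size_t reg) {
  return static_cast<int32_t>(base_offset + reg * pointer_size);
}

int main() {
  // With a hypothetical base offset of 0x200 and 4-byte pointers, register 5
  // would use the entrypoint stored at offset 0x214.
  std::printf("0x%x\n", static_cast<unsigned>(ReadBarrierMarkEntryPointOffset(0x200, 4, 5)));
}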
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathX86 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ const Address& field_addr,
+ bool unpoison_ref_before_marking,
+ Register temp)
+ : SlowPathCode(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_addr_(field_addr),
+ unpoison_ref_before_marking_(unpoison_ref_before_marking),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+
+ __ Bind(GetEntryLabel());
+ if (unpoison_ref_before_marking_) {
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+ }
+
+ // Save the old (unpoisoned) reference.
+ __ movl(temp_, ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ DCHECK_NE(ref_reg, ESP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in EAX):
+ //
+ // EAX <- ref
+ // EAX <- ReadBarrierMark(EAX)
+ // ref <- EAX
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*field_addr`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
+ // operation below would abort the CAS, leaving the field as-is.
+ NearLabel done;
+ __ cmpl(temp_, ref_reg);
+ __ j(kEqual, &done);
+
+    // Update the holder's field atomically. This may fail if the
+    // mutator updates it before us, but that is OK. This is achieved
+    // using a strong compare-and-set (CAS) operation with relaxed
+    // memory synchronization ordering, where the expected value is
+    // the old reference and the desired value is the new reference.
+    // This operation is implemented with a 32-bit LOCK CMPXCHG
+    // instruction, which requires the expected value (the old
+    // reference) to be in EAX. Save EAX beforehand, and move the
+    // expected value (stored in `temp_`) into EAX.
+ __ pushl(EAX);
+ __ movl(EAX, temp_);
+
+ // Convenience aliases.
+ Register base = obj_;
+ Register expected = EAX;
+ Register value = ref_reg;
+
+ bool base_equals_value = (base == value);
+ if (kPoisonHeapReferences) {
+ if (base_equals_value) {
+ // If `base` and `value` are the same register location, move
+ // `value` to a temporary register. This way, poisoning
+ // `value` won't invalidate `base`.
+ value = temp_;
+ __ movl(value, base);
+ }
+
+ // Check that the register allocator did not assign the location
+ // of `expected` (EAX) to `value` nor to `base`, so that heap
+ // poisoning (when enabled) works as intended below.
+ // - If `value` were equal to `expected`, both references would
+ // be poisoned twice, meaning they would not be poisoned at
+ // all, as heap poisoning uses address negation.
+ // - If `base` were equal to `expected`, poisoning `expected`
+ // would invalidate `base`.
+ DCHECK_NE(value, expected);
+ DCHECK_NE(base, expected);
+
+ __ PoisonHeapReference(expected);
+ __ PoisonHeapReference(value);
+ }
+
+ __ LockCmpxchgl(field_addr_, value);
+
+ // If heap poisoning is enabled, we need to unpoison the values
+ // that were poisoned earlier.
+ if (kPoisonHeapReferences) {
+ if (base_equals_value) {
+ // `value` has been moved to a temporary register, no need
+ // to unpoison it.
+ } else {
+ __ UnpoisonHeapReference(value);
+ }
+      // No need to unpoison `expected` (EAX), as it is overwritten below.
+ }
+
+ // Restore EAX.
+ __ popl(EAX);
+
+ __ Bind(&done);
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const Register obj_;
+ // The address of the marked reference field. The base of this address must be `obj_`.
+ const Address field_addr_;
+
+ // Should the reference in `ref_` be unpoisoned prior to marking it?
+ const bool unpoison_ref_before_marking_;
+
+ const Register temp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
+};
+
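As a plain-C++ sketch of what the slow path above accomplishes (leaving out heap poisoning and the EAX save/restore; Mark() is a placeholder standing in for the per-register marking entrypoint and is not a function from this patch):

// Mark a reference and, if marking changed it, try once to swing the holder's
// field from the old value to the new one, leaving it alone if another thread
// already installed something else.
#include <atomic>

struct Object {};

// Placeholder for the ReadBarrierMarkRegX runtime entrypoint (assumption for
// illustration; the real entrypoint may return a moved, to-space reference).
Object* Mark(Object* ref) { return ref; }

void MarkAndUpdateField(std::atomic<Object*>& field, Object*& ref) {
  Object* old_ref = ref;  // save the old (unpoisoned) reference, like `temp_` above
  ref = Mark(ref);        // after marking, `ref` is a to-space reference
  if (ref != old_ref) {
    // Strong CAS with relaxed ordering, like the LOCK CMPXCHG above: update the
    // holder's field only if it still contains the old reference.
    field.compare_exchange_strong(old_ref, ref, std::memory_order_relaxed);
  }
}

int main() {
  Object a;
  std::atomic<Object*> field(&a);
  Object* ref = field.load(std::memory_order_relaxed);
  MarkAndUpdateField(field, ref);
}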
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
public:
@@ -643,10 +813,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
__ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
}
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<
kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
@@ -710,7 +877,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
- x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+ x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
instruction_,
instruction_->GetDexPc(),
this);
@@ -731,8 +898,8 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
};
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<X86Assembler*>(GetAssembler())-> /* NOLINT */
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
inline Condition X86Condition(IfCondition cond) {
switch (cond) {
@@ -803,25 +970,21 @@ void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value(),
- instruction,
- dex_pc,
- slow_path);
-}
-
-void CodeGeneratorX86::InvokeRuntime(int32_t entry_point_offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path) {
- ValidateInvokeRuntime(instruction, slow_path);
- __ fs()->call(Address::Absolute(entry_point_offset));
- RecordPcInfo(instruction, dex_pc, slow_path);
+ ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+ GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
}
void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
HInstruction* instruction,
SlowPathCode* slow_path) {
ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ GenerateInvokeRuntime(entry_point_offset);
+}
+
+void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
__ fs()->call(Address::Absolute(entry_point_offset));
}
@@ -859,24 +1022,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
}
void CodeGeneratorX86::SetupBlockedRegisters() const {
- // Don't allocate the dalvik style register pair passing.
- blocked_register_pairs_[ECX_EDX] = true;
-
// Stack register is always reserved.
blocked_core_registers_[ESP] = true;
-
- UpdateBlockedPairRegisters();
-}
-
-void CodeGeneratorX86::UpdateBlockedPairRegisters() const {
- for (int i = 0; i < kNumberOfRegisterPairs; i++) {
- X86ManagedRegister current =
- X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
- if (blocked_core_registers_[current.AsRegisterPairLow()]
- || blocked_core_registers_[current.AsRegisterPairHigh()]) {
- blocked_register_pairs_[i] = true;
- }
- }
}
InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
@@ -916,7 +1063,12 @@ void CodeGeneratorX86::GenerateFrameEntry() {
int adjust = GetFrameSize() - FrameEntrySpillSize();
__ subl(ESP, Immediate(adjust));
__ cfi().AdjustCFAOffset(adjust);
- __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+ }
}
void CodeGeneratorX86::GenerateFrameExit() {
@@ -1122,15 +1274,11 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
__ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- int64_t value;
- if (constant->IsLongConstant()) {
- value = constant->AsLongConstant()->GetValue();
- } else {
- DCHECK(constant->IsDoubleConstant());
- value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
- }
+ DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
+ int64_t value = GetInt64ValueOf(constant);
__ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
- __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
+ __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
+ Immediate(High32Bits(value)));
} else {
DCHECK(source.IsDoubleStackSlot()) << source;
EmitParallelMoves(
@@ -1480,14 +1628,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio
Location lhs = condition->GetLocations()->InAt(0);
Location rhs = condition->GetLocations()->InAt(1);
// LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
- if (rhs.IsRegister()) {
- __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
- } else if (rhs.IsConstant()) {
- int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
- } else {
- __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
- }
+ codegen_->GenerateIntCompare(lhs, rhs);
if (true_target == nullptr) {
__ j(X86Condition(condition->GetOppositeCondition()), false_target);
} else {
@@ -1522,6 +1663,7 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
@@ -1580,18 +1722,6 @@ void LocationsBuilderX86::VisitSelect(HSelect* select) {
locations->SetOut(Location::SameAsFirstInput());
}
-void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
- Register lhs_reg = lhs.AsRegister<Register>();
- if (rhs.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- codegen_->Compare32BitValue(lhs_reg, value);
- } else if (rhs.IsStackSlot()) {
- __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
- } else {
- __ cmpl(lhs_reg, rhs.AsRegister<Register>());
- }
-}
-
void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
LocationSummary* locations = select->GetLocations();
DCHECK(locations->InAt(0).Equals(locations->Out()));
@@ -1621,7 +1751,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
LocationSummary* cond_locations = condition->GetLocations();
- GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+ codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
cond = X86Condition(condition->GetCondition());
}
} else {
@@ -1730,7 +1860,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
// Clear output register: setb only sets the low byte.
__ xorl(reg, reg);
- GenerateIntCompare(lhs, rhs);
+ codegen_->GenerateIntCompare(lhs, rhs);
__ setb(X86Condition(cond->GetCondition()), reg);
return;
}
@@ -2592,19 +2722,13 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
case Primitive::kPrimFloat:
// Processing a Dex `float-to-long' instruction.
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l),
- conversion,
- conversion->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
CheckEntrypointTypes<kQuickF2l, int64_t, float>();
break;
case Primitive::kPrimDouble:
// Processing a Dex `double-to-long' instruction.
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pD2l),
- conversion,
- conversion->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
CheckEntrypointTypes<kQuickD2l, int64_t, double>();
break;
@@ -3450,16 +3574,10 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
if (is_div) {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
} else {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
}
break;
@@ -3635,10 +3753,7 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
}
void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
switch (instruction->GetType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -3658,9 +3773,6 @@ void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
default:
LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
}
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
@@ -3685,7 +3797,7 @@ void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction)
} else {
DCHECK(value.IsConstant()) << value;
if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
- __ jmp(slow_path->GetEntryLabel());
+ __ jmp(slow_path->GetEntryLabel());
}
}
break;
@@ -4039,10 +4151,7 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
__ call(Address(temp, code_offset.Int32Value()));
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
} else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
DCHECK(!codegen_->IsLeafMethod());
}
@@ -4063,10 +4172,7 @@ void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
__ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
// Note: if heap poisoning is enabled, the entry point takes cares
// of poisoning the reference.
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
DCHECK(!codegen_->IsLeafMethod());
}
@@ -4212,7 +4318,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
case Primitive::kPrimShort:
case Primitive::kPrimChar:
case Primitive::kPrimInt: {
- GenerateIntCompare(left, right);
+ codegen_->GenerateIntCompare(left, right);
break;
}
case Primitive::kPrimLong: {
@@ -4320,7 +4426,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method ATTRIBUTE_UNUSED) {
+ HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
// We disable pc-relative load when there is an irreducible loop, as the optimization
@@ -4379,10 +4485,13 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO
Location temp) {
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
- case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+ case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
// temp = thread->string_init_entrypoint
- __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset()));
+ uint32_t offset =
+ GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
+ __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
@@ -4391,7 +4500,8 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
__ movl(temp.AsRegister<Register>(), Immediate(/* placeholder */ 0));
- method_patches_.emplace_back(invoke->GetTargetMethod());
+ method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetTargetMethod().dex_method_index);
__ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn.
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
@@ -4400,7 +4510,7 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO
__ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
// Bind a new fixup label at the end of the "movl" insn.
uint32_t offset = invoke->GetDexCacheArrayOffset();
- __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
+ __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFile(), offset));
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
@@ -4436,7 +4546,8 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
__ call(GetFrameEntryLabel());
break;
case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
- relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+ relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetTargetMethod().dex_method_index);
Label* label = &relative_call_patches_.back().label;
__ call(label); // Bind to the patch label, override at link time.
__ Bind(label); // Bind the label at the end of the "call" insn.
@@ -4495,7 +4606,8 @@ void CodeGeneratorX86::RecordSimplePatch() {
}
}
-void CodeGeneratorX86::RecordStringPatch(HLoadString* load_string) {
+void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
+ DCHECK(GetCompilerOptions().IsBootImage());
string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
__ Bind(&string_patches_.back().label);
}
@@ -4505,6 +4617,12 @@ void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) {
__ Bind(&type_patches_.back().label);
}
+Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
+ DCHECK(!GetCompilerOptions().IsBootImage());
+ string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+ return &string_patches_.back().label;
+}
+
Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset) {
// Add the patch entry and bind its label at the end of the instruction.
@@ -4512,6 +4630,21 @@ Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file
return &pc_relative_dex_cache_patches_.back().label;
}
+// The label points to the end of the "movl" (or another) instruction, but the literal offset
+// for a method patch needs to point to the embedded constant, which occupies the last 4 bytes.
+constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
+
+template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
+ const ArenaDeque<PatchInfo<Label>>& infos,
+ ArenaVector<LinkerPatch>* linker_patches) {
+ for (const PatchInfo<Label>& info : infos) {
+ uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+ linker_patches->push_back(
+ Factory(literal_offset, &info.dex_file, GetMethodAddressOffset(), info.index));
+ }
+}
+
void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -4522,59 +4655,38 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
string_patches_.size() +
type_patches_.size();
linker_patches->reserve(size);
- // The label points to the end of the "movl" insn but the literal offset for method
- // patch needs to point to the embedded constant which occupies the last 4 bytes.
- constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
- for (const MethodPatchInfo<Label>& info : method_patches_) {
- uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
- info.target_method.dex_file,
- info.target_method.dex_method_index));
- }
- for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
+ for (const PatchInfo<Label>& info : method_patches_) {
uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
- info.target_method.dex_file,
- info.target_method.dex_method_index));
+ linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index));
}
- for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+ for (const PatchInfo<Label>& info : relative_call_patches_) {
uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
- &info.target_dex_file,
- GetMethodAddressOffset(),
- info.element_offset));
+ linker_patches->push_back(
+ LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index));
}
+ EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
+ linker_patches);
for (const Label& label : simple_patches_) {
uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
}
- if (GetCompilerOptions().GetCompilePic()) {
- for (const StringPatchInfo<Label>& info : string_patches_) {
- uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
- &info.dex_file,
- GetMethodAddressOffset(),
- info.string_index));
- }
- for (const TypePatchInfo<Label>& info : type_patches_) {
- uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset,
- &info.dex_file,
- GetMethodAddressOffset(),
- info.type_index));
- }
+ if (!GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
+ } else if (GetCompilerOptions().GetCompilePic()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
} else {
- for (const StringPatchInfo<Label>& info : string_patches_) {
+ for (const PatchInfo<Label>& info : string_patches_) {
uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
- &info.dex_file,
- info.string_index));
+ linker_patches->push_back(
+ LinkerPatch::StringPatch(literal_offset, &info.dex_file, info.index));
}
- for (const TypePatchInfo<Label>& info : type_patches_) {
+ }
+ if (GetCompilerOptions().GetCompilePic()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches);
+ } else {
+ for (const PatchInfo<Label>& info : type_patches_) {
uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
- &info.dex_file,
- info.type_index));
+ linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index));
}
}
}
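The new EmitPcRelativeLinkerPatches helper above replaces several near-identical loops with a single one parameterized by the LinkerPatch factory function, passed as a non-type template parameter. A minimal standalone sketch of the same pattern; all names below are invented for illustration and are not taken from the ART sources:

    #include <cstdint>
    #include <vector>

    struct Info  { uint32_t label_position; uint32_t index; };   // stand-in for PatchInfo<Label>
    struct Patch { uint32_t literal_offset; uint32_t index; };   // stand-in for LinkerPatch

    Patch MakeStringPatch(uint32_t offset, uint32_t index) { return {offset, index}; }
    Patch MakeTypePatch(uint32_t offset, uint32_t index) { return {offset, index}; }

    // One loop body, instantiated per patch kind at compile time.
    template <Patch (*Factory)(uint32_t, uint32_t)>
    void EmitPatches(const std::vector<Info>& infos, std::vector<Patch>* out) {
      for (const Info& info : infos) {
        // The label sits at the end of the instruction; the 4-byte immediate precedes it.
        out->push_back(Factory(info.label_position - 4u, info.index));
      }
    }

    // Usage: EmitPatches<MakeStringPatch>(string_infos, &patches);
    //        EmitPatches<MakeTypePatch>(type_infos, &patches);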
@@ -4609,6 +4721,9 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
kEmitCompilerReadBarrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -4632,10 +4747,6 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
// load the temp into the XMM and then copy the XMM into the
// output, 32 bits at a time).
locations->AddTemp(Location::RequiresFpuRegister());
- } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
}
}
@@ -4679,11 +4790,10 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
case Primitive::kPrimNot: {
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp_loc = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ instruction, out, base, offset, /* needs_null_check */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -5022,17 +5132,11 @@ void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
}
void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
- Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
+ Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
? Location::RequiresRegister()
: Location::Any();
locations->SetInAt(0, loc);
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
@@ -5078,6 +5182,9 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -5094,11 +5201,6 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
Location::kOutputOverlap :
Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
- if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- locations->AddTemp(Location::RequiresRegister());
- }
}
void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -5113,56 +5215,47 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
switch (type) {
case Primitive::kPrimBoolean: {
Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- __ movzxb(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
- } else {
- __ movzxb(out, Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset));
- }
+ __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
break;
}
case Primitive::kPrimByte: {
Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- __ movsxb(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
- } else {
- __ movsxb(out, Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset));
- }
+ __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
break;
}
case Primitive::kPrimShort: {
Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- __ movsxw(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
- } else {
- __ movsxw(out, Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset));
- }
+ __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
break;
}
case Primitive::kPrimChar: {
Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- __ movzxw(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ // Branch between the compressed and uncompressed cases based on the count field's sign bit.
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ NearLabel done, not_compressed;
+ __ cmpl(Address(obj, count_offset), Immediate(0));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ j(kGreaterEqual, &not_compressed);
+ __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
+ __ jmp(&done);
+ __ Bind(&not_compressed);
+ __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
+ __ Bind(&done);
} else {
- __ movzxw(out, Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset));
+ // Common case: a char[] element load, or a String.charAt() with string
+ // compression disabled.
+ __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
}
break;
}
case Primitive::kPrimInt: {
Register out = out_loc.AsRegister<Register>();
- if (index.IsConstant()) {
- __ movl(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
- } else {
- __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
- }
+ __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
break;
}
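For the kPrimChar case above, the compressed-string branch amounts to roughly the following logic; this is an illustrative C++ rendering, assuming (as the emitted cmpl/jge sequence implies) that the sign bit of String.count acts as the compression flag in this version of the code:

    #include <cstdint>

    // Illustrative only: mirrors the emitted x86 sequence for String.charAt().
    uint16_t CharAt(int32_t count, const uint8_t* data8, const uint16_t* data16, int32_t index) {
      if (count >= 0) {        // cmpl(count, 0); j(kGreaterEqual, &not_compressed)
        return data16[index];  // movzxw with a TIMES_2 scale (uncompressed, 16-bit chars)
      }
      return data8[index];     // movzxb with a TIMES_1 scale (compressed, 8-bit chars)
    }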
@@ -5173,28 +5266,22 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
} else {
Register out = out_loc.AsRegister<Register>();
+ __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
if (index.IsConstant()) {
uint32_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ movl(out, Address(obj, offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- // If read barriers are enabled, emit read barriers other than
- // Baker's using a slow path (and also unpoison the loaded
- // reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
} else {
- __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- // If read barriers are enabled, emit read barriers other than
- // Baker's using a slow path (and also unpoison the loaded
- // reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(
instruction, out_loc, out_loc, obj_loc, data_offset, index);
}
@@ -5204,40 +5291,23 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimLong: {
DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
- if (index.IsConstant()) {
- size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
- } else {
- __ movl(out_loc.AsRegisterPairLow<Register>(),
- Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ movl(out_loc.AsRegisterPairHigh<Register>(),
- Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize));
- }
+ __ movl(out_loc.AsRegisterPairLow<Register>(),
+ CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ movl(out_loc.AsRegisterPairHigh<Register>(),
+ CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
break;
}
case Primitive::kPrimFloat: {
XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
- if (index.IsConstant()) {
- __ movss(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
- } else {
- __ movss(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
- }
+ __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
break;
}
case Primitive::kPrimDouble: {
XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
- if (index.IsConstant()) {
- __ movsd(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
- } else {
- __ movsd(out, Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
- }
+ __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
break;
}
@@ -5260,12 +5330,10 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
- bool object_array_set_with_read_barrier =
- kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+ may_need_runtime_call_for_type_check ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
@@ -5310,9 +5378,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
- : Address(array, index.AsRegister<Register>(), TIMES_1, offset);
+ Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
if (value.IsRegister()) {
__ movb(address, value.AsRegister<ByteRegister>());
} else {
@@ -5325,9 +5391,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimShort:
case Primitive::kPrimChar: {
uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
- : Address(array, index.AsRegister<Register>(), TIMES_2, offset);
+ Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
if (value.IsRegister()) {
__ movw(address, value.AsRegister<Register>());
} else {
@@ -5339,9 +5403,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimNot: {
uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
- : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+ Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
if (!value.IsRegister()) {
// Just setting null.
@@ -5356,9 +5418,13 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
DCHECK(needs_write_barrier);
Register register_value = value.AsRegister<Register>();
- NearLabel done, not_null, do_put;
+ // We cannot use a NearLabel for `done`, as its range may be too
+ // short when Baker read barriers are enabled.
+ Label done;
+ NearLabel not_null, do_put;
SlowPathCode* slow_path = nullptr;
- Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Location temp_loc = locations->GetTemp(0);
+ Register temp = temp_loc.AsRegister<Register>();
if (may_need_runtime_call_for_type_check) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
codegen_->AddSlowPath(slow_path);
@@ -5371,62 +5437,40 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
__ Bind(&not_null);
}
- if (kEmitCompilerReadBarrier) {
- // When read barriers are enabled, the type checking
- // instrumentation requires two read barriers:
- //
- // __ movl(temp2, temp);
- // // /* HeapReference<Class> */ temp = temp->component_type_
- // __ movl(temp, Address(temp, component_offset));
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
- //
- // // /* HeapReference<Class> */ temp2 = register_value->klass_
- // __ movl(temp2, Address(register_value, class_offset));
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
- //
- // __ cmpl(temp, temp2);
- //
- // However, the second read barrier may trash `temp`, as it
- // is a temporary register, and as such would not be saved
- // along with live registers before calling the runtime (nor
- // restored afterwards). So in this case, we bail out and
- // delegate the work to the array set slow path.
- //
- // TODO: Extend the register allocator to support a new
- // "(locally) live temp" location so as to avoid always
- // going into the slow path when read barriers are enabled.
- __ jmp(slow_path->GetEntryLabel());
- } else {
- // /* HeapReference<Class> */ temp = array->klass_
- __ movl(temp, Address(array, class_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Note that when Baker read barriers are enabled, the type
+ // checks are performed without read barriers. This is fine,
+ // even in the case where a class object is in the from-space
+ // after the flip, as a comparison involving such a type would
+ // not produce a false positive; it may of course produce a
+ // false negative, in which case we would take the ArraySet
+ // slow path.
+
+ // /* HeapReference<Class> */ temp = array->klass_
+ __ movl(temp, Address(array, class_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ MaybeUnpoisonHeapReference(temp);
+
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ __ movl(temp, Address(temp, component_offset));
+ // If heap poisoning is enabled, no need to unpoison `temp`
+ // nor the object reference in `register_value->klass`, as
+ // we are comparing two poisoned references.
+ __ cmpl(temp, Address(register_value, class_offset));
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ __ j(kEqual, &do_put);
+ // If heap poisoning is enabled, the `temp` reference has
+ // not been unpoisoned yet; unpoison it now.
__ MaybeUnpoisonHeapReference(temp);
- // /* HeapReference<Class> */ temp = temp->component_type_
- __ movl(temp, Address(temp, component_offset));
- // If heap poisoning is enabled, no need to unpoison `temp`
- // nor the object reference in `register_value->klass`, as
- // we are comparing two poisoned references.
- __ cmpl(temp, Address(register_value, class_offset));
-
- if (instruction->StaticTypeOfArrayIsObjectArray()) {
- __ j(kEqual, &do_put);
- // If heap poisoning is enabled, the `temp` reference has
- // not been unpoisoned yet; unpoison it now.
- __ MaybeUnpoisonHeapReference(temp);
-
- // /* HeapReference<Class> */ temp = temp->super_class_
- __ movl(temp, Address(temp, super_offset));
- // If heap poisoning is enabled, no need to unpoison
- // `temp`, as we are comparing against null below.
- __ testl(temp, temp);
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ Bind(&do_put);
- } else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
- }
+ // If heap poisoning is enabled, no need to unpoison the
+ // heap reference loaded below, as it is only used for a
+ // comparison with null.
+ __ cmpl(Address(temp, super_offset), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ j(kNotEqual, slow_path->GetEntryLabel());
}
}
@@ -5455,9 +5499,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimInt: {
uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
- : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+ Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
if (value.IsRegister()) {
__ movl(address, value.AsRegister<Register>());
} else {
@@ -5471,44 +5513,27 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimLong: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
- if (index.IsConstant()) {
- size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- if (value.IsRegisterPair()) {
- __ movl(Address(array, offset), value.AsRegisterPairLow<Register>());
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ movl(Address(array, offset + kX86WordSize), value.AsRegisterPairHigh<Register>());
- } else {
- DCHECK(value.IsConstant());
- int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
- __ movl(Address(array, offset), Immediate(Low32Bits(val)));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ movl(Address(array, offset + kX86WordSize), Immediate(High32Bits(val)));
- }
+ if (value.IsRegisterPair()) {
+ __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
+ value.AsRegisterPairLow<Register>());
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
+ value.AsRegisterPairHigh<Register>());
} else {
- if (value.IsRegisterPair()) {
- __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset),
- value.AsRegisterPairLow<Register>());
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
- value.AsRegisterPairHigh<Register>());
- } else {
- DCHECK(value.IsConstant());
- int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
- __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset),
- Immediate(Low32Bits(val)));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
- Immediate(High32Bits(val)));
- }
+ DCHECK(value.IsConstant());
+ int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
+ __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
+ Immediate(Low32Bits(val)));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
+ Immediate(High32Bits(val)));
}
break;
}
case Primitive::kPrimFloat: {
uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
- : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+ Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
if (value.IsFpuRegister()) {
__ movss(address, value.AsFpuRegister<XmmRegister>());
} else {
@@ -5522,17 +5547,13 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimDouble: {
uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
- : Address(array, index.AsRegister<Register>(), TIMES_8, offset);
+ Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
if (value.IsFpuRegister()) {
__ movsd(address, value.AsFpuRegister<XmmRegister>());
} else {
DCHECK(value.IsConstant());
- Address address_hi = index.IsConstant() ?
- Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
- offset + kX86WordSize) :
- Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize);
+ Address address_hi =
+ CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
__ movl(address, Immediate(Low32Bits(v)));
codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -5566,24 +5587,32 @@ void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
Register out = locations->Out().AsRegister<Register>();
__ movl(out, Address(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out the most significant bit in case the array is a String's char array; that bit holds the compression flag.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ andl(out, Immediate(INT32_MAX));
+ }
}
void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
HInstruction* length = instruction->InputAt(1);
if (!length->IsEmittedAtUseSite()) {
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
}
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
+ // Need a temporary register to read the array's length.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
+ const bool is_string_compressed_char_at =
+ mirror::kUseStringCompression && instruction->IsStringCharAt();
LocationSummary* locations = instruction->GetLocations();
Location index_loc = locations->InAt(0);
Location length_loc = locations->InAt(1);
@@ -5618,21 +5647,25 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
Location array_loc = array_length->GetLocations()->InAt(0);
Address array_len(array_loc.AsRegister<Register>(), len_offset);
- if (index_loc.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
- __ cmpl(array_len, Immediate(value));
+ if (is_string_compressed_char_at) {
+ Register length_reg = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(length_reg, array_len);
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
+ __ andl(length_reg, Immediate(INT32_MAX));
+ codegen_->GenerateIntCompare(length_reg, index_loc);
} else {
- __ cmpl(array_len, index_loc.AsRegister<Register>());
+ // Bounds check for the general case: a regular array, or a string
+ // with the compression feature turned off.
+ if (index_loc.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+ __ cmpl(array_len, Immediate(value));
+ } else {
+ __ cmpl(array_len, index_loc.AsRegister<Register>());
+ }
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
}
- codegen_->MaybeRecordImplicitNullCheck(array_length);
} else {
- Register length = length_loc.AsRegister<Register>();
- if (index_loc.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
- __ cmpl(length, Immediate(value));
- } else {
- __ cmpl(length, index_loc.AsRegister<Register>());
- }
+ codegen_->GenerateIntCompare(length_loc, index_loc);
}
codegen_->AddSlowPath(slow_path);
__ j(kBelowEqual, slow_path->GetEntryLabel());
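The compressed-length path of the bounds check above strips the compression flag before comparing. A small sketch of the comparison logic, with invented names:

    #include <cstdint>

    bool IndexInBounds(int32_t count, uint32_t index) {
      uint32_t length = static_cast<uint32_t>(count) & INT32_MAX;  // andl(length_reg, INT32_MAX)
      // cmpl(length, index); j(kBelowEqual, slow_path): the slow path is taken when length <= index.
      return index < length;
    }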
@@ -5648,7 +5681,9 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction)
}
void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5959,17 +5994,6 @@ void ParallelMoveResolverX86::RestoreScratch(int reg) {
HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
- if (kEmitCompilerReadBarrier) {
- switch (desired_class_load_kind) {
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageAddress:
- // TODO: Implement for read barrier.
- return HLoadClass::LoadKind::kDexCacheViaMethod;
- default:
- break;
- }
- }
switch (desired_class_load_kind) {
case HLoadClass::LoadKind::kReferrersClass:
break;
@@ -6011,10 +6035,15 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
return;
}
- LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
@@ -6029,10 +6058,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
LocationSummary* locations = cls->GetLocations();
if (cls->NeedsAccessCheck()) {
codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
- cls,
- cls->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
@@ -6041,6 +6067,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
Register out = out_loc.AsRegister<Register>();
bool generate_null_check = false;
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
switch (cls->GetLoadKind()) {
case HLoadClass::LoadKind::kReferrersClass: {
DCHECK(!cls->CanCallRuntime());
@@ -6048,24 +6075,28 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
// /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
Register current_method = locations->InAt(0).AsRegister<Register>();
GenerateGcRootFieldLoad(
- cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+ cls,
+ out_loc,
+ Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
__ movl(out, Immediate(/* placeholder */ 0));
codegen_->RecordTypePatch(cls);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
Register method_address = locations->InAt(0).AsRegister<Register>();
__ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
codegen_->RecordTypePatch(cls);
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
DCHECK_NE(cls->GetAddress(), 0u);
uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
__ movl(out, Immediate(address));
@@ -6076,7 +6107,11 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
DCHECK_NE(cls->GetAddress(), 0u);
uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
// /* GcRoot<mirror::Class> */ out = *address
- GenerateGcRootFieldLoad(cls, out_loc, Address::Absolute(address));
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ Address::Absolute(address),
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -6085,8 +6120,11 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
uint32_t offset = cls->GetDexCacheElementOffset();
Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset);
// /* GcRoot<mirror::Class> */ out = *(base + offset) /* PC-relative */
- GenerateGcRootFieldLoad(
- cls, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ Address(base_reg, CodeGeneratorX86::kDummy32BitOffset),
+ fixup_label,
+ requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -6097,8 +6135,11 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
__ movl(out, Address(current_method,
ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
// /* GcRoot<mirror::Class> */ out = out[type_index]
- GenerateGcRootFieldLoad(
- cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())),
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -6152,17 +6193,6 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
- if (kEmitCompilerReadBarrier) {
- switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageAddress:
- // TODO: Implement for read barrier.
- return HLoadString::LoadKind::kDexCacheViaMethod;
- default:
- break;
- }
- }
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -6170,7 +6200,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
DCHECK(GetCompilerOptions().GetCompilePic());
FALLTHROUGH_INTENDED;
- case HLoadString::LoadKind::kDexCachePcRelative:
+ case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT.
// We disable pc-relative load when there is an irreducible loop, as the optimization
// is incompatible with it.
@@ -6193,16 +6223,32 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
- ? LocationSummary::kCallOnSlowPath
+ ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
+ ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kCallOnSlowPath)
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
- if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
- load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
+ if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+ load_kind == HLoadString::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
- locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+ locations->SetOut(Location::RegisterLocation(EAX));
+ } else {
+ locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and/or marking to save everything.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
+ }
}
void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
@@ -6212,68 +6258,46 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
switch (load->GetLoadKind()) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
__ movl(out, Immediate(/* placeholder */ 0));
- codegen_->RecordStringPatch(load);
+ codegen_->RecordBootStringPatch(load);
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
- DCHECK(!kEmitCompilerReadBarrier);
Register method_address = locations->InAt(0).AsRegister<Register>();
__ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
- codegen_->RecordStringPatch(load);
+ codegen_->RecordBootStringPatch(load);
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
DCHECK_NE(load->GetAddress(), 0u);
uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
__ movl(out, Immediate(address));
codegen_->RecordSimplePatch();
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
- // /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address));
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- Register base_reg = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = load->GetDexCacheElementOffset();
- Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
- // /* GcRoot<mirror::String> */ out = *(base + offset) /* PC-relative */
- GenerateGcRootFieldLoad(
- load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- Register current_method = locations->InAt(0).AsRegister<Register>();
-
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
- break;
+ case HLoadString::LoadKind::kBssEntry: {
+ Register method_address = locations->InAt(0).AsRegister<Register>();
+ Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
+ Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
+ // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier);
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
+ codegen_->AddSlowPath(slow_path);
+ __ testl(out, out);
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
}
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
- codegen_->AddSlowPath(slow_path);
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do the string dex cache lookup.
+ InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
+ __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex()));
+ codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
}
static Address GetExceptionTlsAddress() {
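As a reference for the kBssEntry path above, the generated sequence behaves roughly like the sketch below; the helper names are made up for illustration and are not the real codegen or runtime API:

    struct String;                       // stand-in for mirror::String
    String* ResolveStringOnSlowPath();   // stand-in for LoadStringSlowPathX86 / kQuickResolveString

    String* LoadStringFromBssEntry(String** bss_slot /* pc-relative, patched at link time */) {
      String* str = *bss_slot;           // movl out, [method_address + kDummy32BitOffset], later patched
      if (str == nullptr) {              // testl out, out; j(kEqual, slow_path)
        str = ResolveStringOnSlowPath(); // resolve and return the string on the slow path
      }
      return str;
    }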
@@ -6306,17 +6330,14 @@ void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
}
void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
return kEmitCompilerReadBarrier &&
- (kUseBakerReadBarrier ||
- type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ !kUseBakerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
type_check_kind == TypeCheckKind::kArrayObjectCheck);
}
@@ -6324,6 +6345,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -6331,6 +6353,7 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -6340,6 +6363,9 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
// Note that TypeCheckSlowPathX86 uses this "out" register too.
@@ -6377,7 +6403,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
@@ -6599,7 +6625,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
@@ -6635,8 +6661,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -6675,8 +6700,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -6708,8 +6732,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -6717,8 +6740,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -6758,11 +6780,10 @@ void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction)
}
void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
- codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject)
- : QUICK_ENTRY_POINT(pUnlockObject),
+ codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
+ : kQuickUnlockObject,
instruction,
- instruction->GetDexPc(),
- nullptr);
+ instruction->GetDexPc());
if (instruction->IsEnter()) {
CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
} else {
@@ -6909,17 +6930,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
Location maybe_temp) {
Register out_reg = out.AsRegister<Register>();
if (kEmitCompilerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, out_reg, offset, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
// in the following move operation, as we will need it for the
// read barrier below.
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
__ movl(maybe_temp.AsRegister<Register>(), out_reg);
// /* HeapReference<Object> */ out = *(out + offset)
__ movl(out_reg, Address(out_reg, offset));
@@ -6936,17 +6957,15 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset,
- Location maybe_temp) {
+ uint32_t offset) {
Register out_reg = out.AsRegister<Register>();
Register obj_reg = obj.AsRegister<Register>();
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, obj_reg, offset, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6964,9 +6983,11 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction
void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
const Address& address,
- Label* fixup_label) {
+ Label* fixup_label,
+ bool requires_read_barrier) {
Register root_reg = root.AsRegister<Register>();
- if (kEmitCompilerReadBarrier) {
+ if (requires_read_barrier) {
+ DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
@@ -6989,9 +7010,9 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path used to mark the GC root `root`.
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root);
+ // Slow path marking the GC root `root`.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+ instruction, root, /* unpoison_ref_before_marking */ false);
codegen_->AddSlowPath(slow_path);
__ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
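The GC-root load above follows the Baker fast path described in the surrounding comments; a rough standalone rendering, with stand-in names:

    struct Object;
    Object* Mark(Object* root);          // stand-in for the ReadBarrierMark slow path
    bool IsGcMarking();                  // stand-in for the fs:[IsGcMarkingOffset] check

    Object* LoadGcRoot(Object** address) {
      Object* root = *address;           // movl root_reg, [address] (plus fixup label, if any)
      if (IsGcMarking()) {               // cmpl against Thread::IsGcMarkingOffset, branch to slow path
        root = Mark(root);               // ReadBarrierMarkSlowPathX86
      }
      return root;
    }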
@@ -7025,14 +7046,13 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
Location ref,
Register obj,
uint32_t offset,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
Address src(obj, offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}
void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -7040,7 +7060,6 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
Register obj,
uint32_t data_offset,
Location index,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -7050,18 +7069,17 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- Address src = index.IsConstant() ?
- Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
- Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+ Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}
void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
const Address& src,
- Location temp,
- bool needs_null_check) {
+ bool needs_null_check,
+ bool always_update_field,
+ Register* temp) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -7090,23 +7108,23 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// performance reasons.
Register ref_reg = ref.AsRegister<Register>();
- Register temp_reg = temp.AsRegister<Register>();
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // /* int32_t */ monitor = obj->monitor_
- __ movl(temp_reg, Address(obj, monitor_offset));
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+ constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+ constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ // At this point, just do the "if" and make sure that flags are preserved until the branch.
+ __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
- __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86 memory model.
@@ -7114,20 +7132,27 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// The actual reference load.
// /* HeapReference<Object> */ ref = *src
- __ movl(ref_reg, src);
+ __ movl(ref_reg, src); // Flags are unaffected.
+
+ // Note: reference unpoisoning modifies the flags, so we need to delay it until after the branch.
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCode* slow_path;
+ if (always_update_field) {
+ DCHECK(temp != nullptr);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
+ instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+ instruction, ref, /* unpoison_ref_before_marking */ true);
+ }
+ AddSlowPath(slow_path);
+
+ // We have done the "if" of the gray bit check above, now branch based on the flags.
+ __ j(kNotZero, slow_path->GetEntryLabel());
// Object* ref = ref_addr->AsMirrorPtr()
__ MaybeUnpoisonHeapReference(ref_reg);
- // Slow path used to mark the object `ref` when it is gray.
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref);
- AddSlowPath(slow_path);
-
- // if (rb_state == ReadBarrier::gray_ptr_)
- // ref = ReadBarrier::Mark(ref);
- __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
- __ j(kEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
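For orientation, the gray-bit fast path emitted above corresponds roughly to the following C++-level sketch; the accessor `GetLockWordValue()` is an assumption introduced purely for illustration, and this is not the actual generated code.

// Illustrative sketch of the Baker read barrier fast path (x86 variant above).
// Assumed helper: GetLockWordValue() stands in for the monitor load; the testb
// emitted above inspects only the byte holding the read barrier state.
mirror::Object* BakerReferenceLoad(mirror::Object* obj,
                                   mirror::HeapReference<mirror::Object>* src) {
  uint32_t monitor = GetLockWordValue(obj);
  // Only the low bit of the read barrier state matters: white = 0, gray = 1, black = 2.
  bool is_gray = ((monitor >> LockWord::kReadBarrierStateShift) & 1u) != 0u;
  mirror::Object* ref = src->AsMirrorPtr();  // the actual reference load
  if (is_gray) {
    ref = ReadBarrier::Mark(ref);  // slow path: mark `ref` (and possibly forward it)
  }
  return ref;
}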
@@ -7534,6 +7559,31 @@ void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
}
}
+void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+ Register lhs_reg = lhs.AsRegister<Register>();
+ GenerateIntCompare(lhs_reg, rhs);
+}
+
+void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
+ if (rhs.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+ Compare32BitValue(lhs, value);
+ } else if (rhs.IsStackSlot()) {
+ __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
+ } else {
+ __ cmpl(lhs, rhs.AsRegister<Register>());
+ }
+}
+
+Address CodeGeneratorX86::ArrayAddress(Register obj,
+ Location index,
+ ScaleFactor scale,
+ uint32_t data_offset) {
+ return index.IsConstant() ?
+ Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
+ Address(obj, index.AsRegister<Register>(), scale, data_offset);
+}
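A minimal usage sketch for the two helpers added above; the wrapper function and its parameters are assumptions for illustration, not code from this change.

// Illustrative only: a call site folding the old constant/register index
// branches into a single ArrayAddress() call, followed by an int compare.
void EmitIntArrayLoadAndCompare(X86Assembler* assembler, CodeGeneratorX86* codegen,
                                Register out, Register obj, Location index,
                                uint32_t data_offset, Location rhs) {
  // Constant indexes are folded into the displacement; register indexes use a
  // scaled-index addressing mode (TIMES_4 for 4-byte elements).
  assembler->movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
  // Chooses an immediate compare, a stack-slot compare, or a register compare.
  codegen->GenerateIntCompare(out, rhs);
}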
+
Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
Register reg,
Register value) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f306b33247..1b51999546 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -254,17 +254,17 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset,
- Location maybe_temp);
+ uint32_t offset);
// Generate a GC root reference load:
//
// root <- *address
//
- // while honoring read barriers (if any).
+ // while honoring read barriers if `requires_read_barrier` is true.
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
const Address& address,
- Label* fixup_label = nullptr);
+ Label* fixup_label,
+ bool requires_read_barrier);
// Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
// `is_wide` specifies whether it is long/double or not.
@@ -295,7 +295,6 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
HBasicBlock* default_block);
void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
- void GenerateIntCompare(Location lhs, Location rhs);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -329,12 +328,7 @@ class CodeGeneratorX86 : public CodeGenerator {
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path) OVERRIDE;
-
- void InvokeRuntime(int32_t entry_point_offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path);
+ SlowPathCode* slow_path = nullptr) OVERRIDE;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -342,6 +336,8 @@ class CodeGeneratorX86 : public CodeGenerator {
HInstruction* instruction,
SlowPathCode* slow_path);
+ void GenerateInvokeRuntime(int32_t entry_point_offset);
+
size_t GetWordSize() const OVERRIDE {
return kX86WordSize;
}
@@ -376,9 +372,6 @@ class CodeGeneratorX86 : public CodeGenerator {
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
- // Blocks all register pairs made out of blocked core registers.
- void UpdateBlockedPairRegisters() const;
-
ParallelMoveResolverX86* GetMoveResolver() OVERRIDE {
return &move_resolver_;
}
@@ -406,7 +399,7 @@ class CodeGeneratorX86 : public CodeGenerator {
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method) OVERRIDE;
+ HInvokeStaticOrDirect* invoke) OVERRIDE;
// Generate a call to a static or direct method.
Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
@@ -415,8 +408,9 @@ class CodeGeneratorX86 : public CodeGenerator {
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
void RecordSimplePatch();
- void RecordStringPatch(HLoadString* load_string);
+ void RecordBootStringPatch(HLoadString* load_string);
void RecordTypePatch(HLoadClass* load_class);
+ Label* NewStringBssEntryPatch(HLoadString* load_string);
Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
@@ -476,6 +470,16 @@ class CodeGeneratorX86 : public CodeGenerator {
// Compare a register with a 32-bit value in the most efficient manner.
void Compare32BitValue(Register dest, int32_t value);
+ // Compare int values. Supports only register locations for `lhs`.
+ void GenerateIntCompare(Location lhs, Location rhs);
+ void GenerateIntCompare(Register lhs, Location rhs);
+
+ // Construct address for array access.
+ static Address ArrayAddress(Register obj,
+ Location index,
+ ScaleFactor scale,
+ uint32_t data_offset);
+
Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -486,7 +490,6 @@ class CodeGeneratorX86 : public CodeGenerator {
Location ref,
Register obj,
uint32_t offset,
- Location temp,
bool needs_null_check);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
@@ -495,16 +498,25 @@ class CodeGeneratorX86 : public CodeGenerator {
Register obj,
uint32_t data_offset,
Location index,
- Location temp,
bool needs_null_check);
- // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
- // and GenerateArrayLoadWithBakerReadBarrier.
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at address `src`, held by
+ // object `obj`, into `ref`, and mark it if needed. The base of
+ // address `src` must be `obj`.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`). This operation
+ // requires a temporary register, which must be provided as a
+ // non-null pointer (`temp`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
const Address& src,
- Location temp,
- bool needs_null_check);
+ bool needs_null_check,
+ bool always_update_field = false,
+ Register* temp = nullptr);
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
@@ -566,9 +578,9 @@ class CodeGeneratorX86 : public CodeGenerator {
}
}
- void GenerateNop();
- void GenerateImplicitNullCheck(HNullCheck* instruction);
- void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateNop() OVERRIDE;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// The correct value will be inserted when processing Assembler fixups.
@@ -577,15 +589,9 @@ class CodeGeneratorX86 : public CodeGenerator {
private:
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
- struct PcRelativeDexCacheAccessInfo {
- PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
- : target_dex_file(dex_file), element_offset(element_off), label() { }
-
- const DexFile& target_dex_file;
- uint32_t element_offset;
- // NOTE: Label is bound to the end of the instruction that has an embedded 32-bit offset.
- Label label;
- };
+ template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
+ ArenaVector<LinkerPatch>* linker_patches);
// Labels for each block that will be compiled.
Label* block_labels_; // Indexed by block id.
@@ -597,16 +603,16 @@ class CodeGeneratorX86 : public CodeGenerator {
const X86InstructionSetFeatures& isa_features_;
// Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
- ArenaDeque<MethodPatchInfo<Label>> method_patches_;
- ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+ ArenaDeque<PatchInfo<Label>> method_patches_;
+ ArenaDeque<PatchInfo<Label>> relative_call_patches_;
// PC-relative DexCache access info.
- ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
+ ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
// Patch locations for patchoat where the linker doesn't do any other work.
ArenaDeque<Label> simple_patches_;
- // String patch locations.
- ArenaDeque<StringPatchInfo<Label>> string_patches_;
+ // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC).
+ ArenaDeque<PatchInfo<Label>> string_patches_;
// Type patch locations.
- ArenaDeque<TypePatchInfo<Label>> type_patches_;
+ ArenaDeque<PatchInfo<Label>> type_patches_;
// Offset to the start of the constant area in the assembled code.
// Used for fixups to the constant area.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5d5fa8504a..fcabeeae5d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -51,8 +51,8 @@ static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15
static constexpr int kC2ConditionMask = 0x400;
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
class NullCheckSlowPathX86_64 : public SlowPathCode {
@@ -66,7 +66,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCode {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
+ x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
instruction_,
instruction_->GetDexPc(),
this);
@@ -88,14 +88,7 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- if (instruction_->CanThrowIntoCatchBlock()) {
- // Live registers will be restored in the catch block if caught.
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- }
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
@@ -149,13 +142,8 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -210,6 +198,9 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
}
__ movl(length_loc.AsRegister<CpuRegister>(), array_len);
+ if (mirror::kUseStringCompression) {
+ __ andl(length_loc.AsRegister<CpuRegister>(), Immediate(INT32_MAX));
+ }
}
// We're moving two locations to locations that could overlap, so we need a parallel
@@ -221,13 +212,10 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
length_loc,
Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
Primitive::kPrimInt);
- uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
- ? QUICK_ENTRY_POINT(pThrowStringBounds)
- : QUICK_ENTRY_POINT(pThrowArrayBounds);
- x86_64_codegen->InvokeRuntime(entry_point_offset,
- instruction_,
- instruction_->GetDexPc(),
- this);
+ QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
+ ? kQuickThrowStringBounds
+ : kQuickThrowArrayBounds;
+ x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
@@ -259,9 +247,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCode {
InvokeRuntimeCallingConvention calling_convention;
__ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
- x86_64_codegen->InvokeRuntime(do_clinit_ ?
- QUICK_ENTRY_POINT(pInitializeStaticStorage) :
- QUICK_ENTRY_POINT(pInitializeType),
+ x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
at_,
dex_pc_,
this);
@@ -313,16 +299,23 @@ class LoadStringSlowPathX86_64 : public SlowPathCode {
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
- __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
+ // Custom calling convention: RAX serves as both input and output.
+ __ movl(CpuRegister(RAX), Immediate(string_index));
+ x86_64_codegen->InvokeRuntime(kQuickResolveString,
instruction_,
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
RestoreLiveRegisters(codegen, locations);
+
+ // Store the resolved String to the BSS entry.
+ __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
+ locations->Out().AsRegister<CpuRegister>());
+ Label* fixup_label = x86_64_codegen->NewStringBssEntryPatch(instruction_->AsLoadString());
+ __ Bind(fixup_label);
+
__ jmp(GetExitLabel());
}
@@ -364,18 +357,12 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode {
Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
- instruction_,
- dex_pc,
- this);
+ x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
CheckEntrypointTypes<
kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
- instruction_,
- dex_pc,
- this);
+ x86_64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
@@ -407,11 +394,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -450,10 +433,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
@@ -465,11 +445,25 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj)
- : SlowPathCode(instruction), obj_(obj) {
+ ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
+ Location ref,
+ bool unpoison_ref_before_marking)
+ : SlowPathCode(instruction),
+ ref_(ref),
+ unpoison_ref_before_marking_(unpoison_ref_before_marking) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -477,54 +471,236 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- Register reg = obj_.AsRegister<Register>();
+ CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
+ Register ref_reg = ref_cpu_reg.AsRegister();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
instruction_->IsLoadClass() ||
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
+ if (unpoison_ref_before_marking_) {
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_cpu_reg);
+ }
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
- DCHECK_NE(reg, RSP);
- DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+ DCHECK_NE(ref_reg, RSP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
// "Compact" slow path, saving two moves.
//
// Instead of using the standard runtime calling convention (input
// and output in R0):
//
- // RDI <- obj
+ // RDI <- ref
// RAX <- ReadBarrierMark(RDI)
- // obj <- RAX
+ // ref <- RAX
//
- // we just use rX (the register holding `obj`) as input and output
+ // we just use rX (the register containing `ref`) as input and output
// of a dedicated entrypoint:
//
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(reg);
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
// This runtime call does not require a stack map.
x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
}
private:
- const Location obj_;
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // Should the reference in `ref_` be unpoisoned prior to marking it?
+ const bool unpoison_ref_before_marking_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ const Address& field_addr,
+ bool unpoison_ref_before_marking,
+ CpuRegister temp1,
+ CpuRegister temp2)
+ : SlowPathCode(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_addr_(field_addr),
+ unpoison_ref_before_marking_(unpoison_ref_before_marking),
+ temp1_(temp1),
+ temp2_(temp2) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
+ Register ref_reg = ref_cpu_reg.AsRegister();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+
+ __ Bind(GetEntryLabel());
+ if (unpoison_ref_before_marking_) {
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_cpu_reg);
+ }
+
+ // Save the old (unpoisoned) reference.
+ __ movl(temp1_, ref_cpu_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ DCHECK_NE(ref_reg, RSP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // RDI <- ref
+ // RAX <- ReadBarrierMark(RDI)
+ // ref <- RAX
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*field_addr`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
+ // operation below would abort the CAS, leaving the field as-is.
+ NearLabel done;
+ __ cmpl(temp1_, ref_cpu_reg);
+ __ j(kEqual, &done);
+
+ // Update the holder's field atomically. This may fail if the
+ // mutator updates the field before us, but that is OK. This is
+ // achieved using a strong compare-and-set (CAS) operation with
+ // relaxed memory synchronization ordering, where the expected
+ // value is the old reference and the desired value is the new
+ // reference. This operation is implemented with a 32-bit LOCK
+ // CMPXCHGL instruction, which requires the expected value (the
+ // old reference) to be in EAX. Save RAX beforehand, and move the
+ // expected value (stored in `temp1_`) into EAX.
+ __ movq(temp2_, CpuRegister(RAX));
+ __ movl(CpuRegister(RAX), temp1_);
+
+ // Convenience aliases.
+ CpuRegister base = obj_;
+ CpuRegister expected = CpuRegister(RAX);
+ CpuRegister value = ref_cpu_reg;
+
+ bool base_equals_value = (base.AsRegister() == value.AsRegister());
+ Register value_reg = ref_reg;
+ if (kPoisonHeapReferences) {
+ if (base_equals_value) {
+ // If `base` and `value` are the same register location, move
+ // `value_reg` to a temporary register. This way, poisoning
+ // `value_reg` won't invalidate `base`.
+ value_reg = temp1_.AsRegister();
+ __ movl(CpuRegister(value_reg), base);
+ }
+
+ // Check that the register allocator did not assign the location
+ // of `expected` (RAX) to `value` nor to `base`, so that heap
+ // poisoning (when enabled) works as intended below.
+ // - If `value` were equal to `expected`, both references would
+ // be poisoned twice, meaning they would not be poisoned at
+ // all, as heap poisoning uses address negation.
+ // - If `base` were equal to `expected`, poisoning `expected`
+ // would invalidate `base`.
+ DCHECK_NE(value_reg, expected.AsRegister());
+ DCHECK_NE(base.AsRegister(), expected.AsRegister());
+
+ __ PoisonHeapReference(expected);
+ __ PoisonHeapReference(CpuRegister(value_reg));
+ }
+
+ __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
+
+ // If heap poisoning is enabled, we need to unpoison the values
+ // that were poisoned earlier.
+ if (kPoisonHeapReferences) {
+ if (base_equals_value) {
+ // `value_reg` has been moved to a temporary register, no need
+ // to unpoison it.
+ } else {
+ __ UnpoisonHeapReference(CpuRegister(value_reg));
+ }
+ // No need to unpoison `expected` (RAX), as it will be overwritten below.
+ }
+
+ // Restore RAX.
+ __ movq(CpuRegister(RAX), temp2_);
+
+ __ Bind(&done);
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const CpuRegister obj_;
+ // The address of the marked reference field. The base of this address must be `obj_`.
+ const Address field_addr_;
+
+ // Should the reference in `ref_` be unpoisoned prior to marking it?
+ const bool unpoison_ref_before_marking_;
+
+ const CpuRegister temp1_;
+ const CpuRegister temp2_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
+};
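At a high level, the marking-plus-field-update performed by this slow path amounts to the following compare-and-set. The sketch uses std::atomic on the 32-bit heap reference purely for illustration, and `Mark()` is a hypothetical stand-in; the generated code uses LOCK CMPXCHGL with RAX holding the expected value.

#include <atomic>
#include <cstdint>

uint32_t Mark(uint32_t ref);  // hypothetical stand-in for the ReadBarrierMarkRegX entry point

// Illustrative semantics of ReadBarrierMarkAndUpdateFieldSlowPathX86_64.
void MarkAndUpdateField(std::atomic<uint32_t>* field, uint32_t old_ref) {
  uint32_t new_ref = Mark(old_ref);
  if (new_ref != old_ref) {
    // Strong CAS with relaxed ordering: replace the field only if it still holds
    // the reference we originally read; a concurrent update leaves it untouched.
    uint32_t expected = old_ref;
    field->compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
  }
}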
+
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
public:
@@ -661,7 +837,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
__ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
}
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+ x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
instruction_,
instruction_->GetDexPc(),
this);
@@ -729,7 +905,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
- x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+ x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
instruction_,
instruction_->GetDexPc(),
this);
@@ -750,8 +926,8 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
};
#undef __
-// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
-#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
inline Condition X86_64IntegerCondition(IfCondition cond) {
switch (cond) {
@@ -787,7 +963,7 @@ inline Condition X86_64FPCondition(IfCondition cond) {
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method ATTRIBUTE_UNUSED) {
+ HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
switch (desired_dispatch_info.code_ptr_location) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
@@ -808,11 +984,13 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat
// All registers are assumed to be correctly set up.
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
- case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+ case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
// temp = thread->string_init_entrypoint
- __ gs()->movq(temp.AsRegister<CpuRegister>(),
- Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
+ uint32_t offset =
+ GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
+ __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true));
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
@@ -821,7 +999,8 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
__ movl(temp.AsRegister<CpuRegister>(), Immediate(0)); // Placeholder.
- method_patches_.emplace_back(invoke->GetTargetMethod());
+ method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetTargetMethod().dex_method_index);
__ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn.
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
@@ -829,7 +1008,7 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat
Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
// Bind a new fixup label at the end of the "movl" insn.
uint32_t offset = invoke->GetDexCacheArrayOffset();
- __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
+ __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFile(), offset));
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
@@ -868,7 +1047,8 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
__ call(&frame_entry_label_);
break;
case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
- relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+ relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetTargetMethod().dex_method_index);
Label* label = &relative_call_patches_.back().label;
__ call(label); // Bind to the patch label, override at link time.
__ Bind(label); // Bind the label at the end of the "call" insn.
@@ -928,7 +1108,8 @@ void CodeGeneratorX86_64::RecordSimplePatch() {
}
}
-void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
+void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
+ DCHECK(GetCompilerOptions().IsBootImage());
string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
__ Bind(&string_patches_.back().label);
}
@@ -938,6 +1119,12 @@ void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) {
__ Bind(&type_patches_.back().label);
}
+Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
+ DCHECK(!GetCompilerOptions().IsBootImage());
+ string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+ return &string_patches_.back().label;
+}
+
Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset) {
// Add a patch entry and return the label.
@@ -945,6 +1132,21 @@ Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_f
return &pc_relative_dex_cache_patches_.back().label;
}
+// The label points to the end of the "movl" (or other) instruction, but the literal offset
+// for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
+constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
+
+template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
+ const ArenaDeque<PatchInfo<Label>>& infos,
+ ArenaVector<LinkerPatch>* linker_patches) {
+ for (const PatchInfo<Label>& info : infos) {
+ uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+ linker_patches->push_back(
+ Factory(literal_offset, &info.dex_file, info.label.Position(), info.index));
+ }
+}
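Instantiated with a concrete factory, one iteration of the loop above expands to roughly the following, shown for the dex-cache-array case that mirrors the hand-written code this template replaces.

// Sketch of one expanded iteration for pc_relative_dex_cache_patches_.
for (const PatchInfo<Label>& info : pc_relative_dex_cache_patches_) {
  uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(
      literal_offset,          // offset of the embedded 32-bit constant
      &info.dex_file,          // dex file of the referenced element
      info.label.Position(),   // PC-relative anchor: end of the patched instruction
      info.index));            // element offset within the dex cache array
}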
+
void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -955,48 +1157,29 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
string_patches_.size() +
type_patches_.size();
linker_patches->reserve(size);
- // The label points to the end of the "movl" insn but the literal offset for method
- // patch needs to point to the embedded constant which occupies the last 4 bytes.
- constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
- for (const MethodPatchInfo<Label>& info : method_patches_) {
+ for (const PatchInfo<Label>& info : method_patches_) {
uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
- info.target_method.dex_file,
- info.target_method.dex_method_index));
+ linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index));
}
- for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
+ for (const PatchInfo<Label>& info : relative_call_patches_) {
uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
- info.target_method.dex_file,
- info.target_method.dex_method_index));
- }
- for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
- uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
- &info.target_dex_file,
- info.label.Position(),
- info.element_offset));
+ linker_patches->push_back(
+ LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index));
}
+ EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
+ linker_patches);
for (const Label& label : simple_patches_) {
uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
}
- for (const StringPatchInfo<Label>& info : string_patches_) {
+ if (!GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
+ } else {
// These are always PC-relative, see GetSupportedLoadStringKind().
- uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
- &info.dex_file,
- info.label.Position(),
- info.string_index));
- }
- for (const TypePatchInfo<Label>& info : type_patches_) {
- // These are always PC-relative, see GetSupportedLoadClassKind().
- uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset,
- &info.dex_file,
- info.label.Position(),
- info.type_index));
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
}
+ // These are always PC-relative, see GetSupportedLoadClassKind().
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches);
}
void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -1031,25 +1214,21 @@ void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
- InvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value(),
- instruction,
- dex_pc,
- slow_path);
-}
-
-void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path) {
- ValidateInvokeRuntime(instruction, slow_path);
- __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
- RecordPcInfo(instruction, dex_pc, slow_path);
+ ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+ GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
}
void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
HInstruction* instruction,
SlowPathCode* slow_path) {
ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ GenerateInvokeRuntime(entry_point_offset);
+}
+
+void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
__ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
}
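Splitting out GenerateInvokeRuntime() lets callers that already know the raw entry point offset (for example, intrinsics invoking a marking entry point without a stack map) emit the gs:-relative call directly. A hedged usage sketch follows; the wrapper and the entry point chosen are only examples, not code from this change.

// Illustrative only: emit a call to a Quick entry point by raw offset,
// without recording a stack map (validation is the caller's responsibility).
void EmitRawRuntimeCall(CodeGeneratorX86_64* codegen) {
  int32_t entry_point_offset =
      QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pTestSuspend).Int32Value();  // example entry point
  codegen->GenerateInvokeRuntime(entry_point_offset);  // emits: gs: call [offset] (no_rip)
}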
@@ -1150,8 +1329,13 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
}
}
- __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
- CpuRegister(kMethodRegisterArgument));
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
+ CpuRegister(kMethodRegisterArgument));
+ }
}
void CodeGeneratorX86_64::GenerateFrameExit() {
@@ -1258,13 +1442,8 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
source.AsFpuRegister<XmmRegister>());
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- int64_t value;
- if (constant->IsDoubleConstant()) {
- value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
- } else {
- DCHECK(constant->IsLongConstant());
- value = constant->AsLongConstant()->GetValue();
- }
+ DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
+ int64_t value = GetInt64ValueOf(constant);
Store64BitValueToStack(destination, value);
} else {
DCHECK(source.IsDoubleStackSlot());
@@ -1363,31 +1542,11 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition)
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- CpuRegister left_reg = left.AsRegister<CpuRegister>();
- if (right.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
- if (value == 0) {
- __ testl(left_reg, left_reg);
- } else {
- __ cmpl(left_reg, Immediate(value));
- }
- } else if (right.IsStackSlot()) {
- __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
- } else {
- __ cmpl(left_reg, right.AsRegister<CpuRegister>());
- }
+ codegen_->GenerateIntCompare(left, right);
break;
}
case Primitive::kPrimLong: {
- CpuRegister left_reg = left.AsRegister<CpuRegister>();
- if (right.IsConstant()) {
- int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
- codegen_->Compare64BitValue(left_reg, value);
- } else if (right.IsDoubleStackSlot()) {
- __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
- } else {
- __ cmpq(left_reg, right.AsRegister<CpuRegister>());
- }
+ codegen_->GenerateLongCompare(left, right);
break;
}
case Primitive::kPrimFloat: {
@@ -1542,15 +1701,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc
Location lhs = condition->GetLocations()->InAt(0);
Location rhs = condition->GetLocations()->InAt(1);
- if (rhs.IsRegister()) {
- __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
- } else if (rhs.IsConstant()) {
- int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
- } else {
- __ cmpl(lhs.AsRegister<CpuRegister>(),
- Address(CpuRegister(RSP), rhs.GetStackIndex()));
- }
+ codegen_->GenerateIntCompare(lhs, rhs);
if (true_target == nullptr) {
__ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
} else {
@@ -1585,6 +1736,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
@@ -1749,28 +1901,14 @@ void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
// Clear output register: setcc only sets the low byte.
__ xorl(reg, reg);
- if (rhs.IsRegister()) {
- __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
- } else if (rhs.IsConstant()) {
- int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
- } else {
- __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
- }
+ codegen_->GenerateIntCompare(lhs, rhs);
__ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
return;
case Primitive::kPrimLong:
// Clear output register: setcc only sets the low byte.
__ xorl(reg, reg);
- if (rhs.IsRegister()) {
- __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
- } else if (rhs.IsConstant()) {
- int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
- codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
- } else {
- __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
- }
+ codegen_->GenerateLongCompare(lhs, rhs);
__ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
return;
case Primitive::kPrimFloat: {
@@ -1938,27 +2076,11 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
case Primitive::kPrimShort:
case Primitive::kPrimChar:
case Primitive::kPrimInt: {
- CpuRegister left_reg = left.AsRegister<CpuRegister>();
- if (right.IsConstant()) {
- int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
- codegen_->Compare32BitValue(left_reg, value);
- } else if (right.IsStackSlot()) {
- __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
- } else {
- __ cmpl(left_reg, right.AsRegister<CpuRegister>());
- }
+ codegen_->GenerateIntCompare(left, right);
break;
}
case Primitive::kPrimLong: {
- CpuRegister left_reg = left.AsRegister<CpuRegister>();
- if (right.IsConstant()) {
- int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
- codegen_->Compare64BitValue(left_reg, value);
- } else if (right.IsDoubleStackSlot()) {
- __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
- } else {
- __ cmpq(left_reg, right.AsRegister<CpuRegister>());
- }
+ codegen_->GenerateLongCompare(left, right);
break;
}
case Primitive::kPrimFloat: {
@@ -3734,14 +3856,8 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
}
void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::Any());
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
@@ -3767,7 +3883,7 @@ void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instructio
} else {
DCHECK(value.IsConstant()) << value;
if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
- __ jmp(slow_path->GetEntryLabel());
+ __ jmp(slow_path->GetEntryLabel());
}
}
break;
@@ -3782,7 +3898,7 @@ void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instructio
} else {
DCHECK(value.IsConstant()) << value;
if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
- __ jmp(slow_path->GetEntryLabel());
+ __ jmp(slow_path->GetEntryLabel());
}
}
break;
@@ -3967,10 +4083,7 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction)
__ call(Address(temp, code_offset.SizeValue()));
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
} else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
DCHECK(!codegen_->IsLeafMethod());
}
@@ -3992,10 +4105,7 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
instruction->GetTypeIndex());
// Note: if heap poisoning is enabled, the entry point takes cares
// of poisoning the reference.
- codegen_->InvokeRuntime(instruction->GetEntrypoint(),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
DCHECK(!codegen_->IsLeafMethod());
@@ -4142,6 +4252,9 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
object_field_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
@@ -4153,11 +4266,6 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
Location::RequiresRegister(),
object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
- }
}
void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -4201,11 +4309,10 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
case Primitive::kPrimNot: {
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp_loc = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
- // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+ // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ instruction, out, base, offset, /* needs_null_check */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -4521,17 +4628,11 @@ void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
}
void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
- Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
+ Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
? Location::RequiresRegister()
: Location::Any();
locations->SetInAt(0, loc);
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
@@ -4577,6 +4678,9 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -4589,11 +4693,6 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
- if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- locations->AddTemp(Location::RequiresRegister());
- }
}
void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4608,56 +4707,45 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
switch (type) {
case Primitive::kPrimBoolean: {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- if (index.IsConstant()) {
- __ movzxb(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
- } else {
- __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
- }
+ __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
break;
}
case Primitive::kPrimByte: {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- if (index.IsConstant()) {
- __ movsxb(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
- } else {
- __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
- }
+ __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
break;
}
case Primitive::kPrimShort: {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- if (index.IsConstant()) {
- __ movsxw(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
- } else {
- __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
- }
+ __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
break;
}
case Primitive::kPrimChar: {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- if (index.IsConstant()) {
- __ movzxw(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ // Branch cases into compressed and uncompressed for each index's type.
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ NearLabel done, not_compressed;
+ __ cmpl(Address(obj, count_offset), Immediate(0));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ j(kGreaterEqual, &not_compressed);
+ __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
+ __ jmp(&done);
+ __ Bind(&not_compressed);
+ __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
+ __ Bind(&done);
} else {
- __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
+ __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
}
break;
}
case Primitive::kPrimInt: {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- if (index.IsConstant()) {
- __ movl(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
- } else {
- __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
- }
+ __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
break;
}
@@ -4668,28 +4756,22 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
- // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+ // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
} else {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
+ __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
if (index.IsConstant()) {
uint32_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ movl(out, Address(obj, offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- // If read barriers are enabled, emit read barriers other than
- // Baker's using a slow path (and also unpoison the loaded
- // reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
} else {
- __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- // If read barriers are enabled, emit read barriers other than
- // Baker's using a slow path (and also unpoison the loaded
- // reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(
instruction, out_loc, out_loc, obj_loc, data_offset, index);
}
@@ -4699,34 +4781,19 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimLong: {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- if (index.IsConstant()) {
- __ movq(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
- } else {
- __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
- }
+ __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
break;
}
case Primitive::kPrimFloat: {
XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
- if (index.IsConstant()) {
- __ movss(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
- } else {
- __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
- }
+ __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
break;
}
case Primitive::kPrimDouble: {
XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
- if (index.IsConstant()) {
- __ movsd(out, Address(obj,
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
- } else {
- __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
- }
+ __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
break;
}
@@ -4749,12 +4816,10 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
- bool object_array_set_with_read_barrier =
- kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+ may_need_runtime_call_for_type_check ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
@@ -4768,10 +4833,7 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
if (needs_write_barrier) {
// Temporary registers for the write barrier.
-
- // This first temporary register is possibly used for heap
- // reference poisoning and/or read barrier emission too.
- locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -4794,9 +4856,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
- : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
+ Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
if (value.IsRegister()) {
__ movb(address, value.AsRegister<CpuRegister>());
} else {
@@ -4809,9 +4869,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimShort:
case Primitive::kPrimChar: {
uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
- : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
+ Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
if (value.IsRegister()) {
__ movw(address, value.AsRegister<CpuRegister>());
} else {
@@ -4824,9 +4882,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimNot: {
uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
- : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+ Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
if (!value.IsRegister()) {
// Just setting null.
@@ -4841,9 +4897,13 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
DCHECK(needs_write_barrier);
CpuRegister register_value = value.AsRegister<CpuRegister>();
- NearLabel done, not_null, do_put;
+ // We cannot use a NearLabel for `done`, as its range may be too
+ // short when Baker read barriers are enabled.
+ Label done;
+ NearLabel not_null, do_put;
SlowPathCode* slow_path = nullptr;
- CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ Location temp_loc = locations->GetTemp(0);
+ CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
if (may_need_runtime_call_for_type_check) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
codegen_->AddSlowPath(slow_path);
@@ -4856,62 +4916,40 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ Bind(&not_null);
}
- if (kEmitCompilerReadBarrier) {
- // When read barriers are enabled, the type checking
- // instrumentation requires two read barriers:
- //
- // __ movl(temp2, temp);
- // // /* HeapReference<Class> */ temp = temp->component_type_
- // __ movl(temp, Address(temp, component_offset));
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
- //
- // // /* HeapReference<Class> */ temp2 = register_value->klass_
- // __ movl(temp2, Address(register_value, class_offset));
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
- //
- // __ cmpl(temp, temp2);
- //
- // However, the second read barrier may trash `temp`, as it
- // is a temporary register, and as such would not be saved
- // along with live registers before calling the runtime (nor
- // restored afterwards). So in this case, we bail out and
- // delegate the work to the array set slow path.
- //
- // TODO: Extend the register allocator to support a new
- // "(locally) live temp" location so as to avoid always
- // going into the slow path when read barriers are enabled.
- __ jmp(slow_path->GetEntryLabel());
- } else {
- // /* HeapReference<Class> */ temp = array->klass_
- __ movl(temp, Address(array, class_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Note that when Baker read barriers are enabled, the type
+ // checks are performed without read barriers. This is fine,
+ // even in the case where a class object is in the from-space
+ // after the flip, as a comparison involving such a type would
+ // not produce a false positive; it may of course produce a
+ // false negative, in which case we would take the ArraySet
+ // slow path.
+
+ // /* HeapReference<Class> */ temp = array->klass_
+ __ movl(temp, Address(array, class_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ MaybeUnpoisonHeapReference(temp);
+
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ __ movl(temp, Address(temp, component_offset));
+ // If heap poisoning is enabled, no need to unpoison `temp`
+ // nor the object reference in `register_value->klass`, as
+ // we are comparing two poisoned references.
+ __ cmpl(temp, Address(register_value, class_offset));
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ __ j(kEqual, &do_put);
+ // If heap poisoning is enabled, the `temp` reference has
+ // not been unpoisoned yet; unpoison it now.
__ MaybeUnpoisonHeapReference(temp);
- // /* HeapReference<Class> */ temp = temp->component_type_
- __ movl(temp, Address(temp, component_offset));
- // If heap poisoning is enabled, no need to unpoison `temp`
- // nor the object reference in `register_value->klass`, as
- // we are comparing two poisoned references.
- __ cmpl(temp, Address(register_value, class_offset));
-
- if (instruction->StaticTypeOfArrayIsObjectArray()) {
- __ j(kEqual, &do_put);
- // If heap poisoning is enabled, the `temp` reference has
- // not been unpoisoned yet; unpoison it now.
- __ MaybeUnpoisonHeapReference(temp);
-
- // /* HeapReference<Class> */ temp = temp->super_class_
- __ movl(temp, Address(temp, super_offset));
- // If heap poisoning is enabled, no need to unpoison
- // `temp`, as we are comparing against null below.
- __ testl(temp, temp);
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ Bind(&do_put);
- } else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
- }
+ // If heap poisoning is enabled, no need to unpoison the
+ // heap reference loaded below, as it is only used for a
+ // comparison with null.
+ __ cmpl(Address(temp, super_offset), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ j(kNotEqual, slow_path->GetEntryLabel());
}
}
@@ -4940,9 +4978,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimInt: {
uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
- : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+ Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
if (value.IsRegister()) {
__ movl(address, value.AsRegister<CpuRegister>());
} else {
@@ -4956,18 +4992,14 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimLong: {
uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
- : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
+ Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
if (value.IsRegister()) {
__ movq(address, value.AsRegister<CpuRegister>());
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
- Address address_high = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
- offset + sizeof(int32_t))
- : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+ Address address_high =
+ CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
}
break;
@@ -4975,15 +5007,12 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimFloat: {
uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
- : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+ Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
if (value.IsFpuRegister()) {
__ movss(address, value.AsFpuRegister<XmmRegister>());
} else {
DCHECK(value.IsConstant());
- int32_t v =
- bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+ int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
__ movl(address, Immediate(v));
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -4992,19 +5021,15 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimDouble: {
uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
- Address address = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
- : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
+ Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
if (value.IsFpuRegister()) {
__ movsd(address, value.AsFpuRegister<XmmRegister>());
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
int64_t v =
bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
- Address address_high = index.IsConstant()
- ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
- offset + sizeof(int32_t))
- : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+ Address address_high =
+ CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
}
break;
@@ -5036,21 +5061,23 @@ void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction)
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
__ movl(out, Address(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out the most significant bit in case the array is a String's backing char array.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ andl(out, Immediate(INT32_MAX));
+ }
}
void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
- LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
HInstruction* length = instruction->InputAt(1);
if (!length->IsEmittedAtUseSite()) {
locations->SetInAt(1, Location::RegisterOrConstant(length));
}
- if (instruction->HasUses()) {
- locations->SetOut(Location::SameAsFirstInput());
- }
}
void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -5087,21 +5114,25 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction)
uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
Location array_loc = array_length->GetLocations()->InAt(0);
Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
- if (index_loc.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
- __ cmpl(array_len, Immediate(value));
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ CpuRegister length_reg = CpuRegister(TMP);
+ __ movl(length_reg, array_len);
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
+ __ andl(length_reg, Immediate(INT32_MAX));
+ codegen_->GenerateIntCompare(length_reg, index_loc);
} else {
- __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
+ // Checking the bound for the general case:
+ // a char array, or a String's backing array when the compression feature is off.
+ if (index_loc.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+ __ cmpl(array_len, Immediate(value));
+ } else {
+ __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
+ }
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
}
- codegen_->MaybeRecordImplicitNullCheck(array_length);
} else {
- CpuRegister length = length_loc.AsRegister<CpuRegister>();
- if (index_loc.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
- __ cmpl(length, Immediate(value));
- } else {
- __ cmpl(length, index_loc.AsRegister<CpuRegister>());
- }
+ codegen_->GenerateIntCompare(length_loc, index_loc);
}
codegen_->AddSlowPath(slow_path);
__ j(kBelowEqual, slow_path->GetEntryLabel());
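
Aside on the bounds check above (sketch, not part of the patch): cmpl(array_len, index) followed by j(kBelowEqual, slow_path) is a single unsigned "length <= index" test, which also catches negative indices because they become huge unsigned values; for String.charAt with compression enabled, the length is first masked with INT32_MAX to strip the compression flag. In plain C++:

#include <cstdint>

// Sketch of the bounds test; raw_length may carry the String compression
// flag in its most significant bit.
bool BoundsCheckFails(int32_t index, int32_t raw_length, bool is_string_char_at) {
  uint32_t length = is_string_char_at
      ? (static_cast<uint32_t>(raw_length) & INT32_MAX)  // Strip the compression flag.
      : static_cast<uint32_t>(raw_length);
  // Unsigned "length <= index" mirrors cmpl + j(kBelowEqual) and rejects
  // negative indices in the same comparison.
  return length <= static_cast<uint32_t>(index);
}
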
@@ -5137,7 +5168,9 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio
}
void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5397,17 +5430,6 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
- if (kEmitCompilerReadBarrier) {
- switch (desired_class_load_kind) {
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageAddress:
- // TODO: Implement for read barrier.
- return HLoadClass::LoadKind::kDexCacheViaMethod;
- default:
- break;
- }
- }
switch (desired_class_load_kind) {
case HLoadClass::LoadKind::kReferrersClass:
break;
@@ -5443,10 +5465,15 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
return;
}
- LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
@@ -5459,10 +5486,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
LocationSummary* locations = cls->GetLocations();
if (cls->NeedsAccessCheck()) {
codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
- cls,
- cls->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
@@ -5470,6 +5494,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
Location out_loc = locations->Out();
CpuRegister out = out_loc.AsRegister<CpuRegister>();
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
bool generate_null_check = false;
switch (cls->GetLoadKind()) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -5478,16 +5503,20 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
// /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
GenerateGcRootFieldLoad(
- cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+ cls,
+ out_loc,
+ Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
__ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
codegen_->RecordTypePatch(cls);
break;
case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!requires_read_barrier);
DCHECK_NE(cls->GetAddress(), 0u);
uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
__ movl(out, Immediate(address)); // Zero-extended.
@@ -5499,11 +5528,19 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
// /* GcRoot<mirror::Class> */ out = *address
if (IsUint<32>(cls->GetAddress())) {
Address address = Address::Absolute(cls->GetAddress(), /* no_rip */ true);
- GenerateGcRootFieldLoad(cls, out_loc, address);
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ address,
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
} else {
// TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
__ movq(out, Immediate(cls->GetAddress()));
- GenerateGcRootFieldLoad(cls, out_loc, Address(out, 0));
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ Address(out, 0),
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
}
generate_null_check = !cls->IsInDexCache();
break;
@@ -5514,7 +5551,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
/* no_rip */ false);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label);
+ GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -5527,7 +5564,11 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
// /* GcRoot<mirror::Class> */ out = out[type_index]
GenerateGcRootFieldLoad(
- cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+ cls,
+ out_loc,
+ Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())),
+ /* fixup_label */ nullptr,
+ requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -5573,17 +5614,6 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
- if (kEmitCompilerReadBarrier) {
- switch (desired_string_load_kind) {
- case HLoadString::LoadKind::kBootImageLinkTimeAddress:
- case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageAddress:
- // TODO: Implement for read barrier.
- return HLoadString::LoadKind::kDexCacheViaMethod;
- default:
- break;
- }
- }
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -5597,7 +5627,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
case HLoadString::LoadKind::kDexCacheAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kDexCachePcRelative:
+ case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
case HLoadString::LoadKind::kDexCacheViaMethod:
@@ -5607,14 +5637,28 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
}
void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
- LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
- ? LocationSummary::kCallOnSlowPath
+ LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+ ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
+ ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kCallOnSlowPath)
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RegisterLocation(RAX));
+ } else {
+ locations->SetOut(Location::RequiresRegister());
+ if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and/or marking to save everything.
+ // Custom calling convention: RAX serves as both input and output.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(RAX));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
- locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
@@ -5624,66 +5668,41 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
switch (load->GetLoadKind()) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
- DCHECK(!kEmitCompilerReadBarrier);
__ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
- codegen_->RecordStringPatch(load);
+ codegen_->RecordBootStringPatch(load);
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
DCHECK_NE(load->GetAddress(), 0u);
uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
__ movl(out, Immediate(address)); // Zero-extended.
codegen_->RecordSimplePatch();
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- // /* GcRoot<mirror::String> */ out = *address
- if (IsUint<32>(load->GetAddress())) {
- Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
- GenerateGcRootFieldLoad(load, out_loc, address);
- } else {
- // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
- __ movq(out, Immediate(load->GetAddress()));
- GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
- }
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- uint32_t offset = load->GetDexCacheElementOffset();
- Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
+ case HLoadString::LoadKind::kBssEntry: {
Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
/* no_rip */ false);
- // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
-
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
- break;
+ Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
+ // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier);
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ testl(out, out);
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
}
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
- codegen_->AddSlowPath(slow_path);
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code that does the string dex cache lookup.
+ // Custom calling convention: RAX serves as both input and output.
+ __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex()));
+ codegen_->InvokeRuntime(kQuickResolveString,
+ load,
+ load->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
}
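
For context on the new kBssEntry path above (sketch only, not part of the patch): the idea is a lazily filled, linker-patched slot. The fast path is a single PC-relative load of the .bss entry; only when it is still null does the slow path call pResolveString, after which later loads stay on the fast path. The C++ analogue below uses made-up names, and the caller-side store is purely illustrative; the real runtime side is expected to fill the slot itself.

#include <atomic>
#include <cstdint>

// Hypothetical sketch; ResolveStringSlow stands in for the kQuickResolveString
// runtime call made above, and the caller-side caching is for illustration only.
using StringRef = const char*;

static StringRef ResolveStringSlow(uint32_t /* string_index */) {
  return "resolved";  // Dummy result for the sketch.
}

StringRef LoadStringBssEntry(std::atomic<StringRef>& bss_slot, uint32_t string_index) {
  StringRef value = bss_slot.load(std::memory_order_acquire);  // Fast path: one load.
  if (value == nullptr) {
    value = ResolveStringSlow(string_index);           // Slow path resolves the string...
    bss_slot.store(value, std::memory_order_release);  // ...and caches it for later loads.
  }
  return value;
}
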
static Address GetExceptionTlsAddress() {
@@ -5717,17 +5736,14 @@ void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
return kEmitCompilerReadBarrier &&
- (kUseBakerReadBarrier ||
- type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ !kUseBakerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
type_check_kind == TypeCheckKind::kArrayObjectCheck);
}
@@ -5735,6 +5751,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -5742,6 +5759,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -5751,6 +5769,9 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
// Note that TypeCheckSlowPathX86_64 uses this "out" register too.
@@ -5788,7 +5809,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
@@ -6018,8 +6039,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<CpuRegister>());
@@ -6043,8 +6063,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
@@ -6064,8 +6083,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -6089,8 +6107,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
// Walk over the class hierarchy to find a match.
NearLabel loop;
@@ -6116,8 +6133,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&done);
break;
@@ -6136,8 +6152,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
// Do an exact check.
NearLabel check_non_primitive_component_type;
@@ -6165,8 +6180,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -6174,8 +6188,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&done);
break;
@@ -6191,8 +6204,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
// We always go into the type check slow path for the unresolved
// and interface check cases.
@@ -6227,11 +6239,9 @@ void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instructio
}
void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
- codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject)
- : QUICK_ENTRY_POINT(pUnlockObject),
+ codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
instruction,
- instruction->GetDexPc(),
- nullptr);
+ instruction->GetDexPc());
if (instruction->IsEnter()) {
CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
} else {
@@ -6360,17 +6370,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
Location maybe_temp) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
if (kEmitCompilerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, out_reg, offset, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
// in the following move operation, as we will need it for the
// read barrier below.
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
__ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
// /* HeapReference<Object> */ out = *(out + offset)
__ movl(out_reg, Address(out_reg, offset));
@@ -6387,17 +6397,15 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset,
- Location maybe_temp) {
+ uint32_t offset) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, obj_reg, offset, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6415,9 +6423,11 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruct
void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
const Address& address,
- Label* fixup_label) {
+ Label* fixup_label,
+ bool requires_read_barrier) {
CpuRegister root_reg = root.AsRegister<CpuRegister>();
- if (kEmitCompilerReadBarrier) {
+ if (requires_read_barrier) {
+ DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
@@ -6440,9 +6450,9 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path used to mark the GC root `root`.
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root);
+ // Slow path marking the GC root `root`.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+ instruction, root, /* unpoison_ref_before_marking */ false);
codegen_->AddSlowPath(slow_path);
__ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
@@ -6477,14 +6487,13 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
Location ref,
CpuRegister obj,
uint32_t offset,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
Address src(obj, offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}
void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6492,7 +6501,6 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
CpuRegister obj,
uint32_t data_offset,
Location index,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6502,18 +6510,18 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- Address src = index.IsConstant() ?
- Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
- Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+ Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}
void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
CpuRegister obj,
const Address& src,
- Location temp,
- bool needs_null_check) {
+ bool needs_null_check,
+ bool always_update_field,
+ CpuRegister* temp1,
+ CpuRegister* temp2) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6542,23 +6550,23 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// performance reasons.
CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
- CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // /* int32_t */ monitor = obj->monitor_
- __ movl(temp_reg, Address(obj, monitor_offset));
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+ constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+ constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ // At this point, just do the "if" and make sure that flags are preserved until the branch.
+ __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
- __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6566,20 +6574,28 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// The actual reference load.
// /* HeapReference<Object> */ ref = *src
- __ movl(ref_reg, src);
+ __ movl(ref_reg, src); // Flags are unaffected.
+
+ // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCode* slow_path;
+ if (always_update_field) {
+ DCHECK(temp1 != nullptr);
+ DCHECK(temp2 != nullptr);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
+ instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+ instruction, ref, /* unpoison_ref_before_marking */ true);
+ }
+ AddSlowPath(slow_path);
+
+ // We have done the "if" of the gray bit check above; now branch based on the flags.
+ __ j(kNotZero, slow_path->GetEntryLabel());
// Object* ref = ref_addr->AsMirrorPtr()
__ MaybeUnpoisonHeapReference(ref_reg);
- // Slow path used to mark the object `ref` when it is gray.
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref);
- AddSlowPath(slow_path);
-
- // if (rb_state == ReadBarrier::gray_ptr_)
- // ref = ReadBarrier::Mark(ref);
- __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
- __ j(kEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
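
Aside on the rewritten Baker read barrier fast path above (illustration, not part of the patch): the old shift/and/cmp sequence is replaced by one testb of the gray bit inside the lock word, whose byte and bit positions are derived from LockWord::kReadBarrierStateShift. The standalone arithmetic below uses an assumed shift of 28 purely as an example value:

#include <cstdint>
#include <cstdio>

// Illustrative arithmetic only: kAssumedShift is a made-up example value,
// not necessarily the real LockWord::kReadBarrierStateShift.
constexpr uint32_t kBitsPerByte = 8;
constexpr uint32_t kAssumedShift = 28;

constexpr uint32_t kGrayBytePosition = kAssumedShift / kBitsPerByte;  // Byte offset in the lock word.
constexpr uint32_t kGrayBitPosition = kAssumedShift % kBitsPerByte;   // Bit within that byte.
constexpr int32_t kTestValue = static_cast<int8_t>(1u << kGrayBitPosition);

int main() {
  // testb byte_ptr[obj + monitor_offset + kGrayBytePosition], kTestValue
  // is non-zero exactly when the read barrier state is gray.
  std::printf("byte %u, bit %u, test immediate 0x%x\n",
              kGrayBytePosition, kGrayBitPosition, kTestValue & 0xff);
  return 0;
}
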
@@ -6810,6 +6826,43 @@ void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
}
}
+void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
+ CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
+ GenerateIntCompare(lhs_reg, rhs);
+}
+
+void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
+ if (rhs.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+ Compare32BitValue(lhs, value);
+ } else if (rhs.IsStackSlot()) {
+ __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
+ } else {
+ __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
+ }
+}
+
+void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
+ CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
+ if (rhs.IsConstant()) {
+ int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
+ Compare64BitValue(lhs_reg, value);
+ } else if (rhs.IsDoubleStackSlot()) {
+ __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
+ } else {
+ __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
+ }
+}
+
+Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
+ Location index,
+ ScaleFactor scale,
+ uint32_t data_offset) {
+ return index.IsConstant() ?
+ Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
+ Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
+}
+
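
The new ArrayAddress helper above folds a constant index into the displacement and otherwise emits a base + index * scale + displacement operand. A simplified sketch of the effective address it denotes, with plain integers standing in for the assembler's Address type (not part of the patch):

#include <cstdint>

// Sketch of the effective address ArrayAddress describes; obj_base and
// index_reg stand in for register contents, scale is log2 of the element size.
uint64_t EffectiveArrayAddress(uint64_t obj_base,
                               bool index_is_constant,
                               int32_t constant_index,
                               uint64_t index_reg,
                               uint32_t scale,
                               uint32_t data_offset) {
  if (index_is_constant) {
    // Constant index: (index << scale) is folded into the displacement.
    return obj_base + (static_cast<uint32_t>(constant_index) << scale) + data_offset;
  }
  // Register index: base + index * 2^scale + displacement addressing mode.
  return obj_base + (index_reg << scale) + data_offset;
}
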
void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
DCHECK(dest.IsDoubleStackSlot());
if (IsInt<32>(value)) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 4e0e34ce38..8b19dad0d0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -248,17 +248,17 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset,
- Location maybe_temp);
+ uint32_t offset);
// Generate a GC root reference load:
//
// root <- *address
//
- // while honoring read barriers (if any).
+ // while honoring read barriers if `requires_read_barrier` is true.
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
const Address& address,
- Label* fixup_label = nullptr);
+ Label* fixup_label,
+ bool requires_read_barrier);
void PushOntoFPStack(Location source, uint32_t temp_offset,
uint32_t stack_adjustment, bool is_float);
@@ -310,12 +310,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path) OVERRIDE;
-
- void InvokeRuntime(int32_t entry_point_offset,
- HInstruction* instruction,
- uint32_t dex_pc,
- SlowPathCode* slow_path);
+ SlowPathCode* slow_path = nullptr) OVERRIDE;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -323,6 +318,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
HInstruction* instruction,
SlowPathCode* slow_path);
+ void GenerateInvokeRuntime(int32_t entry_point_offset);
+
size_t GetWordSize() const OVERRIDE {
return kX86_64WordSize;
}
@@ -402,15 +399,16 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- MethodReference target_method) OVERRIDE;
+ HInvokeStaticOrDirect* invoke) OVERRIDE;
Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
void RecordSimplePatch();
- void RecordStringPatch(HLoadString* load_string);
+ void RecordBootStringPatch(HLoadString* load_string);
void RecordTypePatch(HLoadClass* load_class);
+ Label* NewStringBssEntryPatch(HLoadString* load_string);
Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
@@ -427,7 +425,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location ref,
CpuRegister obj,
uint32_t offset,
- Location temp,
bool needs_null_check);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
@@ -436,16 +433,26 @@ class CodeGeneratorX86_64 : public CodeGenerator {
CpuRegister obj,
uint32_t data_offset,
Location index,
- Location temp,
bool needs_null_check);
- // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
- // and GenerateArrayLoadWithBakerReadBarrier.
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at address `src`, held by
+ // object `obj`, into `ref`, and mark it if needed. The base of
+ // address `src` must be `obj`.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`). This operation
+ // requires two temporary registers, which must be provided as
+ // non-null pointers (`temp1` and `temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
CpuRegister obj,
const Address& src,
- Location temp,
- bool needs_null_check);
+ bool needs_null_check,
+ bool always_update_field = false,
+ CpuRegister* temp1 = nullptr,
+ CpuRegister* temp2 = nullptr);
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
@@ -516,6 +523,19 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void Compare32BitValue(CpuRegister dest, int32_t value);
void Compare64BitValue(CpuRegister dest, int64_t value);
+ // Compare int values. Supports register locations for `lhs`.
+ void GenerateIntCompare(Location lhs, Location rhs);
+ void GenerateIntCompare(CpuRegister lhs, Location rhs);
+
+ // Compare long values. Supports only register locations for `lhs`.
+ void GenerateLongCompare(Location lhs, Location rhs);
+
+ // Construct address for array access.
+ static Address ArrayAddress(CpuRegister obj,
+ Location index,
+ ScaleFactor scale,
+ uint32_t data_offset);
+
Address LiteralCaseTable(HPackedSwitch* switch_instr);
// Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
@@ -539,23 +559,18 @@ class CodeGeneratorX86_64 : public CodeGenerator {
}
}
- void GenerateNop();
- void GenerateImplicitNullCheck(HNullCheck* instruction);
- void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateNop() OVERRIDE;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// We will fix this up in the linker later to have the right value.
static constexpr int32_t kDummy32BitOffset = 256;
private:
- struct PcRelativeDexCacheAccessInfo {
- PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
- : target_dex_file(dex_file), element_offset(element_off), label() { }
-
- const DexFile& target_dex_file;
- uint32_t element_offset;
- Label label;
- };
+ template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
+ ArenaVector<LinkerPatch>* linker_patches);
// Labels for each block that will be compiled.
Label* block_labels_; // Indexed by block id.
@@ -571,16 +586,16 @@ class CodeGeneratorX86_64 : public CodeGenerator {
int constant_area_start_;
// Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
- ArenaDeque<MethodPatchInfo<Label>> method_patches_;
- ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+ ArenaDeque<PatchInfo<Label>> method_patches_;
+ ArenaDeque<PatchInfo<Label>> relative_call_patches_;
// PC-relative DexCache access info.
- ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
+ ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
// Patch locations for patchoat where the linker doesn't do any other work.
ArenaDeque<Label> simple_patches_;
- // String patch locations.
- ArenaDeque<StringPatchInfo<Label>> string_patches_;
+ // String patch locations; type depends on configuration (app .bss or boot image PIC).
+ ArenaDeque<PatchInfo<Label>> string_patches_;
// Type patch locations.
- ArenaDeque<TypePatchInfo<Label>> type_patches_;
+ ArenaDeque<PatchInfo<Label>> type_patches_;
// Fixups for jump tables need to be handled specially.
ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index fe9a7af250..9ec32df578 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -29,12 +29,6 @@
#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "builder.h"
-#include "code_generator_arm.h"
-#include "code_generator_arm64.h"
-#include "code_generator_mips.h"
-#include "code_generator_mips64.h"
-#include "code_generator_x86.h"
-#include "code_generator_x86_64.h"
#include "code_simulator_container.h"
#include "common_compiler_test.h"
#include "dex_file.h"
@@ -47,15 +41,60 @@
#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
#include "utils.h"
+#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/mips/managed_register_mips.h"
#include "utils/mips64/managed_register_mips64.h"
#include "utils/x86/managed_register_x86.h"
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "code_generator_arm.h"
+#include "code_generator_arm_vixl.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "code_generator_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "code_generator_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "code_generator_x86_64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "code_generator_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "code_generator_mips64.h"
+#endif
+
#include "gtest/gtest.h"
namespace art {
+typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&);
+
+class CodegenTargetConfig {
+ public:
+ CodegenTargetConfig(InstructionSet isa, CreateCodegenFn create_codegen)
+ : isa_(isa), create_codegen_(create_codegen) {
+ }
+ InstructionSet GetInstructionSet() const { return isa_; }
+ CodeGenerator* CreateCodeGenerator(HGraph* graph, const CompilerOptions& compiler_options) {
+ return create_codegen_(graph, compiler_options);
+ }
+
+ private:
+ CodegenTargetConfig() {}
+ InstructionSet isa_;
+ CreateCodegenFn create_codegen_;
+};
+
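
The CodegenTargetConfig introduced above pairs an instruction set with a factory function so the test only iterates over the backends that were compiled in. A minimal standalone sketch of the same function-pointer table idea, with stand-in types rather than the ART classes (not part of the patch):

#include <cstdio>
#include <vector>

// Simplified stand-ins for HGraph, CompilerOptions and CodeGenerator.
struct Graph {};
struct Options {};
struct Codegen { const char* isa; };

using CreateCodegenFn = Codegen* (*)(Graph*, const Options&);

struct TargetConfig {
  const char* isa;
  CreateCodegenFn create;
};

static Codegen* CreateX86_64(Graph*, const Options&) { return new Codegen{"x86_64"}; }
static Codegen* CreateArm(Graph*, const Options&) { return new Codegen{"arm"}; }

int main() {
  // Only the entries for enabled backends would be registered in the real test.
  std::vector<TargetConfig> targets = {{"x86_64", CreateX86_64}, {"arm", CreateArm}};
  Graph graph;
  Options options;
  for (const TargetConfig& target : targets) {
    Codegen* codegen = target.create(&graph, options);
    std::printf("created codegen for %s\n", codegen->isa);
    delete codegen;
  }
  return 0;
}
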
+#ifdef ART_ENABLE_CODEGEN_arm
// Provide our own codegen, that ensures the C calling conventions
// are preserved. Currently, ART and C do not match as R4 is caller-save
// in ART, and callee-save in C. Alternatively, we could use or write
@@ -76,11 +115,31 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
blocked_core_registers_[arm::R4] = true;
blocked_core_registers_[arm::R6] = false;
blocked_core_registers_[arm::R7] = false;
- // Makes pair R6-R7 available.
- blocked_register_pairs_[arm::R6_R7] = false;
}
};
+// A way to test the VIXL32-based code generator on ARM. This will replace
+// TestCodeGeneratorARM when the VIXL32-based backend replaces the existing one.
+class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
+ public:
+ TestCodeGeneratorARMVIXL(HGraph* graph,
+ const ArmInstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
+ : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) {
+ AddAllocatedRegister(Location::RegisterLocation(arm::R6));
+ AddAllocatedRegister(Location::RegisterLocation(arm::R7));
+ }
+
+ void SetupBlockedRegisters() const OVERRIDE {
+ arm::CodeGeneratorARMVIXL::SetupBlockedRegisters();
+ blocked_core_registers_[arm::R4] = true;
+ blocked_core_registers_[arm::R6] = false;
+ blocked_core_registers_[arm::R7] = false;
+ }
+};
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
public:
TestCodeGeneratorX86(HGraph* graph,
@@ -95,16 +154,12 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
x86::CodeGeneratorX86::SetupBlockedRegisters();
// ebx is a callee-save register in C, but caller-save for ART.
blocked_core_registers_[x86::EBX] = true;
- blocked_register_pairs_[x86::EAX_EBX] = true;
- blocked_register_pairs_[x86::EDX_EBX] = true;
- blocked_register_pairs_[x86::ECX_EBX] = true;
- blocked_register_pairs_[x86::EBX_EDI] = true;
// Make edi available.
blocked_core_registers_[x86::EDI] = false;
- blocked_register_pairs_[x86::ECX_EDI] = false;
}
};
+#endif
class InternalCodeAllocator : public CodeAllocator {
public:
@@ -200,12 +255,7 @@ static void Run(const InternalCodeAllocator& allocator,
VerifyGeneratedCode(target_isa, f, has_result, expected);
}
-template <typename Expected>
-static void RunCode(CodeGenerator* codegen,
- HGraph* graph,
- std::function<void(HGraph*)> hook_before_codegen,
- bool has_result,
- Expected expected) {
+static void ValidateGraph(HGraph* graph) {
GraphChecker graph_checker(graph);
graph_checker.Run();
if (!graph_checker.IsValid()) {
@@ -214,75 +264,137 @@ static void RunCode(CodeGenerator* codegen,
}
}
ASSERT_TRUE(graph_checker.IsValid());
+}
+template <typename Expected>
+static void RunCodeNoCheck(CodeGenerator* codegen,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
SsaLivenessAnalysis liveness(graph, codegen);
-
PrepareForRegisterAllocation(graph).Run();
liveness.Analyze();
RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
hook_before_codegen(graph);
-
InternalCodeAllocator allocator;
codegen->Compile(&allocator);
Run(allocator, *codegen, has_result, expected);
}
template <typename Expected>
-static void RunCode(InstructionSet target_isa,
+static void RunCode(CodeGenerator* codegen,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
+ ValidateGraph(graph);
+ RunCodeNoCheck(codegen, graph, hook_before_codegen, has_result, expected);
+}
+
+template <typename Expected>
+static void RunCode(CodegenTargetConfig target_config,
HGraph* graph,
std::function<void(HGraph*)> hook_before_codegen,
bool has_result,
Expected expected) {
CompilerOptions compiler_options;
- if (target_isa == kArm || target_isa == kThumb2) {
- std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
- ArmInstructionSetFeatures::FromCppDefines());
- TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
- RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kArm64) {
- std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
- Arm64InstructionSetFeatures::FromCppDefines());
- arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
- RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kX86) {
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
- RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kX86_64) {
- std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
- X86_64InstructionSetFeatures::FromCppDefines());
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
- RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kMips) {
- std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
- MipsInstructionSetFeatures::FromCppDefines());
- mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
- RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kMips64) {
- std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
- Mips64InstructionSetFeatures::FromCppDefines());
- mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
- RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
- }
+ CodeGenerator* codegen = target_config.CreateCodeGenerator(graph, compiler_options);
+ RunCode(codegen, graph, hook_before_codegen, has_result, expected);
}
-static ::std::vector<InstructionSet> GetTargetISAs() {
- ::std::vector<InstructionSet> v;
- // Add all ISAs that are executable on hardware or on simulator.
- const ::std::vector<InstructionSet> executable_isa_candidates = {
- kArm,
- kArm64,
- kThumb2,
- kX86,
- kX86_64,
- kMips,
- kMips64
+#ifdef ART_ENABLE_CODEGEN_arm
+CodeGenerator* create_codegen_arm(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
+ ArmInstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena()) TestCodeGeneratorARM(graph,
+ *features_arm.get(),
+ compiler_options);
+}
+
+CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
+ ArmInstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+ Arm64InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph,
+ *features_arm64.get(),
+ compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena()) TestCodeGeneratorX86(graph, *features_x86.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
+ X86_64InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
+ MipsInstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
+ Mips64InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options);
+}
+#endif
+
+// Return all combinations of ISA and code generator that are executable on
+// hardware or on a simulator, and that we'd like to test.
+static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
+ ::std::vector<CodegenTargetConfig> v;
+ ::std::vector<CodegenTargetConfig> test_config_candidates = {
+#ifdef ART_ENABLE_CODEGEN_arm
+ CodegenTargetConfig(kArm, create_codegen_arm),
+ CodegenTargetConfig(kThumb2, create_codegen_arm),
+ CodegenTargetConfig(kArm, create_codegen_arm_vixl32),
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+ CodegenTargetConfig(kArm64, create_codegen_arm64),
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+ CodegenTargetConfig(kX86, create_codegen_x86),
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ CodegenTargetConfig(kX86_64, create_codegen_x86_64),
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+ CodegenTargetConfig(kMips, create_codegen_mips),
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+ CodegenTargetConfig(kMips64, create_codegen_mips64)
+#endif
};
- for (auto target_isa : executable_isa_candidates) {
- if (CanExecute(target_isa)) {
- v.push_back(target_isa);
+ for (auto test_config : test_config_candidates) {
+ if (CanExecute(test_config.GetInstructionSet())) {
+ v.push_back(test_config);
}
}
@@ -292,26 +404,26 @@ static ::std::vector<InstructionSet> GetTargetISAs() {
static void TestCode(const uint16_t* data,
bool has_result = false,
int32_t expected = 0) {
- for (InstructionSet target_isa : GetTargetISAs()) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
ArenaPool pool;
ArenaAllocator arena(&pool);
HGraph* graph = CreateCFG(&arena, data);
// Remove suspend checks, they cannot be executed in this context.
RemoveSuspendChecks(graph);
- RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+ RunCode(target_config, graph, [](HGraph*) {}, has_result, expected);
}
}
static void TestCodeLong(const uint16_t* data,
bool has_result,
int64_t expected) {
- for (InstructionSet target_isa : GetTargetISAs()) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
ArenaPool pool;
ArenaAllocator arena(&pool);
HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong);
// Remove suspend checks, they cannot be executed in this context.
RemoveSuspendChecks(graph);
- RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+ RunCode(target_config, graph, [](HGraph*) {}, has_result, expected);
}
}
@@ -628,7 +740,7 @@ TEST_F(CodegenTest, ReturnMulIntLit16) {
}
TEST_F(CodegenTest, NonMaterializedCondition) {
- for (InstructionSet target_isa : GetTargetISAs()) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
@@ -676,12 +788,12 @@ TEST_F(CodegenTest, NonMaterializedCondition) {
block->InsertInstructionBefore(move, block->GetLastInstruction());
};
- RunCode(target_isa, graph, hook_before_codegen, true, 0);
+ RunCode(target_config, graph, hook_before_codegen, true, 0);
}
}
TEST_F(CodegenTest, MaterializedCondition1) {
- for (InstructionSet target_isa : GetTargetISAs()) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
// Check that condition are materialized correctly. A materialized condition
// should yield `1` if it evaluated to true, and `0` otherwise.
// We force the materialization of comparisons for different combinations of
@@ -723,13 +835,13 @@ TEST_F(CodegenTest, MaterializedCondition1) {
HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
block->InsertInstructionBefore(move, block->GetLastInstruction());
};
- RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
}
}
}
TEST_F(CodegenTest, MaterializedCondition2) {
- for (InstructionSet target_isa : GetTargetISAs()) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
// Check that HIf correctly interprets a materialized condition.
// We force the materialization of comparisons for different combinations of
// inputs. An HIf takes the materialized combination as input and returns a
@@ -791,7 +903,7 @@ TEST_F(CodegenTest, MaterializedCondition2) {
HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
block->InsertInstructionBefore(move, block->GetLastInstruction());
};
- RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
}
}
}
@@ -820,7 +932,7 @@ static void TestComparison(IfCondition condition,
int64_t i,
int64_t j,
Primitive::Type type,
- const InstructionSet target_isa) {
+ const CodegenTargetConfig target_config) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = CreateGraph(&allocator);
@@ -902,54 +1014,94 @@ static void TestComparison(IfCondition condition,
block->AddInstruction(new (&allocator) HReturn(comparison));
graph->BuildDominatorTree();
- RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result);
+ RunCode(target_config, graph, [](HGraph*) {}, true, expected_result);
}
TEST_F(CodegenTest, ComparisonsInt) {
- for (InstructionSet target_isa : GetTargetISAs()) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
for (int64_t i = -1; i <= 1; i++) {
for (int64_t j = -1; j <= 1; j++) {
- TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+ for (int cond = kCondFirst; cond <= kCondLast; cond++) {
+ TestComparison(static_cast<IfCondition>(cond), i, j, Primitive::kPrimInt, target_config);
+ }
}
}
}
}
TEST_F(CodegenTest, ComparisonsLong) {
- // TODO: make MIPS work for long
- if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
- return;
- }
-
- for (InstructionSet target_isa : GetTargetISAs()) {
- if (target_isa == kMips || target_isa == kMips64) {
- continue;
- }
-
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
for (int64_t i = -1; i <= 1; i++) {
for (int64_t j = -1; j <= 1; j++) {
- TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+ for (int cond = kCondFirst; cond <= kCondLast; cond++) {
+ TestComparison(static_cast<IfCondition>(cond), i, j, Primitive::kPrimLong, target_config);
+ }
}
}
}
}
+#ifdef ART_ENABLE_CODEGEN_mips
+TEST_F(CodegenTest, MipsClobberRA) {
+ std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
+ MipsInstructionSetFeatures::FromCppDefines());
+ if (!CanExecute(kMips) || features_mips->IsR6()) {
+ // HMipsComputeBaseMethodAddress and the NAL instruction behind it
+ // should only be generated on non-R6.
+ return;
+ }
+
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+
+ HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry_block);
+ graph->SetEntryBlock(entry_block);
+ entry_block->AddInstruction(new (&allocator) HGoto());
+
+ HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+
+ HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit_block);
+ graph->SetExitBlock(exit_block);
+ exit_block->AddInstruction(new (&allocator) HExit());
+
+ entry_block->AddSuccessor(block);
+ block->AddSuccessor(exit_block);
+
+ // To simplify matters, don't create PC-relative HLoadClass or HLoadString.
+ // Instead, generate HMipsComputeBaseMethodAddress directly.
+ HMipsComputeBaseMethodAddress* base = new (&allocator) HMipsComputeBaseMethodAddress();
+ block->AddInstruction(base);
+ // HMipsComputeBaseMethodAddress is defined as int, so just make the
+ // compiled method return it.
+ block->AddInstruction(new (&allocator) HReturn(base));
+
+ graph->BuildDominatorTree();
+
+ mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), CompilerOptions());
+ // Since there is no HLoadClass or HLoadString, we need to manually indicate
+ // that RA is clobbered and that the method entry code should generate a stack frame
+ // and preserve RA in it. And this is what we're testing here.
+ codegenMIPS.ClobberRA();
+ // Without ClobberRA() the code would be:
+ // nal # Sets RA to point to the jr instruction below
+ // move v0, ra # and the CPU falls into an infinite loop.
+ // jr ra
+ // nop
+ // The expected code is:
+ // addiu sp, sp, -16
+ // sw ra, 12(sp)
+ // sw a0, 0(sp)
+ // nal # Sets RA to point to the lw instruction below.
+ // move v0, ra
+ // lw ra, 12(sp)
+ // jr ra
+ // addiu sp, sp, 16
+ RunCode(&codegenMIPS, graph, [](HGraph*) {}, false, 0);
+}
+#endif
+
} // namespace art
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
new file mode 100644
index 0000000000..5d92bfd9cc
--- /dev/null
+++ b/compiler/optimizing/common_arm.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
+#define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch32/macro-assembler-aarch32.h"
+#pragma GCC diagnostic pop
+
+namespace art {
+namespace arm {
+namespace helpers {
+
+static_assert(vixl::aarch32::kSpCode == SP, "vixl::aarch32::kSpCode must equal ART's SP");
+
+inline dwarf::Reg DWARFReg(vixl::aarch32::Register reg) {
+ return dwarf::Reg::ArmCore(static_cast<int>(reg.GetCode()));
+}
+
+inline dwarf::Reg DWARFReg(vixl::aarch32::SRegister reg) {
+ return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode()));
+}
+
+inline vixl::aarch32::DRegister FromLowSToD(vixl::aarch32::SRegister reg) {
+ DCHECK_EQ(reg.GetCode() % 2, 0u) << reg;
+ return vixl::aarch32::DRegister(reg.GetCode() / 2);
+}
+
+inline vixl::aarch32::Register HighRegisterFrom(Location location) {
+ DCHECK(location.IsRegisterPair()) << location;
+ return vixl::aarch32::Register(location.AsRegisterPairHigh<vixl32::Register>());
+}
+
+inline vixl::aarch32::DRegister HighDRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegisterPair()) << location;
+ return vixl::aarch32::DRegister(location.AsFpuRegisterPairHigh<vixl32::DRegister>());
+}
+
+inline vixl::aarch32::Register LowRegisterFrom(Location location) {
+ DCHECK(location.IsRegisterPair()) << location;
+ return vixl::aarch32::Register(location.AsRegisterPairLow<vixl32::Register>());
+}
+
+inline vixl::aarch32::SRegister LowSRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegisterPair()) << location;
+ return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl32::SRegister>());
+}
+
+inline vixl::aarch32::Register RegisterFrom(Location location) {
+ DCHECK(location.IsRegister()) << location;
+ return vixl::aarch32::Register(location.reg());
+}
+
+inline vixl::aarch32::Register RegisterFrom(Location location, Primitive::Type type) {
+ DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type;
+ return RegisterFrom(location);
+}
+
+inline vixl::aarch32::DRegister DRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegisterPair()) << location;
+ int reg_code = location.low();
+ DCHECK_EQ(reg_code % 2, 0) << reg_code;
+ return vixl::aarch32::DRegister(reg_code / 2);
+}
+
+inline vixl::aarch32::SRegister SRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegister()) << location;
+ return vixl::aarch32::SRegister(location.reg());
+}
+
+inline vixl::aarch32::SRegister OutputSRegister(HInstruction* instr) {
+ Primitive::Type type = instr->GetType();
+ DCHECK_EQ(type, Primitive::kPrimFloat) << type;
+ return SRegisterFrom(instr->GetLocations()->Out());
+}
+
+inline vixl::aarch32::DRegister OutputDRegister(HInstruction* instr) {
+ Primitive::Type type = instr->GetType();
+ DCHECK_EQ(type, Primitive::kPrimDouble) << type;
+ return DRegisterFrom(instr->GetLocations()->Out());
+}
+
+inline vixl::aarch32::VRegister OutputVRegister(HInstruction* instr) {
+ Primitive::Type type = instr->GetType();
+ if (type == Primitive::kPrimFloat) {
+ return OutputSRegister(instr);
+ } else {
+ return OutputDRegister(instr);
+ }
+}
+
+inline vixl::aarch32::SRegister InputSRegisterAt(HInstruction* instr, int input_index) {
+ Primitive::Type type = instr->InputAt(input_index)->GetType();
+ DCHECK_EQ(type, Primitive::kPrimFloat) << type;
+ return SRegisterFrom(instr->GetLocations()->InAt(input_index));
+}
+
+inline vixl::aarch32::DRegister InputDRegisterAt(HInstruction* instr, int input_index) {
+ Primitive::Type type = instr->InputAt(input_index)->GetType();
+ DCHECK_EQ(type, Primitive::kPrimDouble) << type;
+ return DRegisterFrom(instr->GetLocations()->InAt(input_index));
+}
+
+inline vixl::aarch32::VRegister InputVRegisterAt(HInstruction* instr, int input_index) {
+ Primitive::Type type = instr->InputAt(input_index)->GetType();
+ if (type == Primitive::kPrimFloat) {
+ return InputSRegisterAt(instr, input_index);
+ } else {
+ return InputDRegisterAt(instr, input_index);
+ }
+}
+
+inline vixl::aarch32::Register OutputRegister(HInstruction* instr) {
+ return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
+}
+
+inline vixl::aarch32::Register InputRegisterAt(HInstruction* instr, int input_index) {
+ return RegisterFrom(instr->GetLocations()->InAt(input_index),
+ instr->InputAt(input_index)->GetType());
+}
+
+inline int64_t Int64ConstantFrom(Location location) {
+ HConstant* instr = location.GetConstant();
+ if (instr->IsIntConstant()) {
+ return instr->AsIntConstant()->GetValue();
+ } else if (instr->IsNullConstant()) {
+ return 0;
+ } else {
+ DCHECK(instr->IsLongConstant()) << instr->DebugName();
+ return instr->AsLongConstant()->GetValue();
+ }
+}
+
+inline vixl::aarch32::Operand OperandFrom(Location location, Primitive::Type type) {
+ if (location.IsRegister()) {
+ return vixl::aarch32::Operand(RegisterFrom(location, type));
+ } else {
+ return vixl::aarch32::Operand(Int64ConstantFrom(location));
+ }
+}
+
+inline vixl::aarch32::Operand InputOperandAt(HInstruction* instr, int input_index) {
+ return OperandFrom(instr->GetLocations()->InAt(input_index),
+ instr->InputAt(input_index)->GetType());
+}
+
+inline Location LocationFrom(const vixl::aarch32::Register& reg) {
+ return Location::RegisterLocation(reg.GetCode());
+}
+
+inline Location LocationFrom(const vixl::aarch32::SRegister& reg) {
+ return Location::FpuRegisterLocation(reg.GetCode());
+}
+
+inline Location LocationFrom(const vixl::aarch32::Register& low,
+ const vixl::aarch32::Register& high) {
+ return Location::RegisterPairLocation(low.GetCode(), high.GetCode());
+}
+
+inline Location LocationFrom(const vixl::aarch32::SRegister& low,
+ const vixl::aarch32::SRegister& high) {
+ return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode());
+}
+
+} // namespace helpers
+} // namespace arm
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
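
The helpers in this new header translate ART Location values into VIXL AArch32 registers. FromLowSToD, for instance, relies on the VFP aliasing rule that D<k> overlaps S<2k> and S<2k+1>, so only an even S-register code has a matching D register. A small standalone sketch of just that mapping, with a hypothetical DFromLowS helper in place of the VIXL types:

#include <cassert>
#include <cstdio>

// On AArch32 VFP, D<k> overlaps S<2k> and S<2k+1>; this is the conversion
// FromLowSToD performs, and the assert mirrors its DCHECK on an even S code.
int DFromLowS(int s_code) {
  assert(s_code % 2 == 0 && "expected the low half of a D register");
  return s_code / 2;
}

int main() {
  printf("S0  -> D%d\n", DFromLowS(0));   // D0
  printf("S10 -> D%d\n", DFromLowS(10));  // D5
  return 0;
}
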
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index af0ee4e197..776a483d43 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -22,8 +22,13 @@
#include "nodes.h"
#include "utils/arm64/assembler_arm64.h"
-#include "a64/disasm-a64.h"
-#include "a64/macro-assembler-a64.h"
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+#pragma GCC diagnostic pop
namespace art {
namespace arm64 {
@@ -33,7 +38,7 @@ namespace helpers {
static_assert((SP == 31) && (WSP == 31) && (XZR == 32) && (WZR == 32),
"Unexpected values for register codes.");
-static inline int VIXLRegCodeFromART(int code) {
+inline int VIXLRegCodeFromART(int code) {
if (code == SP) {
return vixl::aarch64::kSPRegInternalCode;
}
@@ -43,7 +48,7 @@ static inline int VIXLRegCodeFromART(int code) {
return code;
}
-static inline int ARTRegCodeFromVIXL(int code) {
+inline int ARTRegCodeFromVIXL(int code) {
if (code == vixl::aarch64::kSPRegInternalCode) {
return SP;
}
@@ -53,73 +58,85 @@ static inline int ARTRegCodeFromVIXL(int code) {
return code;
}
-static inline vixl::aarch64::Register XRegisterFrom(Location location) {
+inline vixl::aarch64::Register XRegisterFrom(Location location) {
DCHECK(location.IsRegister()) << location;
return vixl::aarch64::Register::GetXRegFromCode(VIXLRegCodeFromART(location.reg()));
}
-static inline vixl::aarch64::Register WRegisterFrom(Location location) {
+inline vixl::aarch64::Register WRegisterFrom(Location location) {
DCHECK(location.IsRegister()) << location;
return vixl::aarch64::Register::GetWRegFromCode(VIXLRegCodeFromART(location.reg()));
}
-static inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) {
+inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) {
DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type;
return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location);
}
-static inline vixl::aarch64::Register OutputRegister(HInstruction* instr) {
+inline vixl::aarch64::Register OutputRegister(HInstruction* instr) {
return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
}
-static inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) {
+inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) {
return RegisterFrom(instr->GetLocations()->InAt(input_index),
instr->InputAt(input_index)->GetType());
}
-static inline vixl::aarch64::FPRegister DRegisterFrom(Location location) {
+inline vixl::aarch64::FPRegister DRegisterFrom(Location location) {
DCHECK(location.IsFpuRegister()) << location;
return vixl::aarch64::FPRegister::GetDRegFromCode(location.reg());
}
-static inline vixl::aarch64::FPRegister SRegisterFrom(Location location) {
+inline vixl::aarch64::FPRegister SRegisterFrom(Location location) {
DCHECK(location.IsFpuRegister()) << location;
return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg());
}
-static inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
+inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
DCHECK(Primitive::IsFloatingPointType(type)) << type;
return type == Primitive::kPrimDouble ? DRegisterFrom(location) : SRegisterFrom(location);
}
-static inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) {
+inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) {
return FPRegisterFrom(instr->GetLocations()->Out(), instr->GetType());
}
-static inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
+inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
return FPRegisterFrom(instr->GetLocations()->InAt(input_index),
instr->InputAt(input_index)->GetType());
}
-static inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
+inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
return Primitive::IsFloatingPointType(type)
? vixl::aarch64::CPURegister(FPRegisterFrom(location, type))
: vixl::aarch64::CPURegister(RegisterFrom(location, type));
}
-static inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) {
+inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) {
return Primitive::IsFloatingPointType(instr->GetType())
? static_cast<vixl::aarch64::CPURegister>(OutputFPRegister(instr))
: static_cast<vixl::aarch64::CPURegister>(OutputRegister(instr));
}
-static inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
+inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
return Primitive::IsFloatingPointType(instr->InputAt(index)->GetType())
? static_cast<vixl::aarch64::CPURegister>(InputFPRegisterAt(instr, index))
: static_cast<vixl::aarch64::CPURegister>(InputRegisterAt(instr, index));
}
-static inline int64_t Int64ConstantFrom(Location location) {
+inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* instr,
+ int index) {
+ HInstruction* input = instr->InputAt(index);
+ Primitive::Type input_type = input->GetType();
+ if (input->IsConstant() && input->AsConstant()->IsZeroBitPattern()) {
+ return (Primitive::ComponentSize(input_type) >= vixl::aarch64::kXRegSizeInBytes)
+ ? vixl::aarch64::xzr
+ : vixl::aarch64::wzr;
+ }
+ return InputCPURegisterAt(instr, index);
+}
+
+inline int64_t Int64ConstantFrom(Location location) {
HConstant* instr = location.GetConstant();
if (instr->IsIntConstant()) {
return instr->AsIntConstant()->GetValue();
@@ -131,7 +148,7 @@ static inline int64_t Int64ConstantFrom(Location location) {
}
}
-static inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) {
+inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) {
if (location.IsRegister()) {
return vixl::aarch64::Operand(RegisterFrom(location, type));
} else {
@@ -139,23 +156,23 @@ static inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::T
}
}
-static inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) {
+inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) {
return OperandFrom(instr->GetLocations()->InAt(input_index),
instr->InputAt(input_index)->GetType());
}
-static inline vixl::aarch64::MemOperand StackOperandFrom(Location location) {
+inline vixl::aarch64::MemOperand StackOperandFrom(Location location) {
return vixl::aarch64::MemOperand(vixl::aarch64::sp, location.GetStackIndex());
}
-static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
size_t offset = 0) {
// A heap reference must be 32 bits, so it fits in a W register.
DCHECK(base.IsW());
return vixl::aarch64::MemOperand(base.X(), offset);
}
-static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
const vixl::aarch64::Register& regoffset,
vixl::aarch64::Shift shift = vixl::aarch64::LSL,
unsigned shift_amount = 0) {
@@ -164,24 +181,24 @@ static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Registe
return vixl::aarch64::MemOperand(base.X(), regoffset, shift, shift_amount);
}
-static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
Offset offset) {
return HeapOperand(base, offset.SizeValue());
}
-static inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) {
+inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) {
return HeapOperand(RegisterFrom(location, Primitive::kPrimNot), offset);
}
-static inline Location LocationFrom(const vixl::aarch64::Register& reg) {
+inline Location LocationFrom(const vixl::aarch64::Register& reg) {
return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.GetCode()));
}
-static inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) {
+inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) {
return Location::FpuRegisterLocation(fpreg.GetCode());
}
-static inline vixl::aarch64::Operand OperandFromMemOperand(
+inline vixl::aarch64::Operand OperandFromMemOperand(
const vixl::aarch64::MemOperand& mem_op) {
if (mem_op.IsImmediateOffset()) {
return vixl::aarch64::Operand(mem_op.GetOffset());
@@ -202,7 +219,7 @@ static inline vixl::aarch64::Operand OperandFromMemOperand(
}
}
-static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
+inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant())
<< constant->DebugName();
@@ -241,7 +258,7 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst
}
}
-static inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
+inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
HInstruction* instr) {
if (constant->IsConstant()
&& CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
@@ -255,10 +272,10 @@ static inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
// codes are the same, we can initialize the vixl register list simply by the register masks. Currently,
// only SP/WSP and XZR/WZR codes are different between art and vixl.
// Note: This function is only used for debug checks.
-static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers,
- size_t num_core,
- uint32_t art_fpu_registers,
- size_t num_fpu) {
+inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers,
+ size_t num_core,
+ uint32_t art_fpu_registers,
+ size_t num_fpu) {
// The register masks won't work if the number of register is larger than 32.
DCHECK_GE(sizeof(art_core_registers) * 8, num_core);
DCHECK_GE(sizeof(art_fpu_registers) * 8, num_fpu);
@@ -273,7 +290,7 @@ static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers,
return true;
}
-static inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
switch (op_kind) {
case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
@@ -285,7 +302,7 @@ static inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::
}
}
-static inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
switch (op_kind) {
case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
case HArm64DataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
@@ -300,7 +317,7 @@ static inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp
}
}
-static inline bool CanFitInShifterOperand(HInstruction* instruction) {
+inline bool CanFitInShifterOperand(HInstruction* instruction) {
if (instruction->IsTypeConversion()) {
HTypeConversion* conversion = instruction->AsTypeConversion();
Primitive::Type result_type = conversion->GetResultType();
@@ -315,7 +332,7 @@ static inline bool CanFitInShifterOperand(HInstruction* instruction) {
}
}
-static inline bool HasShifterOperand(HInstruction* instr) {
+inline bool HasShifterOperand(HInstruction* instr) {
// `neg` instructions are an alias of `sub` using the zero register as the
// first register input.
bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() ||
@@ -323,7 +340,7 @@ static inline bool HasShifterOperand(HInstruction* instr) {
return res;
}
-static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
+inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
DCHECK(HasShifterOperand(instruction));
// Although the `neg` instruction is an alias of the `sub` instruction, `HNeg`
// does *not* support extension. This is because the `extended register` form
@@ -334,6 +351,10 @@ static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
return instruction->IsAdd() || instruction->IsSub();
}
+inline bool IsConstantZeroBitPattern(const HInstruction* instruction) {
+ return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
+}
+
} // namespace helpers
} // namespace arm64
} // namespace art
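
The new InputCPURegisterOrZeroRegAt lets a zero-valued constant input be materialized as the AArch64 zero register instead of occupying an allocated register: xzr when the operand is at least X-register sized, wzr otherwise. A standalone sketch of that size check, with an illustrative ZeroRegisterFor helper standing in for the VIXL register objects:

#include <cstddef>
#include <cstdio>

// Mirrors the choice made by InputCPURegisterOrZeroRegAt: zero operands of
// 8 bytes or more use xzr, narrower ones use wzr. The sizes are the usual
// AArch64 ones and serve only as an illustration.
const char* ZeroRegisterFor(std::size_t operand_size_in_bytes) {
  constexpr std::size_t kXRegSizeInBytes = 8;
  return operand_size_in_bytes >= kXRegSizeInBytes ? "xzr" : "wzr";
}

int main() {
  printf("32-bit zero constant -> %s\n", ZeroRegisterFor(4));  // wzr
  printf("64-bit zero constant -> %s\n", ZeroRegisterFor(8));  // xzr
  return 0;
}
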
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 0614945ddc..5f39a49d68 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -47,6 +47,9 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
private:
void VisitShift(HBinaryOperation* shift);
+ void VisitEqual(HEqual* instruction) OVERRIDE;
+ void VisitNotEqual(HNotEqual* instruction) OVERRIDE;
+
void VisitAbove(HAbove* instruction) OVERRIDE;
void VisitAboveOrEqual(HAboveOrEqual* instruction) OVERRIDE;
void VisitBelow(HBelow* instruction) OVERRIDE;
@@ -140,6 +143,30 @@ void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instr
}
}
+void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) {
+ if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
+ (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
+ // Replace code looking like
+ // EQUAL lhs, null
+ // where lhs cannot be null with
+ // CONSTANT false
+ instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instruction) {
+ if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
+ (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
+ // Replace code looking like
+ // NOT_EQUAL lhs, null
+ // where lhs cannot be null with
+ // CONSTANT true
+ instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
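
The two visitors added above fold a comparison against the null constant to a boolean constant whenever the other operand is known to be non-null. A toy standalone version of the same rule; Value, TryFoldEqual and their fields are invented for illustration and are not ART's IR:

#include <cstdio>

// When one side is the null constant and the other side can never be null,
// EQUAL folds to false (and NOT_EQUAL would fold to true in the same way).
struct Value {
  bool is_null_constant;
  bool can_be_null;
};

bool TryFoldEqual(const Value& lhs, const Value& rhs, bool* result) {
  if ((lhs.is_null_constant && !rhs.can_be_null) ||
      (rhs.is_null_constant && !lhs.can_be_null)) {
    *result = false;  // a value that cannot be null never equals null
    return true;
  }
  return false;  // not foldable
}

int main() {
  Value null_constant = {true, true};
  Value new_instance = {false, false};  // e.g. the result of an allocation
  bool folded = true;
  if (TryFoldEqual(new_instance, null_constant, &folded)) {
    printf("obj == null folds to %s\n", folded ? "true" : "false");  // false
  }
  return 0;
}
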
void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) {
if (instruction->GetLeft()->IsConstant() &&
instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index e1bde7c737..9de521ad8d 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -16,8 +16,9 @@
#include "dead_code_elimination.h"
-#include "utils/array_ref.h"
+#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
+#include "base/stl_util.h"
#include "ssa_phi_elimination.h"
namespace art {
@@ -168,8 +169,7 @@ bool HDeadCodeElimination::SimplifyIfs() {
bool simplified_one_or_more_ifs = false;
bool rerun_dominance_and_loop_analysis = false;
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
HInstruction* last = block->GetLastInstruction();
HInstruction* first = block->GetFirstInstruction();
if (last->IsIf() &&
@@ -271,20 +271,22 @@ bool HDeadCodeElimination::SimplifyIfs() {
}
void HDeadCodeElimination::ConnectSuccessiveBlocks() {
- // Order does not matter.
- for (HReversePostOrderIterator it(*graph_); !it.Done();) {
- HBasicBlock* block = it.Current();
- if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
- it.Advance();
- continue;
- }
- HBasicBlock* successor = block->GetSingleSuccessor();
- if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
- it.Advance();
- continue;
+ // Order does not matter. Skip the entry block by starting at index 1 in reverse post order.
+ for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) {
+ HBasicBlock* block = graph_->GetReversePostOrder()[i];
+ DCHECK(!block->IsEntryBlock());
+ while (block->GetLastInstruction()->IsGoto()) {
+ HBasicBlock* successor = block->GetSingleSuccessor();
+ if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
+ break;
+ }
+ DCHECK_LT(i, IndexOfElement(graph_->GetReversePostOrder(), successor));
+ block->MergeWith(successor);
+ --size;
+ DCHECK_EQ(size, graph_->GetReversePostOrder().size());
+ DCHECK_EQ(block, graph_->GetReversePostOrder()[i]);
+ // Reiterate on this block in case it can be merged with its new successor.
}
- block->MergeWith(successor);
- // Reiterate on this block in case it can be merged with its new successor.
}
}
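
The rewritten ConnectSuccessiveBlocks above keeps merging a block with its single-predecessor successor, re-checking the same block after each merge instead of advancing. A compact standalone analogue of that inner loop; Block, MergeWithSuccessors and their fields are illustrative and far simpler than ART's HBasicBlock:

#include <cstdio>
#include <vector>

struct Block {
  std::vector<int> instructions;
  Block* successor;
  int predecessor_count;
};

// Merge the block with its successor while that successor has exactly one
// predecessor, repeating on the same (now larger) block each time.
void MergeWithSuccessors(Block* block) {
  while (block->successor != nullptr && block->successor->predecessor_count == 1) {
    Block* next = block->successor;
    block->instructions.insert(block->instructions.end(),
                               next->instructions.begin(),
                               next->instructions.end());
    block->successor = next->successor;  // keep merging into the same block
    next->successor = nullptr;           // detach the absorbed block
  }
}

int main() {
  Block a = {{1}, nullptr, 0};
  Block b = {{2}, nullptr, 1};
  Block c = {{3}, nullptr, 1};
  a.successor = &b;
  b.successor = &c;
  MergeWithSuccessors(&a);
  for (int instruction : a.instructions) printf("%d ", instruction);  // 1 2 3
  printf("\n");
  return 0;
}
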
@@ -300,8 +302,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() {
// Remove all dead blocks. Iterate in post order because removal needs the
// block's chain of dominators and nested loops need to be updated from the
// inside out.
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
int id = block->GetBlockId();
if (!live_blocks.IsBitSet(id)) {
MaybeRecordDeadBlock(block);
@@ -332,8 +333,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() {
void HDeadCodeElimination::RemoveDeadInstructions() {
// Process basic blocks in post-order in the dominator tree, so that
// a dead instruction depending on another dead instruction is removed.
- for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) {
- HBasicBlock* block = b.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
// Traverse this block's instructions in backward order and remove
// the unused ones.
HBackwardInstructionIterator i(block->GetInstructions());
@@ -343,14 +343,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() {
for (i.Advance(); !i.Done(); i.Advance()) {
HInstruction* inst = i.Current();
DCHECK(!inst->IsControlFlow());
- if (!inst->HasSideEffects()
- && !inst->CanThrow()
- && !inst->IsSuspendCheck()
- && !inst->IsNativeDebugInfo()
- // If we added an explicit barrier then we should keep it.
- && !inst->IsMemoryBarrier()
- && !inst->IsParameterValue()
- && !inst->HasUses()) {
+ if (inst->IsDeadAndRemovable()) {
block->RemoveInstruction(inst);
MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction);
}
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 0ce0ec1402..58e700deba 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -31,13 +31,11 @@ class HDeadCodeElimination : public HOptimization {
public:
HDeadCodeElimination(HGraph* graph,
OptimizingCompilerStats* stats = nullptr,
- const char* name = kInitialDeadCodeEliminationPassName)
+ const char* name = kDeadCodeEliminationPassName)
: HOptimization(graph, name, stats) {}
void Run() OVERRIDE;
-
- static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination";
- static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final";
+ static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination";
private:
void MaybeRecordDeadBlock(HBasicBlock* block);
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index 14c318e21f..82b81238ab 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -17,6 +17,8 @@
#include "dex_cache_array_fixups_arm.h"
#include "base/arena_containers.h"
+#include "code_generator_arm.h"
+#include "intrinsics_arm.h"
#include "utils/dex_cache_arrays_layout-inl.h"
namespace art {
@@ -27,8 +29,9 @@ namespace arm {
*/
class DexCacheArrayFixupsVisitor : public HGraphVisitor {
public:
- explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+ DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen)
: HGraphVisitor(graph),
+ codegen_(down_cast<CodeGeneratorARM*>(codegen)),
dex_cache_array_bases_(std::less<const DexFile*>(),
// Attribute memory use to code generator.
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
@@ -59,31 +62,15 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
}
}
- void VisitLoadString(HLoadString* load_string) OVERRIDE {
- // If this is a load with PC-relative access to the dex cache strings array,
- // we need to add the dex cache arrays base as the special input.
- if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
- // Initialize base for target dex file if needed.
- const DexFile& dex_file = load_string->GetDexFile();
- HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
- // Update the element offset in base.
- DexCacheArraysLayout layout(kArmPointerSize, &dex_file);
- base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
- // Add the special argument base to the load.
- load_string->AddSpecialInput(base);
- }
- }
-
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
// If this is an invoke with PC-relative access to the dex cache methods array,
// we need to add the dex cache arrays base as the special input.
- if (invoke->HasPcRelativeDexCache()) {
- // Initialize base for target method dex file if needed.
- MethodReference target_method = invoke->GetTargetMethod();
- HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+ if (invoke->HasPcRelativeDexCache() &&
+ !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARM>(invoke, codegen_)) {
+ HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(invoke->GetDexFile());
// Update the element offset in base.
- DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file);
- base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+ DexCacheArraysLayout layout(kArmPointerSize, &invoke->GetDexFile());
+ base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex()));
// Add the special argument base to the method.
DCHECK(!invoke->HasCurrentMethodInput());
invoke->AddSpecialInput(base);
@@ -107,6 +94,8 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
return base;
}
+ CodeGeneratorARM* codegen_;
+
using DexCacheArraysBaseMap =
ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
DexCacheArraysBaseMap dex_cache_array_bases_;
@@ -118,7 +107,7 @@ void DexCacheArrayFixups::Run() {
// that can be live-in at the irreducible loop header.
return;
}
- DexCacheArrayFixupsVisitor visitor(graph_);
+ DexCacheArrayFixupsVisitor visitor(graph_, codegen_);
visitor.VisitInsertionOrder();
visitor.MoveBasesIfNeeded();
}
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
index 015f910328..9d67a319b9 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.h
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -21,14 +21,23 @@
#include "optimization.h"
namespace art {
+
+class CodeGenerator;
+
namespace arm {
class DexCacheArrayFixups : public HOptimization {
public:
- DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+ DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kDexCacheArrayFixupsArmPassName, stats),
+ codegen_(codegen) {}
+
+ static constexpr const char* kDexCacheArrayFixupsArmPassName = "dex_cache_array_fixups_arm";
void Run() OVERRIDE;
+
+ private:
+ CodeGenerator* codegen_;
};
} // namespace arm
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
index 19bab08eb4..31fff26dd5 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -18,6 +18,7 @@
#include "dex_cache_array_fixups_mips.h"
#include "base/arena_containers.h"
+#include "intrinsics_mips.h"
#include "utils/dex_cache_arrays_layout-inl.h"
namespace art {
@@ -67,31 +68,16 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
}
}
- void VisitLoadString(HLoadString* load_string) OVERRIDE {
- // If this is a load with PC-relative access to the dex cache strings array,
- // we need to add the dex cache arrays base as the special input.
- if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
- // Initialize base for target dex file if needed.
- const DexFile& dex_file = load_string->GetDexFile();
- HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
- // Update the element offset in base.
- DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
- base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
- // Add the special argument base to the load.
- load_string->AddSpecialInput(base);
- }
- }
-
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
// If this is an invoke with PC-relative access to the dex cache methods array,
// we need to add the dex cache arrays base as the special input.
- if (invoke->HasPcRelativeDexCache()) {
+ if (invoke->HasPcRelativeDexCache() &&
+ !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) {
// Initialize base for target method dex file if needed.
- MethodReference target_method = invoke->GetTargetMethod();
- HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+ HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(invoke->GetDexFile());
// Update the element offset in base.
- DexCacheArraysLayout layout(kMipsPointerSize, target_method.dex_file);
- base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+ DexCacheArraysLayout layout(kMipsPointerSize, &invoke->GetDexFile());
+ base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex()));
// Add the special argument base to the method.
DCHECK(!invoke->HasCurrentMethodInput());
invoke->AddSpecialInput(base);
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
index 21056e130a..861a199d6c 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.h
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.h
@@ -29,9 +29,11 @@ namespace mips {
class DexCacheArrayFixups : public HOptimization {
public:
DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
- : HOptimization(graph, "dex_cache_array_fixups_mips", stats),
+ : HOptimization(graph, kDexCacheArrayFixupsMipsPassName, stats),
codegen_(codegen) {}
+ static constexpr const char* kDexCacheArrayFixupsMipsPassName = "dex_cache_array_fixups_mips";
+
void Run() OVERRIDE;
private:
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 89d80cc281..09dcefa02c 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -122,7 +122,10 @@ class HGraphVisualizerDisassembler {
new DisassemblerOptions(/* absolute_addresses */ false,
base_address,
end_address,
- /* can_read_literals */ true)));
+ /* can_read_literals */ true,
+ Is64BitInstructionSet(instruction_set)
+ ? &Thread::DumpThreadOffset<PointerSize::k64>
+ : &Thread::DumpThreadOffset<PointerSize::k32>)));
}
~HGraphVisualizerDisassembler() {
@@ -438,13 +441,13 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
void VisitInvoke(HInvoke* invoke) OVERRIDE {
StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex();
- StartAttributeStream("method_name") << PrettyMethod(
- invoke->GetDexMethodIndex(), GetGraph()->GetDexFile(), /* with_signature */ false);
+ StartAttributeStream("method_name") << GetGraph()->GetDexFile().PrettyMethod(
+ invoke->GetDexMethodIndex(), /* with_signature */ false);
}
void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE {
VisitInvoke(invoke);
- StartAttributeStream("invoke_type") << invoke->GetOriginalInvokeType();
+ StartAttributeStream("invoke_type") << invoke->GetInvokeType();
}
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
@@ -462,15 +465,15 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
}
void VisitInstanceFieldGet(HInstanceFieldGet* iget) OVERRIDE {
- StartAttributeStream("field_name") << PrettyField(iget->GetFieldInfo().GetFieldIndex(),
- iget->GetFieldInfo().GetDexFile(),
+ StartAttributeStream("field_name") <<
+ iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << iget->GetFieldType();
}
void VisitInstanceFieldSet(HInstanceFieldSet* iset) OVERRIDE {
- StartAttributeStream("field_name") << PrettyField(iset->GetFieldInfo().GetFieldIndex(),
- iset->GetFieldInfo().GetDexFile(),
+ StartAttributeStream("field_name") <<
+ iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << iset->GetFieldType();
}
@@ -601,7 +604,8 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
: instruction->GetReferenceTypeInfo();
ScopedObjectAccess soa(Thread::Current());
if (info.IsValid()) {
- StartAttributeStream("klass") << PrettyDescriptor(info.GetTypeHandle().Get());
+ StartAttributeStream("klass")
+ << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get());
StartAttributeStream("can_be_null")
<< std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 1e86b75075..f5931a2f81 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -411,8 +411,8 @@ void GlobalValueNumberer::Run() {
// Use the reverse post order to ensure the non back-edge predecessors of a block are
// visited before the block itself.
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ VisitBasicBlock(block);
}
}
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 129c2a94b5..f2602fbf8c 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -23,12 +23,12 @@ namespace art {
* Since graph traversal may enter an SCC at any position, an initial representation may be rotated,
* along dependences, viz. any of (a, b, c, d), (d, a, b, c), (c, d, a, b), (b, c, d, a) assuming
* a chain of dependences (mutually independent items may occur in arbitrary order). For proper
- * classification, the lexicographically first entry-phi is rotated to the front.
+ * classification, the lexicographically first loop-phi is rotated to the front.
*/
static void RotateEntryPhiFirst(HLoopInformation* loop,
ArenaVector<HInstruction*>* scc,
ArenaVector<HInstruction*>* new_scc) {
- // Find very first entry-phi.
+ // Find very first loop-phi.
const HInstructionList& phis = loop->GetHeader()->GetPhis();
HInstruction* phi = nullptr;
size_t phi_pos = -1;
@@ -41,7 +41,7 @@ static void RotateEntryPhiFirst(HLoopInformation* loop,
}
}
- // If found, bring that entry-phi to front.
+ // If found, bring that loop-phi to front.
if (phi != nullptr) {
new_scc->clear();
for (size_t i = 0; i < size; i++) {
@@ -87,23 +87,24 @@ HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph)
: HOptimization(graph, kInductionPassName),
global_depth_(0),
stack_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
- scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
map_(std::less<HInstruction*>(),
graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+ scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
cycle_(std::less<HInstruction*>(),
graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+ type_(Primitive::kPrimVoid),
induction_(std::less<HLoopInformation*>(),
- graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
+ graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+ cycles_(std::less<HPhi*>(),
+ graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
}
void HInductionVarAnalysis::Run() {
// Detects sequence variables (generalized induction variables) during an outer to inner
// traversal of all loops using Gerlek's algorithm. The order is important to enable
// range analysis on outer loop while visiting inner loops.
- for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
- HBasicBlock* graph_block = it_graph.Current();
+ for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) {
// Don't analyze irreducible loops.
- // TODO(ajcbik): could/should we remove this restriction?
if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) {
VisitLoop(graph_block->GetLoopInformation());
}
@@ -121,7 +122,7 @@ void HInductionVarAnalysis::VisitLoop(HLoopInformation* loop) {
HBasicBlock* loop_block = it_loop.Current();
DCHECK(loop_block->IsInLoop());
if (loop_block->GetLoopInformation() != loop) {
- continue; // Inner loops already visited.
+ continue; // Inner loops visited later.
}
// Visit phi-operations and instructions.
for (HInstructionIterator it(loop_block->GetPhis()); !it.Done(); it.Advance()) {
@@ -245,13 +246,13 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) {
const size_t size = scc_.size();
DCHECK_GE(size, 1u);
- // Rotate proper entry-phi to front.
+ // Rotate proper loop-phi to front.
if (size > 1) {
ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis));
RotateEntryPhiFirst(loop, &scc_, &other);
}
- // Analyze from entry-phi onwards.
+ // Analyze from loop-phi onwards.
HInstruction* phi = scc_[0];
if (!phi->IsLoopHeaderPhi()) {
return;
@@ -263,6 +264,9 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) {
return;
}
+ // Store interesting cycle.
+ AssignCycle(phi->AsPhi());
+
// Singleton is wrap-around induction if all internal links have the same meaning.
if (size == 1) {
InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1);
@@ -285,6 +289,12 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) {
} else if (instruction->IsSub()) {
update = SolveAddSub(
loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1), kSub, true);
+ } else if (instruction->IsXor()) {
+ update = SolveXor(loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1));
+ } else if (instruction->IsEqual()) {
+ update = SolveTest(loop, phi, instruction, 0);
+ } else if (instruction->IsNotEqual()) {
+ update = SolveTest(loop, phi, instruction, 1);
} else if (instruction->IsTypeConversion()) {
update = SolveCnv(instruction->AsTypeConversion());
}
@@ -360,6 +370,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(Indu
// can be combined with an invariant to yield a similar result. Even two linear inputs can
// be combined. All other combinations fail, however.
if (a != nullptr && b != nullptr) {
+ type_ = Narrowest(type_, Narrowest(a->type, b->type));
if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
return CreateInvariantOp(op, a, b);
} else if (a->induction_class == kLinear && b->induction_class == kLinear) {
@@ -396,6 +407,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferMul(Inducti
// can be multiplied with an invariant to yield a similar but multiplied result.
// Two non-invariant inputs cannot be multiplied, however.
if (a != nullptr && b != nullptr) {
+ type_ = Narrowest(type_, Narrowest(a->type, b->type));
if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
return CreateInvariantOp(kMul, a, b);
} else if (a->induction_class == kInvariant) {
@@ -436,6 +448,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferNeg(Inducti
// Transfer over a unary negation: an invariant, linear, wrap-around, or periodic input
// yields a similar but negated induction as result.
if (a != nullptr) {
+ type_ = Narrowest(type_, a->type);
if (a->induction_class == kInvariant) {
return CreateInvariantOp(kNeg, nullptr, a);
}
@@ -553,6 +566,42 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopIn
return nullptr;
}
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveXor(HLoopInformation* loop,
+ HInstruction* entry_phi,
+ HInstruction* instruction,
+ HInstruction* x,
+ HInstruction* y) {
+ // Solve within a tight cycle on x = c ^ x or x = x ^ c.
+ if (entry_phi->InputCount() == 2 && instruction == entry_phi->InputAt(1)) {
+ InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0));
+ InductionInfo* a = LookupInfo(loop, x);
+ if (a != nullptr && a->induction_class == kInvariant && entry_phi == y) {
+ return CreateInduction(kPeriodic, CreateInvariantOp(kXor, a, initial), initial, type_);
+ }
+ InductionInfo* b = LookupInfo(loop, y);
+ if (b != nullptr && b->induction_class == kInvariant && entry_phi == x) {
+ return CreateInduction(kPeriodic, CreateInvariantOp(kXor, initial, b), initial, type_);
+ }
+ }
+ return nullptr;
+}
+
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveTest(HLoopInformation* loop,
+ HInstruction* entry_phi,
+ HInstruction* instruction,
+ int64_t opposite_value) {
+ // Detect hidden XOR construction in tight cycles on x = (x == 0) or x = (x != 1).
+ int64_t value = -1;
+ HInstruction* x = instruction->InputAt(0);
+ HInstruction* y = instruction->InputAt(1);
+ if (IsExact(LookupInfo(loop, x), &value) && value == opposite_value) {
+ return SolveXor(loop, entry_phi, instruction, graph_->GetIntConstant(1), y);
+ } else if (IsExact(LookupInfo(loop, y), &value) && value == opposite_value) {
+ return SolveXor(loop, entry_phi, instruction, x, graph_->GetIntConstant(1));
+ }
+ return nullptr;
+}
+
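A quick standalone sanity check (illustrative only, not part of the ART sources) of the periodic shapes that SolveXor and SolveTest classify above: x = x ^ c alternates between the initial value and initial ^ c, and x = (x == 0) behaves like x = x ^ 1 on the values {0, 1}.

// Standalone illustration (not ART code): both updates have period two.
#include <cassert>
#include <cstdint>

int main() {
  // x = x ^ c alternates between initial ^ c and initial.
  const int32_t c = 100;
  const int32_t initial = 1;
  int32_t x = initial;
  for (int i = 0; i < 10; i++) {
    x ^= c;
    assert(x == ((i % 2 == 0) ? (initial ^ c) : initial));
  }
  // x = (x == 0) is a hidden XOR with 1 on the values {0, 1}.
  int32_t k = 0;
  for (int i = 0; i < 10; i++) {
    k = (k == 0) ? 1 : 0;  // same effect as k = k ^ 1 for k in {0, 1}
    assert(k == ((i % 2 == 0) ? 1 : 0));
  }
  return 0;
}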
HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveCnv(HTypeConversion* conversion) {
Primitive::Type from = conversion->GetInputType();
Primitive::Type to = conversion->GetResultType();
@@ -714,10 +763,12 @@ void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop,
case kCondGE: op = kGE; break;
default: LOG(FATAL) << "CONDITION UNREACHABLE";
}
+ // Associate the trip count with the control instruction rather than the
+ // condition (even though the condition is its use), since the former
+ // provides a convenient use-free placeholder.
+ HInstruction* control = loop->GetHeader()->GetLastInstruction();
InductionInfo* taken_test = CreateInvariantOp(op, lower_expr, upper_expr);
- AssignInfo(loop,
- loop->GetHeader()->GetLastInstruction(),
- CreateTripCount(tcKind, trip_count, taken_test, type));
+ DCHECK(control->IsIf());
+ AssignInfo(loop, control, CreateTripCount(tcKind, trip_count, taken_test, type));
}
bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr,
@@ -848,8 +899,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv
int64_t value = -1;
if (IsExact(a, &value)) {
if (value == 0) {
- // Simplify 0 + b = b, 0 * b = 0.
- if (op == kAdd) {
+ // Simplify 0 + b = b, 0 ^ b = b, 0 * b = 0.
+ if (op == kAdd || op == kXor) {
return b;
} else if (op == kMul) {
return a;
@@ -865,8 +916,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv
}
if (IsExact(b, &value)) {
if (value == 0) {
- // Simplify a + 0 = a, a - 0 = a, a * 0 = 0, -0 = 0.
- if (op == kAdd || op == kSub) {
+ // Simplify a + 0 = a, a - 0 = a, a ^ 0 = a, a * 0 = 0, -0 = 0.
+ if (op == kAdd || op == kSub || op == kXor) {
return a;
} else if (op == kMul || op == kNeg) {
return b;
@@ -897,6 +948,23 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv
return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, b->type);
}
+
+void HInductionVarAnalysis::AssignCycle(HPhi* phi) {
+ ArenaSet<HInstruction*>* set = &cycles_.Put(phi, ArenaSet<HInstruction*>(
+ graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)))->second;
+ for (HInstruction* i : scc_) {
+ set->insert(i);
+ }
+}
+
+ArenaSet<HInstruction*>* HInductionVarAnalysis::LookupCycle(HPhi* phi) {
+ auto it = cycles_.find(phi);
+ if (it != cycles_.end()) {
+ return &it->second;
+ }
+ return nullptr;
+}
+
bool HInductionVarAnalysis::IsExact(InductionInfo* info, int64_t* value) {
return InductionVarRange(this).IsConstant(info, InductionVarRange::kExact, value);
}
@@ -937,6 +1005,7 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) {
case kNeg: inv += " - "; break;
case kMul: inv += " * "; break;
case kDiv: inv += " / "; break;
+ case kXor: inv += " ^ "; break;
case kLT: inv += " < "; break;
case kLE: inv += " <= "; break;
case kGT: inv += " > "; break;
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index 7c74816c26..70271799d2 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -39,9 +39,9 @@ class HInductionVarAnalysis : public HOptimization {
void Run() OVERRIDE;
- private:
static constexpr const char* kInductionPassName = "induction_var_analysis";
+ private:
struct NodeInfo {
explicit NodeInfo(uint32_t d) : depth(d), done(false) {}
uint32_t depth;
@@ -64,6 +64,7 @@ class HInductionVarAnalysis : public HOptimization {
kNeg,
kMul,
kDiv,
+ kXor,
kFetch,
// Trip-counts.
kTripCountInLoop, // valid in full loop; loop is finite
@@ -171,7 +172,16 @@ class HInductionVarAnalysis : public HOptimization {
HInstruction* x,
HInstruction* y,
InductionOp op,
- bool is_first_call);
+ bool is_first_call); // possibly swaps x and y to try again
+ InductionInfo* SolveXor(HLoopInformation* loop,
+ HInstruction* entry_phi,
+ HInstruction* instruction,
+ HInstruction* x,
+ HInstruction* y);
+ InductionInfo* SolveTest(HLoopInformation* loop,
+ HInstruction* entry_phi,
+ HInstruction* instruction,
+ int64_t opposite_value);
InductionInfo* SolveCnv(HTypeConversion* conversion);
// Trip count information.
@@ -204,6 +214,8 @@ class HInductionVarAnalysis : public HOptimization {
InductionInfo* LookupInfo(HLoopInformation* loop, HInstruction* instruction);
InductionInfo* CreateConstant(int64_t value, Primitive::Type type);
InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b);
+ void AssignCycle(HPhi* phi);
+ ArenaSet<HInstruction*>* LookupCycle(HPhi* phi);
// Constants.
bool IsExact(InductionInfo* info, /*out*/ int64_t* value);
@@ -219,8 +231,8 @@ class HInductionVarAnalysis : public HOptimization {
// Temporary book-keeping during the analysis.
uint32_t global_depth_;
ArenaVector<HInstruction*> stack_;
- ArenaVector<HInstruction*> scc_;
ArenaSafeMap<HInstruction*, NodeInfo> map_;
+ ArenaVector<HInstruction*> scc_;
ArenaSafeMap<HInstruction*, InductionInfo*> cycle_;
Primitive::Type type_;
@@ -230,6 +242,11 @@ class HInductionVarAnalysis : public HOptimization {
*/
ArenaSafeMap<HLoopInformation*, ArenaSafeMap<HInstruction*, InductionInfo*>> induction_;
+ /**
+ * Preserves induction cycle information for each loop-phi.
+ */
+ ArenaSafeMap<HPhi*, ArenaSet<HInstruction*>> cycles_;
+
friend class InductionVarAnalysisTest;
friend class InductionVarRange;
friend class InductionVarRangeTest;
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 580d24b74b..031f1d74a8 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -107,7 +107,7 @@ class InductionVarAnalysisTest : public CommonCompilerTest {
}
// Builds if-statement at depth d.
- HPhi* BuildIf(int d, HBasicBlock** ifT, HBasicBlock **ifF) {
+ HPhi* BuildIf(int d, HBasicBlock** ifT, HBasicBlock** ifF) {
HBasicBlock* cond = new (&allocator_) HBasicBlock(graph_);
HBasicBlock* ifTrue = new (&allocator_) HBasicBlock(graph_);
HBasicBlock* ifFalse = new (&allocator_) HBasicBlock(graph_);
@@ -157,6 +157,13 @@ class InductionVarAnalysisTest : public CommonCompilerTest {
iva_->LookupInfo(loop_body_[d]->GetLoopInformation(), instruction));
}
+ // Returns induction information of the trip-count of loop at depth d.
+ std::string GetTripCount(int d) {
+ HInstruction* control = loop_header_[d]->GetLastInstruction();
+ DCHECK(control->IsIf());
+ return GetInductionInfo(control, d);
+ }
+
// Returns true if instructions have identical induction.
bool HaveSameInduction(HInstruction* instruction1, HInstruction* instruction2) {
return HInductionVarAnalysis::InductionEqual(
@@ -239,8 +246,7 @@ TEST_F(InductionVarAnalysisTest, FindBasicInduction) {
EXPECT_FALSE(HaveSameInduction(store->InputAt(1), increment_[0]));
// Trip-count.
- EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
- GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+ EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(0).c_str());
}
TEST_F(InductionVarAnalysisTest, FindDerivedInduction) {
@@ -253,15 +259,15 @@ TEST_F(InductionVarAnalysisTest, FindDerivedInduction) {
// k = - i;
// }
BuildLoopNest(1);
- HInstruction *add = InsertInstruction(
+ HInstruction* add = InsertInstruction(
new (&allocator_) HAdd(Primitive::kPrimInt, constant100_, basic_[0]), 0);
- HInstruction *sub = InsertInstruction(
+ HInstruction* sub = InsertInstruction(
new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0);
- HInstruction *mul = InsertInstruction(
+ HInstruction* mul = InsertInstruction(
new (&allocator_) HMul(Primitive::kPrimInt, constant100_, basic_[0]), 0);
- HInstruction *shl = InsertInstruction(
+ HInstruction* shl = InsertInstruction(
new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0);
- HInstruction *neg = InsertInstruction(
+ HInstruction* neg = InsertInstruction(
new (&allocator_) HNeg(Primitive::kPrimInt, basic_[0]), 0);
PerformInductionVarAnalysis();
@@ -285,10 +291,10 @@ TEST_F(InductionVarAnalysisTest, FindChainInduction) {
HPhi* k = InsertLoopPhi(0, 0);
k->AddInput(constant0_);
- HInstruction *add = InsertInstruction(
+ HInstruction* add = InsertInstruction(
new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0);
HInstruction* store1 = InsertArrayStore(add, 0);
- HInstruction *sub = InsertInstruction(
+ HInstruction* sub = InsertInstruction(
new (&allocator_) HSub(Primitive::kPrimInt, add, constant1_), 0);
HInstruction* store2 = InsertArrayStore(sub, 0);
k->AddInput(sub);
@@ -375,7 +381,7 @@ TEST_F(InductionVarAnalysisTest, FindFirstOrderWrapAroundInduction) {
k->AddInput(constant0_);
HInstruction* store = InsertArrayStore(k, 0);
- HInstruction *sub = InsertInstruction(
+ HInstruction* sub = InsertInstruction(
new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0);
k->AddInput(sub);
PerformInductionVarAnalysis();
@@ -401,7 +407,7 @@ TEST_F(InductionVarAnalysisTest, FindSecondOrderWrapAroundInduction) {
HInstruction* store = InsertArrayStore(k, 0);
k->AddInput(t);
- HInstruction *sub = InsertInstruction(
+ HInstruction* sub = InsertInstruction(
new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0], 0), 0);
t->AddInput(sub);
PerformInductionVarAnalysis();
@@ -425,15 +431,15 @@ TEST_F(InductionVarAnalysisTest, FindWrapAroundDerivedInduction) {
HPhi* k = InsertLoopPhi(0, 0);
k->AddInput(constant0_);
- HInstruction *add = InsertInstruction(
+ HInstruction* add = InsertInstruction(
new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0);
- HInstruction *sub = InsertInstruction(
+ HInstruction* sub = InsertInstruction(
new (&allocator_) HSub(Primitive::kPrimInt, k, constant100_), 0);
- HInstruction *mul = InsertInstruction(
+ HInstruction* mul = InsertInstruction(
new (&allocator_) HMul(Primitive::kPrimInt, k, constant100_), 0);
- HInstruction *shl = InsertInstruction(
+ HInstruction* shl = InsertInstruction(
new (&allocator_) HShl(Primitive::kPrimInt, k, constant1_), 0);
- HInstruction *neg = InsertInstruction(
+ HInstruction* neg = InsertInstruction(
new (&allocator_) HNeg(Primitive::kPrimInt, k), 0);
k->AddInput(
InsertInstruction(new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0));
@@ -491,7 +497,7 @@ TEST_F(InductionVarAnalysisTest, FindIdiomaticPeriodicInduction) {
k->AddInput(constant0_);
HInstruction* store = InsertArrayStore(k, 0);
- HInstruction *sub = InsertInstruction(
+ HInstruction* sub = InsertInstruction(
new (&allocator_) HSub(Primitive::kPrimInt, constant1_, k), 0);
k->AddInput(sub);
PerformInductionVarAnalysis();
@@ -500,6 +506,131 @@ TEST_F(InductionVarAnalysisTest, FindIdiomaticPeriodicInduction) {
EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(sub, 0).c_str());
}
+TEST_F(InductionVarAnalysisTest, FindXorPeriodicInduction) {
+ // Setup:
+ // k = 0;
+ // for (int i = 0; i < 100; i++) {
+ // a[k] = 0;
+ // k = k ^ 1;
+ // }
+ BuildLoopNest(1);
+ HPhi* k = InsertLoopPhi(0, 0);
+ k->AddInput(constant0_);
+
+ HInstruction* store = InsertArrayStore(k, 0);
+ HInstruction* x = InsertInstruction(
+ new (&allocator_) HXor(Primitive::kPrimInt, k, constant1_), 0);
+ k->AddInput(x);
+ PerformInductionVarAnalysis();
+
+ EXPECT_STREQ("periodic((0), (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str());
+ EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindXorConstantLeftPeriodicInduction) {
+ // Setup:
+ // k = 1;
+ // for (int i = 0; i < 100; i++) {
+ // k = 1 ^ k;
+ // }
+ BuildLoopNest(1);
+ HPhi* k = InsertLoopPhi(0, 0);
+ k->AddInput(constant1_);
+
+ HInstruction* x = InsertInstruction(
+ new (&allocator_) HXor(Primitive::kPrimInt, constant1_, k), 0);
+ k->AddInput(x);
+ PerformInductionVarAnalysis();
+
+ EXPECT_STREQ("periodic(((1) ^ (1)), (1)):PrimInt", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindXor100PeriodicInduction) {
+ // Setup:
+ // k = 1;
+ // for (int i = 0; i < 100; i++) {
+ // k = k ^ 100;
+ // }
+ BuildLoopNest(1);
+ HPhi* k = InsertLoopPhi(0, 0);
+ k->AddInput(constant1_);
+
+ HInstruction* x = InsertInstruction(
+ new (&allocator_) HXor(Primitive::kPrimInt, k, constant100_), 0);
+ k->AddInput(x);
+ PerformInductionVarAnalysis();
+
+ EXPECT_STREQ("periodic(((1) ^ (100)), (1)):PrimInt", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindBooleanEqPeriodicInduction) {
+ // Setup:
+ // k = 0;
+ // for (int i = 0; i < 100; i++) {
+ // k = (k == 0);
+ // }
+ BuildLoopNest(1);
+ HPhi* k = InsertLoopPhi(0, 0);
+ k->AddInput(constant0_);
+
+ HInstruction* x = InsertInstruction(new (&allocator_) HEqual(k, constant0_), 0);
+ k->AddInput(x);
+ PerformInductionVarAnalysis();
+
+ EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindBooleanEqConstantLeftPeriodicInduction) {
+ // Setup:
+ // k = 0;
+ // for (int i = 0; i < 100; i++) {
+ // k = (0 == k);
+ // }
+ BuildLoopNest(1);
+ HPhi* k = InsertLoopPhi(0, 0);
+ k->AddInput(constant0_);
+
+ HInstruction* x = InsertInstruction(new (&allocator_) HEqual(constant0_, k), 0);
+ k->AddInput(x);
+ PerformInductionVarAnalysis();
+
+ EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindBooleanNePeriodicInduction) {
+ // Setup:
+ // k = 0;
+ // for (int i = 0; i < 100; i++) {
+ // k = (k != 1);
+ // }
+ BuildLoopNest(1);
+ HPhi* k = InsertLoopPhi(0, 0);
+ k->AddInput(constant0_);
+
+ HInstruction* x = InsertInstruction(new (&allocator_) HNotEqual(k, constant1_), 0);
+ k->AddInput(x);
+ PerformInductionVarAnalysis();
+
+ EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindBooleanNeConstantLeftPeriodicInduction) {
+ // Setup:
+ // k = 0;
+ // for (int i = 0; i < 100; i++) {
+ // k = (1 != k);
+ // }
+ BuildLoopNest(1);
+ HPhi* k = InsertLoopPhi(0, 0);
+ k->AddInput(constant0_);
+
+ HInstruction* x = InsertInstruction(new (&allocator_) HNotEqual(constant1_, k), 0);
+ k->AddInput(x);
+ PerformInductionVarAnalysis();
+
+ EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str());
+}
+
TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) {
// Setup:
// k = 0;
@@ -520,15 +651,15 @@ TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) {
k_header->AddInput(k_body);
// Derived expressions.
- HInstruction *add = InsertInstruction(
+ HInstruction* add = InsertInstruction(
new (&allocator_) HAdd(Primitive::kPrimInt, k_body, constant100_), 0);
- HInstruction *sub = InsertInstruction(
+ HInstruction* sub = InsertInstruction(
new (&allocator_) HSub(Primitive::kPrimInt, k_body, constant100_), 0);
- HInstruction *mul = InsertInstruction(
+ HInstruction* mul = InsertInstruction(
new (&allocator_) HMul(Primitive::kPrimInt, k_body, constant100_), 0);
- HInstruction *shl = InsertInstruction(
+ HInstruction* shl = InsertInstruction(
new (&allocator_) HShl(Primitive::kPrimInt, k_body, constant1_), 0);
- HInstruction *neg = InsertInstruction(
+ HInstruction* neg = InsertInstruction(
new (&allocator_) HNeg(Primitive::kPrimInt, k_body), 0);
PerformInductionVarAnalysis();
@@ -557,7 +688,7 @@ TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) {
k[d] = InsertLoopPhi(0, d);
}
- HInstruction *inc = InsertInstruction(
+ HInstruction* inc = InsertInstruction(
new (&allocator_) HAdd(Primitive::kPrimInt, constant1_, k[9]), 9);
HInstruction* store = InsertArrayStore(inc, 9);
@@ -579,8 +710,7 @@ TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) {
}
EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(increment_[d], d).c_str());
// Trip-count.
- EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
- GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str());
+ EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(d).c_str());
}
}
@@ -592,7 +722,7 @@ TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) {
// a[i] = 0;
// }
BuildLoopNest(1);
- HInstruction *conv = InsertInstruction(
+ HInstruction* conv = InsertInstruction(
new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
HInstruction* store1 = InsertArrayStore(conv, 0);
HInstruction* store2 = InsertArrayStore(basic_[0], 0);
@@ -607,8 +737,32 @@ TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) {
EXPECT_FALSE(HaveSameInduction(store1->InputAt(1), store2->InputAt(1)));
// Trip-count.
- EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
- GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+ EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, ByteInductionDerivedIntLoopControl) {
+ // Setup:
+ // for (int i = 0; i < 100; i++) {
+ // k = (byte) i;
+ // a[k] = 0;
+ // k = k + 1
+ // a[k] = 0;
+ // }
+ BuildLoopNest(1);
+ HInstruction* conv = InsertInstruction(
+ new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
+ HInstruction* store1 = InsertArrayStore(conv, 0);
+ HInstruction* add = InsertInstruction(
+ new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0);
+ HInstruction* store2 = InsertArrayStore(add, 0);
+
+ PerformInductionVarAnalysis();
+
+ // Byte induction (k) is "transferred" over the conversion into the addition (k + 1).
+ // This means only values within byte range can be trusted (even though the
+ // addition itself can, of course, jump out of that range).
+ EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str());
+ EXPECT_STREQ("((1) * i + (1)):PrimByte", GetInductionInfo(store2->InputAt(1), 0).c_str());
}
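To make the "only values within byte range can be trusted" caveat of the new test concrete, here is a small standalone sketch (hypothetical, not ART code) showing where the derived value k + 1 leaves the byte range:

// Hypothetical standalone check (not ART code): the derived value k + 1 for
// k = (byte) i is only meaningful while it stays inside the byte range.
#include <cstdint>
#include <iostream>

int main() {
  for (int32_t i = 125; i <= 130; i++) {
    int8_t k = static_cast<int8_t>(i);  // byte induction; wraps to -128 at i == 128
                                        // on two's-complement targets
    int32_t derived = k + 1;            // derived induction (k + 1), computed as int
    std::cout << "i=" << i << " k=" << static_cast<int32_t>(k)
              << " k+1=" << derived << "\n";
  }
  return 0;
}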
TEST_F(InductionVarAnalysisTest, ByteLoopControl1) {
@@ -626,8 +780,7 @@ TEST_F(InductionVarAnalysisTest, ByteLoopControl1) {
EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str());
// Trip-count.
- EXPECT_STREQ("(((127) - (-128)) (TC-loop) ((-128) < (127)))",
- GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+ EXPECT_STREQ("(((127) - (-128)) (TC-loop) ((-128) < (127)))", GetTripCount(0).c_str());
}
TEST_F(InductionVarAnalysisTest, ByteLoopControl2) {
@@ -645,7 +798,7 @@ TEST_F(InductionVarAnalysisTest, ByteLoopControl2) {
EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str());
// Trip-count undefined.
- EXPECT_STREQ("", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+ EXPECT_STREQ("", GetTripCount(0).c_str());
}
TEST_F(InductionVarAnalysisTest, ShortLoopControl1) {
@@ -664,8 +817,7 @@ TEST_F(InductionVarAnalysisTest, ShortLoopControl1) {
EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort",
GetInductionInfo(increment_[0], 0).c_str());
// Trip-count.
- EXPECT_STREQ("(((32767) - (-32768)) (TC-loop) ((-32768) < (32767)))",
- GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+ EXPECT_STREQ("(((32767) - (-32768)) (TC-loop) ((-32768) < (32767)))", GetTripCount(0).c_str());
}
TEST_F(InductionVarAnalysisTest, ShortLoopControl2) {
@@ -684,7 +836,7 @@ TEST_F(InductionVarAnalysisTest, ShortLoopControl2) {
EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort",
GetInductionInfo(increment_[0], 0).c_str());
// Trip-count undefined.
- EXPECT_STREQ("", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+ EXPECT_STREQ("", GetTripCount(0).c_str());
}
TEST_F(InductionVarAnalysisTest, CharLoopControl1) {
@@ -701,8 +853,7 @@ TEST_F(InductionVarAnalysisTest, CharLoopControl1) {
EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str());
// Trip-count.
- EXPECT_STREQ("((65535) (TC-loop) ((0) < (65535)))",
- GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+ EXPECT_STREQ("((65535) (TC-loop) ((0) < (65535)))", GetTripCount(0).c_str());
}
TEST_F(InductionVarAnalysisTest, CharLoopControl2) {
@@ -719,7 +870,7 @@ TEST_F(InductionVarAnalysisTest, CharLoopControl2) {
EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str());
// Trip-count undefined.
- EXPECT_STREQ("", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+ EXPECT_STREQ("", GetTripCount(0).c_str());
}
} // namespace art
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 5e587e0810..7cc8b1ea4c 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -106,6 +106,12 @@ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
return instruction;
}
+/** Helper method to obtain loop's control instruction. */
+static HInstruction* GetLoopControl(HLoopInformation* loop) {
+ DCHECK(loop != nullptr);
+ return loop->GetHeader()->GetLastInstruction();
+}
+
//
// Public class methods.
//
@@ -143,45 +149,143 @@ bool InductionVarRange::GetInductionRange(HInstruction* context,
// Find range.
chase_hint_ = chase_hint;
bool in_body = context->GetBlock() != loop->GetHeader();
+ int64_t stride_value = 0;
*min_val = GetVal(info, trip, in_body, /* is_min */ true);
*max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
- *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+ *needs_finite_test = NeedsTripCount(info, &stride_value) && IsUnsafeTripCount(trip);
return true;
}
-bool InductionVarRange::CanGenerateCode(HInstruction* context,
- HInstruction* instruction,
- /*out*/bool* needs_finite_test,
- /*out*/bool* needs_taken_test) {
- return GenerateCode(context,
- instruction,
- nullptr, nullptr, nullptr, nullptr, nullptr, // nothing generated yet
- needs_finite_test,
- needs_taken_test);
-}
-
-void InductionVarRange::GenerateRangeCode(HInstruction* context,
- HInstruction* instruction,
- HGraph* graph,
- HBasicBlock* block,
- /*out*/HInstruction** lower,
- /*out*/HInstruction** upper) {
+bool InductionVarRange::CanGenerateRange(HInstruction* context,
+ HInstruction* instruction,
+ /*out*/bool* needs_finite_test,
+ /*out*/bool* needs_taken_test) {
+ bool is_last_value = false;
+ int64_t stride_value = 0;
+ return GenerateRangeOrLastValue(context,
+ instruction,
+ is_last_value,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr, // nothing generated yet
+ &stride_value,
+ needs_finite_test,
+ needs_taken_test)
+ && (stride_value == -1 ||
+ stride_value == 0 ||
+ stride_value == 1); // avoid wrap-around anomalies.
+}
+
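For context on the "wrap-around anomalies" guarded against above, the following standalone sketch (an assumption about one such anomaly, not ART code) shows how a bound such as stride * (TC - 1) + b can already exceed 32-bit range for |stride| > 1, even when the trip count itself is a valid int:

// Standalone sketch (not ART code) of a wrap-around anomaly for |stride| > 1:
// the bound stride * (TC - 1) no longer fits into 32-bit arithmetic even
// though the trip count itself does.
#include <cstdint>
#include <iostream>

int main() {
  const int32_t stride = 2;
  const int32_t tc = 1500000000;  // valid int trip count
  const int64_t bound = static_cast<int64_t>(stride) * (tc - 1);
  const bool fits = bound >= INT32_MIN && bound <= INT32_MAX;
  std::cout << "bound=" << bound << " fits in int32: " << (fits ? "yes" : "no") << "\n";
  return 0;
}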
+void InductionVarRange::GenerateRange(HInstruction* context,
+ HInstruction* instruction,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/HInstruction** lower,
+ /*out*/HInstruction** upper) {
+ bool is_last_value = false;
+ int64_t stride_value = 0;
+ bool b1, b2; // unused
+ if (!GenerateRangeOrLastValue(context,
+ instruction,
+ is_last_value,
+ graph,
+ block,
+ lower,
+ upper,
+ nullptr,
+ &stride_value,
+ &b1,
+ &b2)) {
+ LOG(FATAL) << "Failed precondition: CanGenerateRange()";
+ }
+}
+
+HInstruction* InductionVarRange::GenerateTakenTest(HInstruction* context,
+ HGraph* graph,
+ HBasicBlock* block) {
+ HInstruction* taken_test = nullptr;
+ bool is_last_value = false;
+ int64_t stride_value = 0;
bool b1, b2; // unused
- if (!GenerateCode(context, instruction, graph, block, lower, upper, nullptr, &b1, &b2)) {
- LOG(FATAL) << "Failed precondition: GenerateCode()";
+ if (!GenerateRangeOrLastValue(context,
+ context,
+ is_last_value,
+ graph,
+ block,
+ nullptr,
+ nullptr,
+ &taken_test,
+ &stride_value,
+ &b1,
+ &b2)) {
+ LOG(FATAL) << "Failed precondition: CanGenerateRange()";
+ }
+ return taken_test;
+}
+
+bool InductionVarRange::CanGenerateLastValue(HInstruction* instruction) {
+ bool is_last_value = true;
+ int64_t stride_value = 0;
+ bool needs_finite_test = false;
+ bool needs_taken_test = false;
+ return GenerateRangeOrLastValue(instruction,
+ instruction,
+ is_last_value,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr, // nothing generated yet
+ &stride_value,
+ &needs_finite_test,
+ &needs_taken_test)
+ && !needs_finite_test && !needs_taken_test;
+}
+
+HInstruction* InductionVarRange::GenerateLastValue(HInstruction* instruction,
+ HGraph* graph,
+ HBasicBlock* block) {
+ HInstruction* last_value = nullptr;
+ bool is_last_value = true;
+ int64_t stride_value = 0;
+ bool b1, b2; // unused
+ if (!GenerateRangeOrLastValue(instruction,
+ instruction,
+ is_last_value,
+ graph,
+ block,
+ &last_value,
+ &last_value,
+ nullptr,
+ &stride_value,
+ &b1,
+ &b2)) {
+ LOG(FATAL) << "Failed precondition: CanGenerateLastValue()";
}
+ return last_value;
}
-void InductionVarRange::GenerateTakenTest(HInstruction* context,
- HGraph* graph,
- HBasicBlock* block,
- /*out*/HInstruction** taken_test) {
- bool b1, b2; // unused
- if (!GenerateCode(context, context, graph, block, nullptr, nullptr, taken_test, &b1, &b2)) {
- LOG(FATAL) << "Failed precondition: GenerateCode()";
+void InductionVarRange::Replace(HInstruction* instruction,
+ HInstruction* fetch,
+ HInstruction* replacement) {
+ for (HLoopInformation* lp = instruction->GetBlock()->GetLoopInformation(); // closest enveloping loop
+ lp != nullptr;
+ lp = lp->GetPreHeader()->GetLoopInformation()) {
+ // Update instruction's information.
+ ReplaceInduction(induction_analysis_->LookupInfo(lp, instruction), fetch, replacement);
+ // Update loop's trip-count information.
+ ReplaceInduction(induction_analysis_->LookupInfo(lp, GetLoopControl(lp)), fetch, replacement);
}
}
+bool InductionVarRange::IsFinite(HLoopInformation* loop) const {
+ HInductionVarAnalysis::InductionInfo *trip =
+ induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
+ return trip != nullptr && !IsUnsafeTripCount(trip);
+}
+
//
// Private class methods.
//
@@ -221,13 +325,13 @@ bool InductionVarRange::HasInductionInfo(
/*out*/ HLoopInformation** loop,
/*out*/ HInductionVarAnalysis::InductionInfo** info,
/*out*/ HInductionVarAnalysis::InductionInfo** trip) const {
- HLoopInformation* l = context->GetBlock()->GetLoopInformation(); // closest enveloping loop
- if (l != nullptr) {
- HInductionVarAnalysis::InductionInfo* i = induction_analysis_->LookupInfo(l, instruction);
+ HLoopInformation* lp = context->GetBlock()->GetLoopInformation(); // closest enveloping loop
+ if (lp != nullptr) {
+ HInductionVarAnalysis::InductionInfo* i = induction_analysis_->LookupInfo(lp, instruction);
if (i != nullptr) {
- *loop = l;
+ *loop = lp;
*info = i;
- *trip = induction_analysis_->LookupInfo(l, l->GetHeader()->GetLastInstruction());
+ *trip = induction_analysis_->LookupInfo(lp, GetLoopControl(lp));
return true;
}
}
@@ -260,12 +364,13 @@ bool InductionVarRange::HasFetchInLoop(HInductionVarAnalysis::InductionInfo* inf
return false;
}
-bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const {
+bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info,
+ int64_t* stride_value) const {
if (info != nullptr) {
if (info->induction_class == HInductionVarAnalysis::kLinear) {
- return true;
+ return IsConstant(info->op_a, kExact, stride_value);
} else if (info->induction_class == HInductionVarAnalysis::kWrapAround) {
- return NeedsTripCount(info->op_b);
+ return NeedsTripCount(info->op_b, stride_value);
}
}
return false;
@@ -426,6 +531,8 @@ InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::Induct
return GetMul(info->op_a, info->op_b, trip, in_body, is_min);
case HInductionVarAnalysis::kDiv:
return GetDiv(info->op_a, info->op_b, trip, in_body, is_min);
+ case HInductionVarAnalysis::kXor:
+ return GetXor(info->op_a, info->op_b);
case HInductionVarAnalysis::kFetch:
return GetFetch(info->fetch, trip, in_body, is_min);
case HInductionVarAnalysis::kTripCountInLoop:
@@ -527,6 +634,21 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct
return Value();
}
+InductionVarRange::Value InductionVarRange::GetXor(
+ HInductionVarAnalysis::InductionInfo* info1,
+ HInductionVarAnalysis::InductionInfo* info2) const {
+ int64_t v1 = 0;
+ int64_t v2 = 0;
+ // Only accept exact values.
+ if (IsConstant(info1, kExact, &v1) && IsConstant(info2, kExact, &v2)) {
+ int64_t value = v1 ^ v2;
+ if (CanLongValueFitIntoInt(value)) {
+ return Value(static_cast<int32_t>(value));
+ }
+ }
+ return Value();
+}
+
InductionVarRange::Value InductionVarRange::MulRangeAndConstant(
int64_t value,
HInductionVarAnalysis::InductionInfo* info,
@@ -616,15 +738,17 @@ InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is
return Value();
}
-bool InductionVarRange::GenerateCode(HInstruction* context,
- HInstruction* instruction,
- HGraph* graph,
- HBasicBlock* block,
- /*out*/HInstruction** lower,
- /*out*/HInstruction** upper,
- /*out*/HInstruction** taken_test,
- /*out*/bool* needs_finite_test,
- /*out*/bool* needs_taken_test) const {
+bool InductionVarRange::GenerateRangeOrLastValue(HInstruction* context,
+ HInstruction* instruction,
+ bool is_last_value,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/HInstruction** lower,
+ /*out*/HInstruction** upper,
+ /*out*/HInstruction** taken_test,
+ /*out*/int64_t* stride_value,
+ /*out*/bool* needs_finite_test,
+ /*out*/bool* needs_taken_test) const {
HLoopInformation* loop = nullptr;
HInductionVarAnalysis::InductionInfo* info = nullptr;
HInductionVarAnalysis::InductionInfo* trip = nullptr;
@@ -637,8 +761,24 @@ bool InductionVarRange::GenerateCode(HInstruction* context,
// code does not use the trip-count explicitly (since there could be an implicit relation
// between e.g. an invariant subscript and a not-taken condition).
bool in_body = context->GetBlock() != loop->GetHeader();
- *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+ *stride_value = 0;
+ *needs_finite_test = NeedsTripCount(info, stride_value) && IsUnsafeTripCount(trip);
*needs_taken_test = IsBodyTripCount(trip);
+ // Handle last value request.
+ if (is_last_value) {
+ if (info->induction_class == HInductionVarAnalysis::kLinear) {
+ if (*stride_value > 0) {
+ lower = nullptr;
+ } else {
+ upper = nullptr;
+ }
+ } else if (info->induction_class == HInductionVarAnalysis::kPeriodic) {
+ DCHECK(!in_body);
+ return GenerateLastValuePeriodic(info, trip, graph, block, lower, needs_taken_test);
+ } else {
+ return false;
+ }
+ }
// Code generation for taken test: generate the code when requested or otherwise analyze
// if code generation is feasible when taken test is needed.
if (taken_test != nullptr) {
@@ -658,6 +798,56 @@ bool InductionVarRange::GenerateCode(HInstruction* context,
GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false);
}
+bool InductionVarRange::GenerateLastValuePeriodic(HInductionVarAnalysis::InductionInfo* info,
+ HInductionVarAnalysis::InductionInfo* trip,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/HInstruction** result,
+ /*out*/bool* needs_taken_test) const {
+ DCHECK(info->induction_class == HInductionVarAnalysis::kPeriodic);
+ // Count period.
+ int32_t period = 1;
+ for (HInductionVarAnalysis::InductionInfo* p = info;
+ p->induction_class == HInductionVarAnalysis::kPeriodic;
+ p = p->op_b, ++period) {}
+ // Handle periodic(x, y) case for restricted types.
+ if (period != 2 ||
+ trip->op_a->type != Primitive::kPrimInt ||
+ (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean)) {
+ return false; // TODO: easy to generalize
+ }
+ HInstruction* x_instr = nullptr;
+ HInstruction* y_instr = nullptr;
+ HInstruction* trip_expr = nullptr;
+ if (GenerateCode(info->op_a, nullptr, graph, block, graph ? &x_instr : nullptr, false, false) &&
+ GenerateCode(info->op_b, nullptr, graph, block, graph ? &y_instr : nullptr, false, false) &&
+ GenerateCode(trip->op_a, nullptr, graph, block, graph ? &trip_expr : nullptr, false, false)) {
+ // During actual code generation (graph != nullptr),
+ // generate is_even ? x : y select instruction.
+ if (graph != nullptr) {
+ HInstruction* is_even = Insert(block, new (graph->GetArena()) HEqual(
+ Insert(block, new (graph->GetArena()) HAnd(
+ Primitive::kPrimInt, trip_expr, graph->GetIntConstant(1))),
+ graph->GetIntConstant(0), kNoDexPc));
+ *result = Insert(block, new (graph->GetArena()) HSelect(is_even, x_instr, y_instr, kNoDexPc));
+ }
+ // Guard select with taken test if needed.
+ if (*needs_taken_test) {
+ HInstruction* taken_test = nullptr;
+ if (!GenerateCode(
+ trip->op_b, nullptr, graph, block, graph ? &taken_test : nullptr, false, false)) {
+ return false;
+ } else if (graph != nullptr) {
+ *result = Insert(block,
+ new (graph->GetArena()) HSelect(taken_test, *result, x_instr, kNoDexPc));
+ }
+ *needs_taken_test = false; // taken care of
+ }
+ return true;
+ }
+ return false;
+}
+
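A minimal standalone sketch (hypothetical, not the HIR construction in GenerateLastValuePeriodic) of the value being materialized: for a period-2 sequence x, y, x, y, ... the value after TC iterations is x when TC is even and y otherwise, with the initial value x selected when the loop is not taken:

// Hypothetical illustration (not ART code) of the select logic generated above
// for a period-2 sequence x, y, x, y, ...
#include <cassert>
#include <cstdint>

// Value of the loop-phi after 'tc' executed iterations; 'taken' is false when
// the loop body never runs (the guarding HSelect case).
int32_t LastValuePeriodic2(int32_t x, int32_t y, int32_t tc, bool taken) {
  const int32_t is_even = ((tc & 1) == 0) ? 1 : 0;  // mirrors the HAnd + HEqual
  const int32_t selected = is_even ? x : y;         // mirrors the first HSelect
  return taken ? selected : x;                      // mirrors the guarding HSelect
}

int main() {
  const int32_t x = 0, y = 1;
  for (int32_t tc = 0; tc < 8; tc++) {
    int32_t k = x;                                   // phi starts at x
    for (int32_t i = 0; i < tc; i++) k = (k == x) ? y : x;
    assert(k == LastValuePeriodic2(x, y, tc, /*taken=*/ tc > 0));
  }
  return 0;
}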
bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
HInductionVarAnalysis::InductionInfo* trip,
HGraph* graph, // when set, code is generated
@@ -666,9 +856,13 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
bool in_body,
bool is_min) const {
if (info != nullptr) {
+ // If the result is not needed during code generation (nullptr), simply return success.
+ if (graph != nullptr && result == nullptr) {
+ return true;
+ }
// Verify type safety.
Primitive::Type type = Primitive::kPrimInt;
- if (info->type != type) {
+ if (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean) {
return false;
}
// Handle current operation.
@@ -679,6 +873,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
// Invariants.
switch (info->operation) {
case HInductionVarAnalysis::kAdd:
+ case HInductionVarAnalysis::kXor:
case HInductionVarAnalysis::kLT:
case HInductionVarAnalysis::kLE:
case HInductionVarAnalysis::kGT:
@@ -690,6 +885,8 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
switch (info->operation) {
case HInductionVarAnalysis::kAdd:
operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+ case HInductionVarAnalysis::kXor:
+ operation = new (graph->GetArena()) HXor(type, opa, opb); break;
case HInductionVarAnalysis::kLT:
operation = new (graph->GetArena()) HLessThan(opa, opb); break;
case HInductionVarAnalysis::kLE:
@@ -757,25 +954,29 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
}
break;
case HInductionVarAnalysis::kLinear: {
- // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
- // to avoid arithmetic wrap-around situations that are hard to guard against.
+ // Linear induction a * i + b, for normalized 0 <= i < TC. For ranges, this should
+ // be restricted to a unit stride to avoid arithmetic wrap-around situations that
+ // are harder to guard against. For a last value, requesting the min/max based on
+ // any stride yields the right value.
int64_t stride_value = 0;
if (IsConstant(info->op_a, kExact, &stride_value)) {
- if (stride_value == 1 || stride_value == -1) {
- const bool is_min_a = stride_value == 1 ? is_min : !is_min;
- if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) &&
- GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
- if (graph != nullptr) {
- HInstruction* oper;
- if (stride_value == 1) {
- oper = new (graph->GetArena()) HAdd(type, opa, opb);
- } else {
- oper = new (graph->GetArena()) HSub(type, opb, opa);
- }
- *result = Insert(block, oper);
+ const bool is_min_a = stride_value >= 0 ? is_min : !is_min;
+ if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) &&
+ GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+ if (graph != nullptr) {
+ HInstruction* oper;
+ if (stride_value == 1) {
+ oper = new (graph->GetArena()) HAdd(type, opa, opb);
+ } else if (stride_value == -1) {
+ oper = new (graph->GetArena()) HSub(type, opb, opa);
+ } else {
+ HInstruction* mul = new (graph->GetArena()) HMul(
+ type, graph->GetIntConstant(stride_value), opa);
+ oper = new (graph->GetArena()) HAdd(type, Insert(block, mul), opb);
}
- return true;
+ *result = Insert(block, oper);
}
+ return true;
}
}
break;
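As a cross-check of the last-value case added above (and of the ConstantTripCountUp expectation later in this diff), a standalone sketch of the arithmetic: a linear induction a * i + b over normalized 0 <= i < TC exits the loop holding a * TC + b, which is what the HMul + HAdd construction computes:

// Standalone sketch (not ART code): exit value of a linear induction a * i + b.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t a = 3, b = 7, tc = 100;
  int32_t k = b;                          // value of the phi before the loop (i == 0)
  for (int32_t i = 0; i < tc; i++) k += a;
  assert(k == a * tc + b);                // matches the HMul + HAdd construction above
  return 0;
}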
@@ -800,4 +1001,18 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
return false;
}
+void InductionVarRange::ReplaceInduction(HInductionVarAnalysis::InductionInfo* info,
+ HInstruction* fetch,
+ HInstruction* replacement) {
+ if (info != nullptr) {
+ if (info->induction_class == HInductionVarAnalysis::kInvariant &&
+ info->operation == HInductionVarAnalysis::kFetch &&
+ info->fetch == fetch) {
+ info->fetch = replacement;
+ }
+ ReplaceInduction(info->op_a, fetch, replacement);
+ ReplaceInduction(info->op_b, fetch, replacement);
+ }
+}
+
} // namespace art
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 00aaa167f8..034cf32b2d 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -76,10 +76,10 @@ class InductionVarRange {
* and need_taken test flags denote if an additional finite-test and/or taken-test
* are needed to protect the range evaluation inside its loop.
*/
- bool CanGenerateCode(HInstruction* context,
- HInstruction* instruction,
- /*out*/ bool* needs_finite_test,
- /*out*/ bool* needs_taken_test);
+ bool CanGenerateRange(HInstruction* context,
+ HInstruction* instruction,
+ /*out*/ bool* needs_finite_test,
+ /*out*/ bool* needs_taken_test);
/**
* Generates the actual code in the HIR for the lower and upper bound expressions on the
@@ -94,25 +94,65 @@ class InductionVarRange {
* lower: add x, 0
* upper: add x, 5
*
- * Precondition: CanGenerateCode() returns true.
+ * Precondition: CanGenerateRange() returns true.
*/
- void GenerateRangeCode(HInstruction* context,
- HInstruction* instruction,
- HGraph* graph,
- HBasicBlock* block,
- /*out*/ HInstruction** lower,
- /*out*/ HInstruction** upper);
+ void GenerateRange(HInstruction* context,
+ HInstruction* instruction,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/ HInstruction** lower,
+ /*out*/ HInstruction** upper);
/**
* Generates explicit taken-test for the loop in the given context. Code is generated in
- * given block and graph. The taken-test is returned in parameter test.
+ * given block and graph. Returns generated taken-test.
*
- * Precondition: CanGenerateCode() returns true and needs_taken_test is set.
+ * Precondition: CanGenerateRange() returns true and needs_taken_test is set.
*/
- void GenerateTakenTest(HInstruction* context,
- HGraph* graph,
- HBasicBlock* block,
- /*out*/ HInstruction** taken_test);
+ HInstruction* GenerateTakenTest(HInstruction* context, HGraph* graph, HBasicBlock* block);
+
+ /**
+ * Returns true if induction analysis is able to generate code for last value of
+ * the given instruction inside the closest enveloping loop.
+ */
+ bool CanGenerateLastValue(HInstruction* instruction);
+
+ /**
+ * Generates last value of the given instruction in the closest enveloping loop.
+ * Code is generated in given block and graph. Returns generated last value.
+ *
+ * Precondition: CanGenerateLastValue() returns true.
+ */
+ HInstruction* GenerateLastValue(HInstruction* instruction, HGraph* graph, HBasicBlock* block);
+
+ /**
+ * Updates all matching fetches with the given replacement in all induction information
+ * that is associated with the given instruction.
+ */
+ void Replace(HInstruction* instruction, HInstruction* fetch, HInstruction* replacement);
+
+ /**
+ * Incrementally updates induction information for just the given loop.
+ */
+ void ReVisit(HLoopInformation* loop) {
+ induction_analysis_->induction_.erase(loop);
+ for (HInstructionIterator it(loop->GetHeader()->GetPhis()); !it.Done(); it.Advance()) {
+ induction_analysis_->cycles_.erase(it.Current()->AsPhi());
+ }
+ induction_analysis_->VisitLoop(loop);
+ }
+
+ /**
+ * Looks up the interesting cycle associated with an entry phi.
+ */
+ ArenaSet<HInstruction*>* LookupCycle(HPhi* phi) const {
+ return induction_analysis_->LookupCycle(phi);
+ }
+
+ /**
+ * Checks if header logic of a loop terminates.
+ */
+ bool IsFinite(HLoopInformation* loop) const;
private:
/*
@@ -140,7 +180,8 @@ class InductionVarRange {
/*out*/ HInductionVarAnalysis::InductionInfo** trip) const;
bool HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const;
- bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const;
+ bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info,
+ /*out*/ int64_t* stride_value) const;
bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
bool IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
@@ -167,6 +208,8 @@ class InductionVarRange {
HInductionVarAnalysis::InductionInfo* trip,
bool in_body,
bool is_min) const;
+ Value GetXor(HInductionVarAnalysis::InductionInfo* info1,
+ HInductionVarAnalysis::InductionInfo* info2) const;
Value MulRangeAndConstant(int64_t value,
HInductionVarAnalysis::InductionInfo* info,
@@ -186,19 +229,28 @@ class InductionVarRange {
Value MergeVal(Value v1, Value v2, bool is_min) const;
/**
- * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
- * With values nullptr, the method can be used to determine if code generation
+ * Generates code for lower/upper/taken-test or last value in the HIR. Returns true on
+ * success. With values nullptr, the method can be used to determine if code generation
* would be successful without generating actual code yet.
*/
- bool GenerateCode(HInstruction* context,
- HInstruction* instruction,
- HGraph* graph,
- HBasicBlock* block,
- /*out*/ HInstruction** lower,
- /*out*/ HInstruction** upper,
- /*out*/ HInstruction** taken_test,
- /*out*/ bool* needs_finite_test,
- /*out*/ bool* needs_taken_test) const;
+ bool GenerateRangeOrLastValue(HInstruction* context,
+ HInstruction* instruction,
+ bool is_last_val,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/ HInstruction** lower,
+ /*out*/ HInstruction** upper,
+ /*out*/ HInstruction** taken_test,
+ /*out*/ int64_t* stride_value,
+ /*out*/ bool* needs_finite_test,
+ /*out*/ bool* needs_taken_test) const;
+
+ bool GenerateLastValuePeriodic(HInductionVarAnalysis::InductionInfo* info,
+ HInductionVarAnalysis::InductionInfo* trip,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/HInstruction** result,
+ /*out*/ bool* needs_taken_test) const;
bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
HInductionVarAnalysis::InductionInfo* trip,
@@ -208,6 +260,10 @@ class InductionVarRange {
bool in_body,
bool is_min) const;
+ void ReplaceInduction(HInductionVarAnalysis::InductionInfo* info,
+ HInstruction* fetch,
+ HInstruction* replacement);
+
/** Results of prior induction variable analysis. */
HInductionVarAnalysis* induction_analysis_;
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 4ea170f659..8bbdd4acb7 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -75,34 +75,34 @@ class InductionVarRangeTest : public CommonCompilerTest {
// Control flow.
loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
graph_->AddBlock(loop_preheader_);
- HBasicBlock* loop_header = new (&allocator_) HBasicBlock(graph_);
- graph_->AddBlock(loop_header);
- HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_);
- graph_->AddBlock(loop_body);
+ loop_header_ = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(loop_header_);
+ loop_body_ = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(loop_body_);
HBasicBlock* return_block = new (&allocator_) HBasicBlock(graph_);
graph_->AddBlock(return_block);
entry_block_->AddSuccessor(loop_preheader_);
- loop_preheader_->AddSuccessor(loop_header);
- loop_header->AddSuccessor(loop_body);
- loop_header->AddSuccessor(return_block);
- loop_body->AddSuccessor(loop_header);
+ loop_preheader_->AddSuccessor(loop_header_);
+ loop_header_->AddSuccessor(loop_body_);
+ loop_header_->AddSuccessor(return_block);
+ loop_body_->AddSuccessor(loop_header_);
return_block->AddSuccessor(exit_block_);
// Instructions.
loop_preheader_->AddInstruction(new (&allocator_) HGoto());
HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
- loop_header->AddPhi(phi);
+ loop_header_->AddPhi(phi);
phi->AddInput(graph_->GetIntConstant(lower)); // i = l
if (stride > 0) {
condition_ = new (&allocator_) HLessThan(phi, upper); // i < u
} else {
condition_ = new (&allocator_) HGreaterThan(phi, upper); // i > u
}
- loop_header->AddInstruction(condition_);
- loop_header->AddInstruction(new (&allocator_) HIf(condition_));
+ loop_header_->AddInstruction(condition_);
+ loop_header_->AddInstruction(new (&allocator_) HIf(condition_));
increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, phi, graph_->GetIntConstant(stride));
- loop_body->AddInstruction(increment_); // i += s
+ loop_body_->AddInstruction(increment_); // i += s
phi->AddInput(increment_);
- loop_body->AddInstruction(new (&allocator_) HGoto());
+ loop_body_->AddInstruction(new (&allocator_) HGoto());
return_block->AddInstruction(new (&allocator_) HReturnVoid());
exit_block_->AddInstruction(new (&allocator_) HExit());
}
@@ -192,7 +192,8 @@ class InductionVarRangeTest : public CommonCompilerTest {
//
bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
- return range_.NeedsTripCount(info);
+ int64_t s = 0;
+ return range_.NeedsTripCount(info, &s);
}
bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
@@ -251,6 +252,8 @@ class InductionVarRangeTest : public CommonCompilerTest {
HBasicBlock* entry_block_;
HBasicBlock* exit_block_;
HBasicBlock* loop_preheader_;
+ HBasicBlock* loop_header_;
+ HBasicBlock* loop_body_;
HInductionVarAnalysis* iva_;
InductionVarRange range_;
@@ -600,15 +603,19 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
Value v1, v2;
bool needs_finite_test = true;
+ bool needs_taken_test = true;
+
+ HInstruction* phi = condition_->InputAt(0);
+ HInstruction* exit = exit_block_->GetLastInstruction();
// In context of header: known.
- range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+ range_.GetInductionRange(condition_, phi, x_, &v1, &v2, &needs_finite_test);
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(0), v1);
ExpectEqual(Value(1000), v2);
// In context of loop-body: known.
- range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+ range_.GetInductionRange(increment_, phi, x_, &v1, &v2, &needs_finite_test);
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(0), v1);
ExpectEqual(Value(999), v2);
@@ -616,6 +623,20 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(1), v1);
ExpectEqual(Value(1000), v2);
+
+ // Induction vs. no-induction.
+ EXPECT_TRUE(range_.CanGenerateRange(increment_, phi, &needs_finite_test, &needs_taken_test));
+ EXPECT_TRUE(range_.CanGenerateLastValue(phi));
+ EXPECT_FALSE(range_.CanGenerateRange(exit, exit, &needs_finite_test, &needs_taken_test));
+ EXPECT_FALSE(range_.CanGenerateLastValue(exit));
+
+ // Last value (unsimplified).
+ HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
+ ASSERT_TRUE(last->IsAdd());
+ ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
+ EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
+ ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
+ EXPECT_EQ(0, last->InputAt(1)->AsIntConstant()->GetValue());
}
TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -624,15 +645,19 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
Value v1, v2;
bool needs_finite_test = true;
+ bool needs_taken_test = true;
+
+ HInstruction* phi = condition_->InputAt(0);
+ HInstruction* exit = exit_block_->GetLastInstruction();
// In context of header: known.
- range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+ range_.GetInductionRange(condition_, phi, x_, &v1, &v2, &needs_finite_test);
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(0), v1);
ExpectEqual(Value(1000), v2);
// In context of loop-body: known.
- range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+ range_.GetInductionRange(increment_, phi, x_, &v1, &v2, &needs_finite_test);
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(1), v1);
ExpectEqual(Value(1000), v2);
@@ -640,6 +665,25 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(0), v1);
ExpectEqual(Value(999), v2);
+
+ // Induction vs. no-induction.
+ EXPECT_TRUE(range_.CanGenerateRange(increment_, phi, &needs_finite_test, &needs_taken_test));
+ EXPECT_TRUE(range_.CanGenerateLastValue(phi));
+ EXPECT_FALSE(range_.CanGenerateRange(exit, exit, &needs_finite_test, &needs_taken_test));
+ EXPECT_FALSE(range_.CanGenerateLastValue(exit));
+
+ // Last value (unsimplified).
+ HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
+ ASSERT_TRUE(last->IsSub());
+ ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
+ EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
+ ASSERT_TRUE(last->InputAt(1)->IsNeg());
+ last = last->InputAt(1)->InputAt(0);
+ ASSERT_TRUE(last->IsSub());
+ ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
+ EXPECT_EQ(0, last->InputAt(0)->AsIntConstant()->GetValue());
+ ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
+ EXPECT_EQ(1000, last->InputAt(1)->AsIntConstant()->GetValue());
}
TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -650,14 +694,16 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
bool needs_finite_test = true;
bool needs_taken_test = true;
+ HInstruction* phi = condition_->InputAt(0);
+
// In context of header: upper unknown.
- range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+ range_.GetInductionRange(condition_, phi, x_, &v1, &v2, &needs_finite_test);
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(0), v1);
ExpectEqual(Value(), v2);
// In context of loop-body: known.
- range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+ range_.GetInductionRange(increment_, phi, x_, &v1, &v2, &needs_finite_test);
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(0), v1);
ExpectEqual(Value(x_, 1, -1), v2);
@@ -668,19 +714,15 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
HInstruction* lower = nullptr;
HInstruction* upper = nullptr;
- HInstruction* taken = nullptr;
// Can generate code in context of loop-body only.
- EXPECT_FALSE(range_.CanGenerateCode(
- condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
- ASSERT_TRUE(range_.CanGenerateCode(
- increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+ EXPECT_FALSE(range_.CanGenerateRange(condition_, phi, &needs_finite_test, &needs_taken_test));
+ ASSERT_TRUE(range_.CanGenerateRange(increment_, phi, &needs_finite_test, &needs_taken_test));
EXPECT_FALSE(needs_finite_test);
EXPECT_TRUE(needs_taken_test);
- // Generates code.
- range_.GenerateRangeCode(
- increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+ // Generates code (unsimplified).
+ range_.GenerateRange(increment_, phi, graph_, loop_preheader_, &lower, &upper);
// Verify lower is 0+0.
ASSERT_TRUE(lower != nullptr);
@@ -701,12 +743,19 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
// Verify taken-test is 0<V.
- range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+ HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
ASSERT_TRUE(taken != nullptr);
ASSERT_TRUE(taken->IsLessThan());
ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue());
EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
+
+ // Replacement.
+ range_.Replace(loop_header_->GetLastInstruction(), x_, y_);
+ range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(1), v1);
+ ExpectEqual(Value(y_, 1, 0), v2);
}
TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
@@ -717,14 +766,16 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
bool needs_finite_test = true;
bool needs_taken_test = true;
+ HInstruction* phi = condition_->InputAt(0);
+
// In context of header: lower unknown.
- range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+ range_.GetInductionRange(condition_, phi, x_, &v1, &v2, &needs_finite_test);
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(), v1);
ExpectEqual(Value(1000), v2);
// In context of loop-body: known.
- range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
+ range_.GetInductionRange(increment_, phi, x_, &v1, &v2, &needs_finite_test);
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(x_, 1, 1), v1);
ExpectEqual(Value(1000), v2);
@@ -735,19 +786,15 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
HInstruction* lower = nullptr;
HInstruction* upper = nullptr;
- HInstruction* taken = nullptr;
// Can generate code in context of loop-body only.
- EXPECT_FALSE(range_.CanGenerateCode(
- condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
- ASSERT_TRUE(range_.CanGenerateCode(
- increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+ EXPECT_FALSE(range_.CanGenerateRange(condition_, phi, &needs_finite_test, &needs_taken_test));
+ ASSERT_TRUE(range_.CanGenerateRange(increment_, phi, &needs_finite_test, &needs_taken_test));
EXPECT_FALSE(needs_finite_test);
EXPECT_TRUE(needs_taken_test);
- // Generates code.
- range_.GenerateRangeCode(
- increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+ // Generates code (unsimplified).
+ range_.GenerateRange(increment_, phi, graph_, loop_preheader_, &lower, &upper);
// Verify lower is 1000-((1000-V)-1).
ASSERT_TRUE(lower != nullptr);
@@ -773,12 +820,19 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
// Verify taken-test is 1000>V.
- range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+ HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
ASSERT_TRUE(taken != nullptr);
ASSERT_TRUE(taken->IsGreaterThan());
ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue());
EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
+
+ // Replacement.
+ range_.Replace(loop_header_->GetLastInstruction(), x_, y_);
+ range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(y_, 1, 0), v1);
+ ExpectEqual(Value(999), v2);
}
} // namespace art
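The two symbolic trip-count tests above boil down to: for an up-counting loop "for (int i = 0; i < x; ++i)" the body-context range of the phi is [0, x-1] and the taken test is 0 < x (with the mirror image for the down-counting case). A rough standalone model of those expectations, with invented names (SymbolicValue stands in for the fixture's Value):

// Illustrative only: a scalar model of the symbolic values the tests check.
// SymbolicValue{param, a, b} stands for a*param + b, like Value(x_, 1, -1).
#include <cassert>
#include <string>

struct SymbolicValue {
  std::string param;  // empty string means a pure constant
  int a;
  int b;
  bool operator==(const SymbolicValue& o) const {
    return param == o.param && a == o.a && b == o.b;
  }
};

// For an up-counting loop "for (int i = 0; i < x; ++i)":
// in the loop body, i ranges over [0, x - 1]; the loop is taken iff 0 < x.
SymbolicValue BodyLower() { return {"", 0, 0}; }                       // constant 0
SymbolicValue BodyUpper(const std::string& x) { return {x, 1, -1}; }   // 1*x - 1
bool TakenTest(int x) { return 0 < x; }

int main() {
  assert(BodyLower() == (SymbolicValue{"", 0, 0}));
  assert(BodyUpper("x") == (SymbolicValue{"x", 1, -1}));
  assert(TakenTest(5));
  assert(!TakenTest(0));
  return 0;
}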
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 451aa38033..cc420b3260 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -41,7 +41,7 @@
#include "sharpening.h"
#include "ssa_builder.h"
#include "ssa_phi_elimination.h"
-#include "scoped_thread_state_change.h"
+#include "scoped_thread_state_change-inl.h"
#include "thread.h"
namespace art {
@@ -90,14 +90,14 @@ void HInliner::Run() {
if (!TryInline(call)) {
if (kIsDebugBuild && IsCompilingWithCoreImage()) {
std::string callee_name =
- PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
+ outer_compilation_unit_.GetDexFile()->PrettyMethod(call->GetDexMethodIndex());
bool should_inline = callee_name.find("$inline$") != std::string::npos;
CHECK(!should_inline) << "Could not inline " << callee_name;
}
} else {
if (kIsDebugBuild && IsCompilingWithCoreImage()) {
std::string callee_name =
- PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
+ outer_compilation_unit_.GetDexFile()->PrettyMethod(call->GetDexMethodIndex());
bool must_not_inline = callee_name.find("$noinline$") != std::string::npos;
CHECK(!must_not_inline) << "Should not have inlined " << callee_name;
}
@@ -109,7 +109,7 @@ void HInliner::Run() {
}
static bool IsMethodOrDeclaringClassFinal(ArtMethod* method)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
return method->IsFinal() || method->GetDeclaringClass()->IsFinal();
}
@@ -119,7 +119,7 @@ static bool IsMethodOrDeclaringClassFinal(ArtMethod* method)
* Return nullptr if the runtime target cannot be proven.
*/
static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resolved_method)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
if (IsMethodOrDeclaringClassFinal(resolved_method)) {
// No need to lookup further, the resolved method will be the target.
return resolved_method;
@@ -189,7 +189,7 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol
static uint32_t FindMethodIndexIn(ArtMethod* method,
const DexFile& dex_file,
uint32_t name_and_signature_index)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
return method->GetDexMethodIndex();
} else {
@@ -200,13 +200,13 @@ static uint32_t FindMethodIndexIn(ArtMethod* method,
static uint32_t FindClassIndexIn(mirror::Class* cls,
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
uint32_t index = DexFile::kDexNoIndex;
if (cls->GetDexCache() == nullptr) {
- DCHECK(cls->IsArrayClass()) << PrettyClass(cls);
+ DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
index = cls->FindTypeIndexInOtherDexFile(dex_file);
} else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
- DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
+ DCHECK(cls->IsProxyClass()) << cls->PrettyClass();
// TODO: deal with proxy classes.
} else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
DCHECK_EQ(cls->GetDexCache(), dex_cache.Get());
@@ -263,42 +263,24 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
return false; // Don't bother to move further if we know the method is unresolved.
}
- uint32_t method_index = invoke_instruction->GetDexMethodIndex();
ScopedObjectAccess soa(Thread::Current());
+ uint32_t method_index = invoke_instruction->GetDexMethodIndex();
const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
- VLOG(compiler) << "Try inlining " << PrettyMethod(method_index, caller_dex_file);
+ VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index);
- ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
// We can query the dex cache directly. The verifier has populated it already.
- ArtMethod* resolved_method;
+ ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod();
ArtMethod* actual_method = nullptr;
- if (invoke_instruction->IsInvokeStaticOrDirect()) {
- if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
- VLOG(compiler) << "Not inlining a String.<init> method";
- return false;
- }
- MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
- mirror::DexCache* const dex_cache = IsSameDexFile(caller_dex_file, *ref.dex_file)
- ? caller_compilation_unit_.GetDexCache().Get()
- : class_linker->FindDexCache(soa.Self(), *ref.dex_file);
- resolved_method = dex_cache->GetResolvedMethod(
- ref.dex_method_index, class_linker->GetImagePointerSize());
- // actual_method == resolved_method for direct or static calls.
- actual_method = resolved_method;
- } else {
- resolved_method = caller_compilation_unit_.GetDexCache().Get()->GetResolvedMethod(
- method_index, class_linker->GetImagePointerSize());
- if (resolved_method != nullptr) {
- // Check if we can statically find the method.
- actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
- }
- }
-
if (resolved_method == nullptr) {
- // TODO: Can this still happen?
- // Method cannot be resolved if it is in another dex file we do not have access to.
- VLOG(compiler) << "Method cannot be resolved " << PrettyMethod(method_index, caller_dex_file);
+ DCHECK(invoke_instruction->IsInvokeStaticOrDirect());
+ DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit());
+ VLOG(compiler) << "Not inlining a String.<init> method";
return false;
+ } else if (invoke_instruction->IsInvokeStaticOrDirect()) {
+ actual_method = resolved_method;
+ } else {
+ // Check if we can statically find the method.
+ actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
}
if (actual_method != nullptr) {
@@ -322,7 +304,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
if (ic.IsUninitialized()) {
VLOG(compiler) << "Interface or virtual call to "
- << PrettyMethod(method_index, caller_dex_file)
+ << caller_dex_file.PrettyMethod(method_index)
<< " is not hit and not inlined";
return false;
} else if (ic.IsMonomorphic()) {
@@ -340,7 +322,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
} else {
DCHECK(ic.IsMegamorphic());
VLOG(compiler) << "Interface or virtual call to "
- << PrettyMethod(method_index, caller_dex_file)
+ << caller_dex_file.PrettyMethod(method_index)
<< " is megamorphic and not inlined";
MaybeRecordStat(kMegamorphicCall);
return false;
@@ -349,7 +331,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
}
VLOG(compiler) << "Interface or virtual call to "
- << PrettyMethod(method_index, caller_dex_file)
+ << caller_dex_file.PrettyMethod(method_index)
<< " could not be statically determined";
return false;
}
@@ -384,7 +366,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
uint32_t class_index = FindClassIndexIn(
ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache());
if (class_index == DexFile::kDexNoIndex) {
- VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+ VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
<< " from inline cache is not inlined because its class is not"
<< " accessible to the caller";
return false;
@@ -452,7 +434,8 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
is_referrer,
invoke_instruction->GetDexPc(),
/* needs_access_check */ false,
- /* is_in_dex_cache */ true);
+ /* is_in_dex_cache */ true,
+ /* is_in_boot_image */ false);
HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
// TODO: Extend reference type propagation to understand the guard.
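The guard emitted here compares the receiver's class against the class recorded in the inline cache and bails out on mismatch. A rough standalone sketch of the runtime behavior that graph encodes, with invented Object/Class stand-ins; the slow path stands in for the deoptimization:

#include <cstdio>

struct Class { const char* name; };
struct Object { const Class* klass; };

int SlowPathVirtualCall(const Object&) { return -1; }  // stands in for deopt/re-dispatch

// Inlined fast path guarded by the class check built above:
// if the receiver's class is not the cached one, leave the inlined code.
int GuardedCall(const Object& receiver, const Class* expected) {
  if (receiver.klass != expected) {        // HNotEqual(load_class, receiver_class)
    return SlowPathVirtualCall(receiver);  // guard failed
  }
  return 42;                               // inlined body of the cached target
}

int main() {
  Class widget{"Widget"}, gadget{"Gadget"};
  Object a{&widget}, b{&gadget};
  std::printf("%d %d\n", GuardedCall(a, &widget), GuardedCall(b, &widget));  // 42 -1
  return 0;
}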
@@ -543,7 +526,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
}
if (!one_target_inlined) {
- VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+ VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
<< " from inline cache is not inlined because none"
<< " of its targets could be inlined";
return false;
@@ -677,7 +660,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
actual_method = new_method;
} else if (actual_method != new_method) {
// Different methods, bailout.
- VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+ VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
<< " from inline cache is not inlined because it resolves"
<< " to different methods";
return false;
@@ -762,9 +745,9 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho
// 2) We will not go to the conflict trampoline with an invoke-virtual.
// TODO: Consider sharpening once it is not dependent on the compiler driver.
const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
- uint32_t method_index = FindMethodIndexIn(
+ uint32_t dex_method_index = FindMethodIndexIn(
method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
- if (method_index == DexFile::kDexNoIndex) {
+ if (dex_method_index == DexFile::kDexNoIndex) {
return false;
}
HInvokeVirtual* new_invoke = new (graph_->GetArena()) HInvokeVirtual(
@@ -772,7 +755,8 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho
invoke_instruction->GetNumberOfArguments(),
invoke_instruction->GetType(),
invoke_instruction->GetDexPc(),
- method_index,
+ dex_method_index,
+ method,
method->GetMethodIndex());
HInputsRef inputs = invoke_instruction->GetInputs();
for (size_t index = 0; index != inputs.size(); ++index) {
@@ -810,7 +794,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* method,
HInstruction** return_replacement) {
if (method->IsProxyMethod()) {
- VLOG(compiler) << "Method " << PrettyMethod(method)
+ VLOG(compiler) << "Method " << method->PrettyMethod()
<< " is not inlined because of unimplemented inline support for proxy methods.";
return false;
}
@@ -820,11 +804,12 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
if (!compiler_driver_->MayInline(method->GetDexFile(),
outer_compilation_unit_.GetDexFile())) {
if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
- VLOG(compiler) << "Successfully replaced pattern of invoke " << PrettyMethod(method);
+ VLOG(compiler) << "Successfully replaced pattern of invoke "
+ << method->PrettyMethod();
MaybeRecordStat(kReplacedInvokeWithSimplePattern);
return true;
}
- VLOG(compiler) << "Won't inline " << PrettyMethod(method) << " in "
+ VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in "
<< outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
<< caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
<< method->GetDexFile()->GetLocation();
@@ -836,14 +821,14 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
const DexFile::CodeItem* code_item = method->GetCodeItem();
if (code_item == nullptr) {
- VLOG(compiler) << "Method " << PrettyMethod(method)
+ VLOG(compiler) << "Method " << method->PrettyMethod()
<< " is not inlined because it is native";
return false;
}
size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
- VLOG(compiler) << "Method " << PrettyMethod(method)
+ VLOG(compiler) << "Method " << method->PrettyMethod()
<< " is too big to inline: "
<< code_item->insns_size_in_code_units_
<< " > "
@@ -852,13 +837,13 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
}
if (code_item->tries_size_ != 0) {
- VLOG(compiler) << "Method " << PrettyMethod(method)
+ VLOG(compiler) << "Method " << method->PrettyMethod()
<< " is not inlined because of try block";
return false;
}
if (!method->IsCompilable()) {
- VLOG(compiler) << "Method " << PrettyMethod(method)
+ VLOG(compiler) << "Method " << method->PrettyMethod()
<< " has soft failures un-handled by the compiler, so it cannot be inlined";
}
@@ -867,7 +852,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
if (Runtime::Current()->UseJitCompilation() ||
!compiler_driver_->IsMethodVerifiedWithoutFailures(
method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
- VLOG(compiler) << "Method " << PrettyMethod(method)
+ VLOG(compiler) << "Method " << method->PrettyMethod()
<< " couldn't be verified, so it cannot be inlined";
return false;
}
@@ -877,7 +862,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
// Case of a static method that cannot be inlined because it implicitly
// requires an initialization check of its declaring class.
- VLOG(compiler) << "Method " << PrettyMethod(method)
+ VLOG(compiler) << "Method " << method->PrettyMethod()
<< " is not inlined because it is static and requires a clinit"
<< " check that cannot be emitted due to Dex cache limitations";
return false;
@@ -887,14 +872,14 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- VLOG(compiler) << "Successfully inlined " << PrettyMethod(method);
+ VLOG(compiler) << "Successfully inlined " << method->PrettyMethod();
MaybeRecordStat(kInlinedInvoke);
return true;
}
static HInstruction* GetInvokeInputForArgVRegIndex(HInvoke* invoke_instruction,
size_t arg_vreg_index)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
size_t input_index = 0;
for (size_t i = 0; i < arg_vreg_index; ++i, ++input_index) {
DCHECK_LT(input_index, invoke_instruction->GetNumberOfArguments());
@@ -1030,7 +1015,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
uint32_t field_index,
HInstruction* obj)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
DCHECK(resolved_field != nullptr);
@@ -1058,7 +1043,7 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex
uint32_t field_index,
HInstruction* obj,
HInstruction* value)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
DCHECK(resolved_field != nullptr);
@@ -1121,7 +1106,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
}
}
- InvokeType invoke_type = invoke_instruction->GetOriginalInvokeType();
+ InvokeType invoke_type = invoke_instruction->GetInvokeType();
if (invoke_type == kInterface) {
// We have statically resolved the dispatch. To please the class linker
// at runtime, we change this call as if it was a virtual call.
@@ -1154,19 +1139,19 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
*code_item,
compiler_driver_,
inline_stats.get(),
- resolved_method->GetQuickenedInfo(),
+ resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()),
dex_cache,
handles_);
if (builder.BuildGraph() != kAnalysisSuccess) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be built, so cannot be inlined";
return false;
}
if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
compiler_driver_->GetInstructionSet())) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " cannot be inlined because of the register allocator";
return false;
}
@@ -1216,7 +1201,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
// a throw predecessor.
HBasicBlock* exit_block = callee_graph->GetExitBlock();
if (exit_block == nullptr) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because it has an infinite loop";
return false;
}
@@ -1229,25 +1214,22 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
}
}
if (has_throw_predecessor) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because one branch always throws";
return false;
}
- HReversePostOrderIterator it(*callee_graph);
- it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining.
size_t number_of_instructions = 0;
bool can_inline_environment =
total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
- for (; !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
-
+ // Skip the entry block; it does not contain instructions that prevent inlining.
+ for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) {
if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
// Don't inline methods with irreducible loops, they could prevent some
// optimizations to run.
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because it contains an irreducible loop";
return false;
}
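The manual HReversePostOrderIterator loops are being replaced by range-based for over GetReversePostOrder() / GetReversePostOrderSkipEntryBlock(). A minimal sketch of that adapter shape, with an invented Graph that simply stores blocks in reverse post order:

#include <cstdio>
#include <utility>
#include <vector>

struct BasicBlock { int id; };

// Invented stand-in: blocks are kept in reverse post order and exposed as an
// iterable range, so callers can use range-based for instead of an iterator.
class Graph {
 public:
  explicit Graph(std::vector<BasicBlock> rpo) : rpo_(std::move(rpo)) {}
  const std::vector<BasicBlock>& GetReversePostOrder() const { return rpo_; }
  // Same range minus the entry block, mirroring GetReversePostOrderSkipEntryBlock().
  std::vector<BasicBlock> GetReversePostOrderSkipEntryBlock() const {
    return {rpo_.begin() + 1, rpo_.end()};
  }
 private:
  std::vector<BasicBlock> rpo_;
};

int main() {
  Graph g({{0}, {1}, {2}});
  for (const BasicBlock& b : g.GetReversePostOrderSkipEntryBlock()) {
    std::printf("visit block %d\n", b.id);  // prints 1, then 2 (entry skipped)
  }
  return 0;
}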
@@ -1256,28 +1238,28 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
!instr_it.Done();
instr_it.Advance()) {
if (number_of_instructions++ == number_of_instructions_budget) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " is not inlined because its caller has reached"
<< " its instruction budget limit.";
return false;
}
HInstruction* current = instr_it.Current();
if (!can_inline_environment && current->NeedsEnvironment()) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " is not inlined because its caller has reached"
<< " its environment budget limit.";
return false;
}
if (!same_dex_file && current->NeedsEnvironment()) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because " << current->DebugName()
<< " needs an environment and is in a different dex file";
return false;
}
if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because " << current->DebugName()
<< " it is in a different dex file and requires access to the dex cache";
return false;
@@ -1285,7 +1267,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
if (current->IsNewInstance() &&
(current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because it is using an entrypoint"
<< " with access checks";
// Allocation entrypoint does not handle inlined frames.
@@ -1294,7 +1276,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
if (current->IsNewArray() &&
(current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) {
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because it is using an entrypoint"
<< " with access checks";
// Allocation entrypoint does not handle inlined frames.
@@ -1306,7 +1288,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
current->IsUnresolvedStaticFieldSet() ||
current->IsUnresolvedInstanceFieldSet()) {
// Entrypoint for unresolved fields does not handle inlined frames.
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+ VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because it is using an unresolved"
<< " entrypoint";
return false;
@@ -1337,7 +1319,7 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph,
HConstantFolding fold(callee_graph);
HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
InstructionSimplifier simplify(callee_graph, stats_);
- IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
+ IntrinsicsRecognizer intrinsics(callee_graph, stats_);
HOptimization* optimizations[] = {
&intrinsics,
@@ -1374,7 +1356,7 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph,
static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
bool declared_can_be_null,
HInstruction* actual_obj)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
if (declared_can_be_null && !actual_obj->CanBeNull()) {
return true;
}
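After this change, TryInline reads the resolved method straight off the invoke and branches on three cases: null (only expected for String.<init>), static/direct (exact target known), or virtual/interface (attempt devirtualization, otherwise fall back to inline caches). A rough standalone sketch of that decision flow, with invented stand-in types:

#include <cstdio>

struct Method { bool final_or_in_final_class; };
struct Invoke {
  Method* resolved_method;
  bool is_static_or_direct;
};

// Stands in for FindVirtualOrInterfaceTarget(): in this toy model the target
// is only provable when the method (or its class) is final.
Method* Devirtualize(const Invoke& invoke) {
  return (invoke.resolved_method != nullptr &&
          invoke.resolved_method->final_or_in_final_class)
             ? invoke.resolved_method
             : nullptr;
}

const char* ClassifyInlineTarget(const Invoke& invoke) {
  if (invoke.resolved_method == nullptr) {
    return "skip: String.<init>";            // handled by the string-init path
  }
  if (invoke.is_static_or_direct) {
    return "inline: exact target known";     // actual_method == resolved_method
  }
  Method* actual = Devirtualize(invoke);
  return actual != nullptr ? "inline: devirtualized"
                           : "try inline caches / give up";
}

int main() {
  Method final_m{true}, virtual_m{false};
  std::printf("%s\n", ClassifyInlineTarget({nullptr, true}));
  std::printf("%s\n", ClassifyInlineTarget({&final_m, false}));
  std::printf("%s\n", ClassifyInlineTarget({&virtual_m, false}));
  return 0;
}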
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 02d3a5f499..a1dcd58a84 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -38,7 +38,7 @@ class HInliner : public HOptimization {
const DexCompilationUnit& outer_compilation_unit,
const DexCompilationUnit& caller_compilation_unit,
CompilerDriver* compiler_driver,
- StackHandleScopeCollection* handles,
+ VariableSizedHandleScope* handles,
OptimizingCompilerStats* stats,
size_t total_number_of_dex_registers,
size_t depth)
@@ -64,12 +64,12 @@ class HInliner : public HOptimization {
// reference type propagation can run after the inlining. If the inlining is successful, this
// method will replace and remove the `invoke_instruction`.
bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
bool TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
HInstruction** return_replacement)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
@@ -86,7 +86,7 @@ class HInliner : public HOptimization {
bool TryPatternSubstitution(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
HInstruction** return_replacement)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Create a new HInstanceFieldGet.
HInstanceFieldGet* CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
@@ -105,38 +105,38 @@ class HInliner : public HOptimization {
bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
const InlineCache& ic)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Try to inline targets of a polymorphic call.
bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
const InlineCache& ic)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
const InlineCache& ic)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
HInstruction* receiver,
uint32_t dex_pc) const
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Creates an instance of ReferenceTypeInfo from `klass` if `klass` is
// admissible (see ReferenceTypePropagation::IsAdmissible for details).
// Otherwise returns inexact Object RTI.
- ReferenceTypeInfo GetClassRTI(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+ ReferenceTypeInfo GetClassRTI(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Add a type guard on the given `receiver`. This will add to the graph:
// i0 = HFieldGet(receiver, klass)
@@ -154,7 +154,7 @@ class HInliner : public HOptimization {
bool is_referrer,
HInstruction* invoke_instruction,
bool with_deoptimization)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
/*
* Ad-hoc implementation of a diamond pattern in the graph for
@@ -197,7 +197,7 @@ class HInliner : public HOptimization {
const size_t total_number_of_dex_registers_;
const size_t depth_;
size_t number_of_inlined_instructions_;
- StackHandleScopeCollection* const handles_;
+ VariableSizedHandleScope* const handles_;
DISALLOW_COPY_AND_ASSIGN(HInliner);
};
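The annotation rename from SHARED_REQUIRES to REQUIRES_SHARED keeps the same Clang thread-safety-analysis meaning: the caller must hold the capability in shared mode. A minimal self-contained example of that annotation style, using the generic Clang recipe rather than ART's actual macro definitions:

// Minimal Clang -Wthread-safety sketch. The macro names mirror the convention
// used in the diff; the definitions are the generic Clang recipe, not ART's.
#if defined(__clang__)
#define TS_ATTR(x) __attribute__((x))
#else
#define TS_ATTR(x)
#endif
#define CAPABILITY(x) TS_ATTR(capability(x))
#define REQUIRES_SHARED(...) TS_ATTR(requires_shared_capability(__VA_ARGS__))
#define ACQUIRE_SHARED(...) TS_ATTR(acquire_shared_capability(__VA_ARGS__))
#define RELEASE_SHARED(...) TS_ATTR(release_shared_capability(__VA_ARGS__))
#define GUARDED_BY(x) TS_ATTR(guarded_by(x))

class CAPABILITY("mutex") ReaderWriterMutex {
 public:
  void SharedLock() ACQUIRE_SHARED() {}    // reader lock: acquires in shared mode
  void SharedUnlock() RELEASE_SHARED() {}  // releases the shared capability
};

ReaderWriterMutex g_lock;
int g_value GUARDED_BY(g_lock);

// Callers must hold g_lock at least in shared mode; clang -Wthread-safety
// reports call sites that do not.
int ReadValue() REQUIRES_SHARED(g_lock) { return g_value; }

int main() {
  g_lock.SharedLock();
  int v = ReadValue();
  g_lock.SharedUnlock();
  return v;
}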
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index e5dab569fd..c8c4ca76fd 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -21,7 +21,8 @@
#include "class_linker.h"
#include "dex_instruction-inl.h"
#include "driver/compiler_options.h"
-#include "scoped_thread_state_change.h"
+#include "imtable-inl.h"
+#include "scoped_thread_state_change-inl.h"
namespace art {
@@ -80,8 +81,7 @@ void HInstructionBuilder::InitializeBlockLocals() {
// locals (guaranteed by HGraphBuilder) and that all try blocks have been
// visited already (from HTryBoundary scoping and reverse post order).
bool catch_block_visited = false;
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
+ for (HBasicBlock* current : graph_->GetReversePostOrder()) {
if (current == current_block_) {
catch_block_visited = true;
} else if (current->IsTryBlock()) {
@@ -275,8 +275,8 @@ bool HInstructionBuilder::Build() {
FindNativeDebugInfoLocations(native_debug_info_locations);
}
- for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) {
- current_block_ = block_it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ current_block_ = block;
uint32_t block_dex_pc = current_block_->GetDexPc();
InitializeBlockLocals();
@@ -674,7 +674,7 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in
ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+ soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
// We fetch the referenced class eagerly (that is, the class pointed to by the MethodId
// at method_idx), as `CanAccessResolvedMethod` expects it to be in the dex cache.
@@ -767,6 +767,11 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in
return resolved_method;
}
+static bool IsStringConstructor(ArtMethod* method) {
+ ScopedObjectAccess soa(Thread::Current());
+ return method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
+}
+
bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
uint32_t dex_pc,
uint32_t method_idx,
@@ -785,31 +790,46 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
number_of_arguments++;
}
- MethodReference target_method(dex_file_, method_idx);
+ ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+ if (UNLIKELY(resolved_method == nullptr)) {
+ MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
+ HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
+ number_of_arguments,
+ return_type,
+ dex_pc,
+ method_idx,
+ invoke_type);
+ return HandleInvoke(invoke,
+ number_of_vreg_arguments,
+ args,
+ register_index,
+ is_range,
+ descriptor,
+ nullptr, /* clinit_check */
+ true /* is_unresolved */);
+ }
- // Special handling for string init.
- int32_t string_init_offset = 0;
- bool is_string_init = compiler_driver_->IsStringInit(method_idx,
- dex_file_,
- &string_init_offset);
// Replace calls to String.<init> with StringFactory.
- if (is_string_init) {
+ if (IsStringConstructor(resolved_method)) {
+ uint32_t string_init_entry_point = WellKnownClasses::StringInitToEntryPoint(resolved_method);
HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
HInvokeStaticOrDirect::MethodLoadKind::kStringInit,
HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
- dchecked_integral_cast<uint64_t>(string_init_offset),
+ dchecked_integral_cast<uint64_t>(string_init_entry_point),
0U
};
+ MethodReference target_method(dex_file_, method_idx);
HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
arena_,
number_of_arguments - 1,
Primitive::kPrimNot /*return_type */,
dex_pc,
method_idx,
- target_method,
+ nullptr,
dispatch_info,
invoke_type,
- kStatic /* optimized_invoke_type */,
+ target_method,
HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
return HandleStringInit(invoke,
number_of_vreg_arguments,
@@ -819,26 +839,6 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
descriptor);
}
- ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
-
- if (UNLIKELY(resolved_method == nullptr)) {
- MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
- HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
- number_of_arguments,
- return_type,
- dex_pc,
- method_idx,
- invoke_type);
- return HandleInvoke(invoke,
- number_of_vreg_arguments,
- args,
- register_index,
- is_range,
- descriptor,
- nullptr, /* clinit_check */
- true /* is_unresolved */);
- }
-
// Potential class initialization check, in the case of a static method call.
HClinitCheck* clinit_check = nullptr;
HInvoke* invoke = nullptr;
@@ -853,10 +853,9 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
dex_pc, resolved_method, method_idx, &clinit_check_requirement);
} else if (invoke_type == kSuper) {
if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
- // Update the target method to the one resolved. Note that this may be a no-op if
+ // Update the method index to the one resolved. Note that this may be a no-op if
// we resolved to the method referenced by the instruction.
method_idx = resolved_method->GetDexMethodIndex();
- target_method = MethodReference(dex_file_, method_idx);
}
}
@@ -866,15 +865,17 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
0u,
0U
};
+ MethodReference target_method(resolved_method->GetDexFile(),
+ resolved_method->GetDexMethodIndex());
invoke = new (arena_) HInvokeStaticOrDirect(arena_,
number_of_arguments,
return_type,
dex_pc,
method_idx,
- target_method,
+ resolved_method,
dispatch_info,
invoke_type,
- invoke_type,
+ target_method,
clinit_check_requirement);
} else if (invoke_type == kVirtual) {
ScopedObjectAccess soa(Thread::Current()); // Needed for the method index
@@ -883,16 +884,18 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
return_type,
dex_pc,
method_idx,
+ resolved_method,
resolved_method->GetMethodIndex());
} else {
DCHECK_EQ(invoke_type, kInterface);
- ScopedObjectAccess soa(Thread::Current()); // Needed for the method index
+ ScopedObjectAccess soa(Thread::Current()); // Needed for the IMT index.
invoke = new (arena_) HInvokeInterface(arena_,
number_of_arguments,
return_type,
dex_pc,
method_idx,
- resolved_method->GetImtIndex());
+ resolved_method,
+ ImTable::GetImtIndex(resolved_method));
}
return HandleInvoke(invoke,
@@ -935,7 +938,8 @@ bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc)
IsOutermostCompilingClass(type_index),
dex_pc,
needs_access_check,
- /* is_in_dex_cache */ false);
+ /* is_in_dex_cache */ false,
+ /* is_in_boot_image */ false);
AppendInstruction(load_class);
HInstruction* cls = load_class;
@@ -957,7 +961,7 @@ bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc)
}
static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
}
@@ -1026,7 +1030,8 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke(
is_outer_class,
dex_pc,
/*needs_access_check*/ false,
- /* is_in_dex_cache */ false);
+ /* is_in_dex_cache */ false,
+ /* is_in_boot_image */ false);
AppendInstruction(load_class);
clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
AppendInstruction(clinit_check);
@@ -1059,7 +1064,7 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke,
// reject any class where this is violated. However, the verifier only does these checks
// on non-trivially dead instructions, so we just bail out of the compilation.
VLOG(compiler) << "Did not compile "
- << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+ << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
<< " because of non-sequential dex register pair in wide argument";
MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
return false;
@@ -1073,7 +1078,7 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke,
if (*argument_index != invoke->GetNumberOfArguments()) {
VLOG(compiler) << "Did not compile "
- << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+ << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
<< " because of wrong number of arguments in invoke instruction";
MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
return false;
@@ -1101,7 +1106,7 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke,
size_t start_index = 0;
size_t argument_index = 0;
- if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) { // Instance call.
+ if (invoke->GetInvokeType() != InvokeType::kStatic) { // Instance call.
uint32_t obj_reg = is_range ? register_index : args[0];
HInstruction* arg = is_unresolved
? LoadLocal(obj_reg, Primitive::kPrimNot)
@@ -1278,7 +1283,7 @@ static mirror::Class* GetClassFrom(CompilerDriver* driver,
ScopedObjectAccess soa(Thread::Current());
StackHandleScope<1> hs(soa.Self());
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
+ soa.Decode<mirror::ClassLoader>(compilation_unit.GetClassLoader())));
Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache();
return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
@@ -1297,7 +1302,7 @@ bool HInstructionBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
StackHandleScope<3> hs(soa.Self());
Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+ soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
@@ -1338,7 +1343,7 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
StackHandleScope<3> hs(soa.Self());
Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+ soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
ArtField* resolved_field = compiler_driver_->ResolveField(
soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
@@ -1384,7 +1389,8 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
is_outer_class,
dex_pc,
/*needs_access_check*/ false,
- /* is_in_dex_cache */ false);
+ /* is_in_dex_cache */ false,
+ /* is_in_boot_image */ false);
AppendInstruction(constant);
HInstruction* cls = constant;
@@ -1545,8 +1551,6 @@ void HInstructionBuilder::BuildFillArrayData(HInstruction* object,
void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
HInstruction* array = LoadNullCheckedLocal(instruction.VRegA_31t(), dex_pc);
- HInstruction* length = new (arena_) HArrayLength(array, dex_pc);
- AppendInstruction(length);
int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
const Instruction::ArrayDataPayload* payload =
@@ -1554,6 +1558,14 @@ void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uin
const uint8_t* data = payload->data;
uint32_t element_count = payload->element_count;
+ if (element_count == 0u) {
+ // For empty payload we emit only the null check above.
+ return;
+ }
+
+ HInstruction* length = new (arena_) HArrayLength(array, dex_pc);
+ AppendInstruction(length);
+
// Implementation of this DEX instruction seems to be that the bounds check is
// done before doing any stores.
HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc);
@@ -1607,7 +1619,7 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,
}
static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
if (cls.Get() == nullptr) {
return TypeCheckKind::kUnresolvedCheck;
} else if (cls->IsInterface()) {
@@ -1653,7 +1665,8 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
IsOutermostCompilingClass(type_index),
dex_pc,
!can_access,
- /* is_in_dex_cache */ false);
+ /* is_in_dex_cache */ false,
+ /* is_in_boot_image */ false);
AppendInstruction(cls);
TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
@@ -1803,7 +1816,20 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
case Instruction::MOVE_OBJECT:
case Instruction::MOVE_OBJECT_16:
case Instruction::MOVE_OBJECT_FROM16: {
- HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot);
+ // The verifier has no notion of a null type, so a move-object of constant 0
+ // will lead to the same constant 0 in the destination register. To mimic
+ // this behavior, we just pretend we haven't seen a type change (int to reference)
+ // for the 0 constant and phis. We rely on our type propagation to eventually get the
+ // types correct.
+ uint32_t reg_number = instruction.VRegB();
+ HInstruction* value = (*current_locals_)[reg_number];
+ if (value->IsIntConstant()) {
+ DCHECK_EQ(value->AsIntConstant()->GetValue(), 0);
+ } else if (value->IsPhi()) {
+ DCHECK(value->GetType() == Primitive::kPrimInt || value->GetType() == Primitive::kPrimNot);
+ } else {
+ value = LoadLocal(reg_number, Primitive::kPrimNot);
+ }
UpdateLocal(instruction.VRegA(), value);
break;
}
@@ -2628,7 +2654,8 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
IsOutermostCompilingClass(type_index),
dex_pc,
!can_access,
- /* is_in_dex_cache */ false));
+ /* is_in_dex_cache */ false,
+ /* is_in_boot_image */ false));
UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
break;
}
@@ -2688,7 +2715,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
default:
VLOG(compiler) << "Did not compile "
- << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+ << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
<< " because of unhandled instruction "
<< instruction.Name();
MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
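With the new early return in BuildFillArrayData, an empty payload produces only the null check; a non-empty one still gets the array length, the bounds check against the last index, and the stores. A rough scalar model of that runtime behavior, with invented names:

#include <cstdio>
#include <stdexcept>
#include <vector>

// Invented scalar model of what the built IR does at runtime for
// fill-array-data: null check always; length/bounds check and stores
// only when the payload is non-empty.
void FillArrayData(std::vector<int>* array, const std::vector<int>& payload) {
  if (array == nullptr) {
    throw std::runtime_error("NullPointerException");  // HNullCheck
  }
  if (payload.empty()) {
    return;  // new early return: no HArrayLength / HBoundsCheck emitted
  }
  // Bounds check against the last index before doing any store.
  if (payload.size() > array->size()) {
    throw std::out_of_range("ArrayIndexOutOfBoundsException");  // HBoundsCheck
  }
  for (size_t i = 0; i < payload.size(); ++i) {
    (*array)[i] = payload[i];  // the actual stores
  }
}

int main() {
  std::vector<int> a(4, 0);
  FillArrayData(&a, {});         // only the null check fires
  FillArrayData(&a, {1, 2, 3});  // length + bounds check, then stores
  std::printf("%d %d %d %d\n", a[0], a[1], a[2], a[3]);  // 1 2 3 0
  return 0;
}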
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 517cf76831..aa34ddd1d1 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -103,7 +103,7 @@ class HInstructionBuilder : public ValueObject {
bool NeedsAccessCheck(uint32_t type_index,
Handle<mirror::DexCache> dex_cache,
/*out*/bool* finalizable) const
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
template<typename T>
@@ -255,14 +255,14 @@ class HInstructionBuilder : public ValueObject {
ArtMethod* method,
uint32_t method_idx,
HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Build a HNewInstance instruction.
bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
// Return whether the compiler can assume `cls` is initialized.
bool IsInitialized(Handle<mirror::Class> cls) const
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Try to resolve a method using the class linker. Return null if a method could
// not be resolved.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 4ca0600dba..e4d280f26d 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -18,7 +18,7 @@
#include "intrinsics.h"
#include "mirror/class-inl.h"
-#include "scoped_thread_state_change.h"
+#include "scoped_thread_state_change-inl.h"
namespace art {
@@ -124,20 +124,16 @@ void InstructionSimplifier::Run() {
void InstructionSimplifierVisitor::Run() {
// Iterate in reverse post order to open up more simplifications to users
// of instructions that got simplified.
- for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) {
+ for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
// The simplification of an instruction to another instruction may yield
// possibilities for other simplifications. So although we perform a reverse
// post order visit, we sometimes need to revisit an instruction index.
- simplification_occurred_ = false;
- VisitBasicBlock(it.Current());
- if (simplification_occurred_ &&
- (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) {
- // New simplifications may be applicable to the instruction at the
- // current index, so don't advance the iterator.
- continue;
- }
+ do {
+ simplification_occurred_ = false;
+ VisitBasicBlock(block);
+ } while (simplification_occurred_ &&
+ (simplifications_at_current_position_ < kMaxSamePositionSimplifications));
simplifications_at_current_position_ = 0;
- it.Advance();
}
}
@@ -1577,6 +1573,18 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) {
return;
}
+ if ((input_cst != nullptr) && input_cst->IsOne()
+ && input_other->GetType() == Primitive::kPrimBoolean) {
+ // Replace code looking like
+ // XOR dst, src, 1
+ // with
+ // BOOLEAN_NOT dst, src
+ HBooleanNot* boolean_not = new (GetGraph()->GetArena()) HBooleanNot(input_other);
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, boolean_not);
+ RecordSimplification();
+ return;
+ }
+
if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) {
// Replace code looking like
// XOR dst, src, 0xFFF...FF
@@ -1645,7 +1653,7 @@ void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke,
bool is_left,
Primitive::Type type) {
DCHECK(invoke->IsInvokeStaticOrDirect());
- DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
+ DCHECK_EQ(invoke->GetInvokeType(), InvokeType::kStatic);
HInstruction* value = invoke->InputAt(0);
HInstruction* distance = invoke->InputAt(1);
// Replace the invoke with an HRor.
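Two things change in this file: visiting a block now loops in place until no more simplifications fire, and "x ^ 1" on a boolean-typed value is rewritten to a boolean-not. A toy standalone sketch of both, with an invented mini-IR (the real pass additionally caps revisits at kMaxSamePositionSimplifications):

#include <cstdio>
#include <vector>

enum class Op { kConst, kXor, kBooleanNot };
struct Insn { Op op; int lhs; int rhs; bool boolean_typed; };

// One pass over the block: rewrite "xor src, 1" on a boolean value to "not src".
bool SimplifyOnce(std::vector<Insn>* block) {
  bool changed = false;
  for (Insn& insn : *block) {
    if (insn.op == Op::kXor && insn.rhs == 1 && insn.boolean_typed) {
      insn = {Op::kBooleanNot, insn.lhs, 0, true};  // XOR dst, src, 1 -> BOOLEAN_NOT dst, src
      changed = true;
    }
  }
  return changed;
}

// Mirrors the new do/while: keep revisiting the block while simplifications fire.
void SimplifyBlock(std::vector<Insn>* block) {
  bool simplification_occurred;
  do {
    simplification_occurred = SimplifyOnce(block);
  } while (simplification_occurred);
}

int main() {
  std::vector<Insn> block = {{Op::kXor, /*lhs=*/3, /*rhs=*/1, /*boolean_typed=*/true}};
  SimplifyBlock(&block);
  std::printf("rewritten to boolean-not: %d\n", block[0].op == Op::kBooleanNot);  // 1
  return 0;
}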
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index 7905104ed4..7fe1067aa9 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -35,9 +35,9 @@ namespace art {
*/
class InstructionSimplifier : public HOptimization {
public:
- InstructionSimplifier(HGraph* graph,
- OptimizingCompilerStats* stats = nullptr,
- const char* name = kInstructionSimplifierPassName)
+ explicit InstructionSimplifier(HGraph* graph,
+ OptimizingCompilerStats* stats = nullptr,
+ const char* name = kInstructionSimplifierPassName)
: HOptimization(graph, name, stats) {}
static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 495f3fd232..56e4c7a9c2 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -44,6 +44,14 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
Primitive::Type type = instruction->GetType();
+ // TODO: Implement reading (length + compression) for String compression feature from
+ // negative offset (count_offset - data_offset). Thumb2Assembler does not support T4
+ // encoding of "LDR (immediate)" at the moment.
+ // Don't move the array pointer for String.charAt; we need to read the count first.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ return;
+ }
+
if (type == Primitive::kPrimLong
|| type == Primitive::kPrimFloat
|| type == Primitive::kPrimDouble) {
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 3d297dacc0..782110c40a 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -48,7 +48,9 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor {
class InstructionSimplifierArm : public HOptimization {
public:
InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, "instruction_simplifier_arm", stats) {}
+ : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
+
+ static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
void Run() OVERRIDE {
InstructionSimplifierArmVisitor visitor(graph_, stats_);
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6d107d571f..d0dd650024 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -140,6 +140,13 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) {
void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ // Don't move the array pointer for String.charAt; we need to read the count first.
+ // TODO: Implement reading (length + compression) for String compression feature from
+ // negative offset (count_offset - data_offset) using LDP and clobbering an extra temporary.
+ // Note that "LDR (Immediate)" does not have a "signed offset" encoding.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ return;
+ }
if (TryExtractArrayAccessAddress(instruction,
instruction->GetArray(),
instruction->GetIndex(),
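Both ARM simplifiers now skip the address extraction for String.charAt under string compression, because the count word, which carries the compression flag, must be read before the element access can be formed. A rough layout model of why, with an invented count encoding (the real flag-bit placement may differ):

#include <cassert>
#include <cstdint>
#include <cstring>

// Invented layout model: charAt must load the count (with its compression
// flag) before it knows whether to read 8-bit or 16-bit characters, so the
// data pointer cannot simply be hoisted past that load.
struct CompressedString {
  uint32_t count;    // length << 1 | compressed_flag (flag encoding invented here)
  uint8_t data[8];   // 8-bit chars if compressed, 16-bit otherwise
};

uint16_t CharAt(const CompressedString& s, uint32_t index) {
  bool compressed = (s.count & 1u) != 0;  // must load count first
  if (compressed) {
    return s.data[index];                               // LDRB-style access
  }
  uint16_t c;
  std::memcpy(&c, s.data + 2 * index, sizeof(c));       // LDRH-style access
  return c;
}

int main() {
  CompressedString s{};
  s.count = (3u << 1) | 1u;  // length 3, marked compressed in this toy encoding
  std::memcpy(s.data, "abc", 3);
  assert(CharAt(s, 1) == 'b');
  return 0;
}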
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 28648b3bea..f71684efe9 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -82,8 +82,9 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
class InstructionSimplifierArm64 : public HOptimization {
public:
InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, "instruction_simplifier_arm64", stats) {}
-
+ : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
+ static constexpr const char* kInstructionSimplifierArm64PassName
+ = "instruction_simplifier_arm64";
void Run() OVERRIDE {
InstructionSimplifierArm64Visitor visitor(graph_, stats_);
visitor.VisitReversePostOrder();
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 8f7778fe68..04e063c92e 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -259,7 +259,8 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
HIntConstant* offset = graph->GetIntConstant(data_offset);
HIntermediateAddress* address =
new (arena) HIntermediateAddress(array, offset, kNoDexPc);
- address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
+ // TODO: Is it ok to not have this on the intermediate address?
+ // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
access->GetBlock()->InsertInstructionBefore(address, access);
access->ReplaceInput(address, 0);
// Both instructions must depend on GC to prevent any instruction that can
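TryExtractArrayAccessAddress factors "array + data_offset" into an HIntermediateAddress so several element accesses can share it and codegen can use base-plus-index addressing. A conceptual standalone sketch of that factoring, with invented offsets:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Conceptual model (invented names): hoist "array base + data offset" into a
// single intermediate address so multiple element accesses reuse it.
int32_t LoadElement(const uint8_t* intermediate, size_t index) {
  int32_t v;
  std::memcpy(&v, intermediate + index * sizeof(int32_t), sizeof(v));
  return v;
}

int main() {
  // Fake "array object": a 12-byte header followed by int32 elements.
  const size_t kDataOffset = 12;
  alignas(int32_t) uint8_t object[12 + 4 * sizeof(int32_t)] = {};
  int32_t elems[4] = {10, 20, 30, 40};
  std::memcpy(object + kDataOffset, elems, sizeof(elems));

  const uint8_t* address = object + kDataOffset;  // HIntermediateAddress(array, offset)
  std::printf("%d %d\n", LoadElement(address, 1), LoadElement(address, 3));  // 20 40
  return 0;
}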
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 418d59c6cb..fc6ff7b197 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -18,14 +18,11 @@
#include "art_method.h"
#include "class_linker.h"
-#include "dex/quick/dex_file_method_inliner.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "driver/compiler_driver.h"
#include "invoke_type.h"
#include "mirror/dex_cache-inl.h"
#include "nodes.h"
-#include "quick/inline_method_analyser.h"
-#include "scoped_thread_state_change.h"
+#include "scoped_thread_state_change-inl.h"
#include "thread-inl.h"
#include "utils.h"
@@ -36,7 +33,7 @@ static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) {
switch (i) {
case Intrinsics::kNone:
return kInterface; // Non-sensical for intrinsic.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return IsStatic;
#include "intrinsics_list.h"
@@ -52,7 +49,7 @@ static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCache(Intrinsic
switch (i) {
case Intrinsics::kNone:
return kNeedsEnvironmentOrCache; // Non-sensical for intrinsic.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return NeedsEnvironmentOrCache;
#include "intrinsics_list.h"
@@ -68,7 +65,7 @@ static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) {
switch (i) {
case Intrinsics::kNone:
return kAllSideEffects;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return SideEffects;
#include "intrinsics_list.h"
@@ -84,7 +81,7 @@ static inline IntrinsicExceptions GetExceptions(Intrinsics i) {
switch (i) {
case Intrinsics::kNone:
return kCanThrow;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return Exceptions;
#include "intrinsics_list.h"
@@ -95,430 +92,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
return kCanThrow;
}
-static Primitive::Type GetType(uint64_t data, bool is_op_size) {
- if (is_op_size) {
- switch (static_cast<OpSize>(data)) {
- case kSignedByte:
- return Primitive::kPrimByte;
- case kSignedHalf:
- return Primitive::kPrimShort;
- case k32:
- return Primitive::kPrimInt;
- case k64:
- return Primitive::kPrimLong;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << data;
- UNREACHABLE();
- }
- } else {
- if ((data & kIntrinsicFlagIsLong) != 0) {
- return Primitive::kPrimLong;
- }
- if ((data & kIntrinsicFlagIsObject) != 0) {
- return Primitive::kPrimNot;
- }
- return Primitive::kPrimInt;
- }
-}
-
-static Intrinsics GetIntrinsic(InlineMethod method) {
- switch (method.opcode) {
- // Floating-point conversions.
- case kIntrinsicDoubleCvt:
- return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
- Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble;
- case kIntrinsicFloatCvt:
- return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
- Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat;
- case kIntrinsicFloat2Int:
- return Intrinsics::kFloatFloatToIntBits;
- case kIntrinsicDouble2Long:
- return Intrinsics::kDoubleDoubleToLongBits;
-
- // Floating-point tests.
- case kIntrinsicFloatIsInfinite:
- return Intrinsics::kFloatIsInfinite;
- case kIntrinsicDoubleIsInfinite:
- return Intrinsics::kDoubleIsInfinite;
- case kIntrinsicFloatIsNaN:
- return Intrinsics::kFloatIsNaN;
- case kIntrinsicDoubleIsNaN:
- return Intrinsics::kDoubleIsNaN;
-
- // Bit manipulations.
- case kIntrinsicReverseBits:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerReverse;
- case Primitive::kPrimLong:
- return Intrinsics::kLongReverse;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicReverseBytes:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimShort:
- return Intrinsics::kShortReverseBytes;
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerReverseBytes;
- case Primitive::kPrimLong:
- return Intrinsics::kLongReverseBytes;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicRotateRight:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerRotateRight;
- case Primitive::kPrimLong:
- return Intrinsics::kLongRotateRight;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicRotateLeft:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerRotateLeft;
- case Primitive::kPrimLong:
- return Intrinsics::kLongRotateLeft;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
-
- // Misc data processing.
- case kIntrinsicBitCount:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerBitCount;
- case Primitive::kPrimLong:
- return Intrinsics::kLongBitCount;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicCompare:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerCompare;
- case Primitive::kPrimLong:
- return Intrinsics::kLongCompare;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicHighestOneBit:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerHighestOneBit;
- case Primitive::kPrimLong:
- return Intrinsics::kLongHighestOneBit;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicLowestOneBit:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerLowestOneBit;
- case Primitive::kPrimLong:
- return Intrinsics::kLongLowestOneBit;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicNumberOfLeadingZeros:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerNumberOfLeadingZeros;
- case Primitive::kPrimLong:
- return Intrinsics::kLongNumberOfLeadingZeros;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicNumberOfTrailingZeros:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerNumberOfTrailingZeros;
- case Primitive::kPrimLong:
- return Intrinsics::kLongNumberOfTrailingZeros;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicSignum:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimInt:
- return Intrinsics::kIntegerSignum;
- case Primitive::kPrimLong:
- return Intrinsics::kLongSignum;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
-
- // Abs.
- case kIntrinsicAbsDouble:
- return Intrinsics::kMathAbsDouble;
- case kIntrinsicAbsFloat:
- return Intrinsics::kMathAbsFloat;
- case kIntrinsicAbsInt:
- return Intrinsics::kMathAbsInt;
- case kIntrinsicAbsLong:
- return Intrinsics::kMathAbsLong;
-
- // Min/max.
- case kIntrinsicMinMaxDouble:
- return ((method.d.data & kIntrinsicFlagMin) == 0) ?
- Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble;
- case kIntrinsicMinMaxFloat:
- return ((method.d.data & kIntrinsicFlagMin) == 0) ?
- Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat;
- case kIntrinsicMinMaxInt:
- return ((method.d.data & kIntrinsicFlagMin) == 0) ?
- Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt;
- case kIntrinsicMinMaxLong:
- return ((method.d.data & kIntrinsicFlagMin) == 0) ?
- Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong;
-
- // More math builtins.
- case kIntrinsicCos:
- return Intrinsics::kMathCos;
- case kIntrinsicSin:
- return Intrinsics::kMathSin;
- case kIntrinsicAcos:
- return Intrinsics::kMathAcos;
- case kIntrinsicAsin:
- return Intrinsics::kMathAsin;
- case kIntrinsicAtan:
- return Intrinsics::kMathAtan;
- case kIntrinsicAtan2:
- return Intrinsics::kMathAtan2;
- case kIntrinsicCbrt:
- return Intrinsics::kMathCbrt;
- case kIntrinsicCosh:
- return Intrinsics::kMathCosh;
- case kIntrinsicExp:
- return Intrinsics::kMathExp;
- case kIntrinsicExpm1:
- return Intrinsics::kMathExpm1;
- case kIntrinsicHypot:
- return Intrinsics::kMathHypot;
- case kIntrinsicLog:
- return Intrinsics::kMathLog;
- case kIntrinsicLog10:
- return Intrinsics::kMathLog10;
- case kIntrinsicNextAfter:
- return Intrinsics::kMathNextAfter;
- case kIntrinsicSinh:
- return Intrinsics::kMathSinh;
- case kIntrinsicTan:
- return Intrinsics::kMathTan;
- case kIntrinsicTanh:
- return Intrinsics::kMathTanh;
-
- // Misc math.
- case kIntrinsicSqrt:
- return Intrinsics::kMathSqrt;
- case kIntrinsicCeil:
- return Intrinsics::kMathCeil;
- case kIntrinsicFloor:
- return Intrinsics::kMathFloor;
- case kIntrinsicRint:
- return Intrinsics::kMathRint;
- case kIntrinsicRoundDouble:
- return Intrinsics::kMathRoundDouble;
- case kIntrinsicRoundFloat:
- return Intrinsics::kMathRoundFloat;
-
- // System.arraycopy.
- case kIntrinsicSystemArrayCopyCharArray:
- return Intrinsics::kSystemArrayCopyChar;
-
- case kIntrinsicSystemArrayCopy:
- return Intrinsics::kSystemArrayCopy;
-
- // Thread.currentThread.
- case kIntrinsicCurrentThread:
- return Intrinsics::kThreadCurrentThread;
-
- // Memory.peek.
- case kIntrinsicPeek:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimByte:
- return Intrinsics::kMemoryPeekByte;
- case Primitive::kPrimShort:
- return Intrinsics::kMemoryPeekShortNative;
- case Primitive::kPrimInt:
- return Intrinsics::kMemoryPeekIntNative;
- case Primitive::kPrimLong:
- return Intrinsics::kMemoryPeekLongNative;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
-
- // Memory.poke.
- case kIntrinsicPoke:
- switch (GetType(method.d.data, true)) {
- case Primitive::kPrimByte:
- return Intrinsics::kMemoryPokeByte;
- case Primitive::kPrimShort:
- return Intrinsics::kMemoryPokeShortNative;
- case Primitive::kPrimInt:
- return Intrinsics::kMemoryPokeIntNative;
- case Primitive::kPrimLong:
- return Intrinsics::kMemoryPokeLongNative;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
-
- // String.
- case kIntrinsicCharAt:
- return Intrinsics::kStringCharAt;
- case kIntrinsicCompareTo:
- return Intrinsics::kStringCompareTo;
- case kIntrinsicEquals:
- return Intrinsics::kStringEquals;
- case kIntrinsicGetCharsNoCheck:
- return Intrinsics::kStringGetCharsNoCheck;
- case kIntrinsicIsEmptyOrLength:
- return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ?
- Intrinsics::kStringLength : Intrinsics::kStringIsEmpty;
- case kIntrinsicIndexOf:
- return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
- Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
- case kIntrinsicNewStringFromBytes:
- return Intrinsics::kStringNewStringFromBytes;
- case kIntrinsicNewStringFromChars:
- return Intrinsics::kStringNewStringFromChars;
- case kIntrinsicNewStringFromString:
- return Intrinsics::kStringNewStringFromString;
-
- case kIntrinsicCas:
- switch (GetType(method.d.data, false)) {
- case Primitive::kPrimNot:
- return Intrinsics::kUnsafeCASObject;
- case Primitive::kPrimInt:
- return Intrinsics::kUnsafeCASInt;
- case Primitive::kPrimLong:
- return Intrinsics::kUnsafeCASLong;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- case kIntrinsicUnsafeGet: {
- const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile);
- switch (GetType(method.d.data, false)) {
- case Primitive::kPrimInt:
- return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet;
- case Primitive::kPrimLong:
- return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong;
- case Primitive::kPrimNot:
- return is_volatile ? Intrinsics::kUnsafeGetObjectVolatile : Intrinsics::kUnsafeGetObject;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- }
- case kIntrinsicUnsafePut: {
- enum Sync { kNoSync, kVolatile, kOrdered };
- const Sync sync =
- ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile :
- ((method.d.data & kIntrinsicFlagIsOrdered) != 0) ? kOrdered :
- kNoSync;
- switch (GetType(method.d.data, false)) {
- case Primitive::kPrimInt:
- switch (sync) {
- case kNoSync:
- return Intrinsics::kUnsafePut;
- case kVolatile:
- return Intrinsics::kUnsafePutVolatile;
- case kOrdered:
- return Intrinsics::kUnsafePutOrdered;
- }
- break;
- case Primitive::kPrimLong:
- switch (sync) {
- case kNoSync:
- return Intrinsics::kUnsafePutLong;
- case kVolatile:
- return Intrinsics::kUnsafePutLongVolatile;
- case kOrdered:
- return Intrinsics::kUnsafePutLongOrdered;
- }
- break;
- case Primitive::kPrimNot:
- switch (sync) {
- case kNoSync:
- return Intrinsics::kUnsafePutObject;
- case kVolatile:
- return Intrinsics::kUnsafePutObjectVolatile;
- case kOrdered:
- return Intrinsics::kUnsafePutObjectOrdered;
- }
- break;
- default:
- LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
- UNREACHABLE();
- }
- break;
- }
-
- // 1.8.
- case kIntrinsicUnsafeGetAndAddInt:
- return Intrinsics::kUnsafeGetAndAddInt;
- case kIntrinsicUnsafeGetAndAddLong:
- return Intrinsics::kUnsafeGetAndAddLong;
- case kIntrinsicUnsafeGetAndSetInt:
- return Intrinsics::kUnsafeGetAndSetInt;
- case kIntrinsicUnsafeGetAndSetLong:
- return Intrinsics::kUnsafeGetAndSetLong;
- case kIntrinsicUnsafeGetAndSetObject:
- return Intrinsics::kUnsafeGetAndSetObject;
- case kIntrinsicUnsafeLoadFence:
- return Intrinsics::kUnsafeLoadFence;
- case kIntrinsicUnsafeStoreFence:
- return Intrinsics::kUnsafeStoreFence;
- case kIntrinsicUnsafeFullFence:
- return Intrinsics::kUnsafeFullFence;
-
- // Virtual cases.
-
- case kIntrinsicReferenceGetReferent:
- return Intrinsics::kReferenceGetReferent;
-
- // Quick inliner cases. Remove after refactoring. They are here so that we can use the
- // compiler to warn on missing cases.
-
- case kInlineOpNop:
- case kInlineOpReturnArg:
- case kInlineOpNonWideConst:
- case kInlineOpIGet:
- case kInlineOpIPut:
- case kInlineOpConstructor:
- return Intrinsics::kNone;
-
- // String init cases, not intrinsics.
-
- case kInlineStringInit:
- return Intrinsics::kNone;
-
- // No default case to make the compiler warn on missing cases.
- }
- return Intrinsics::kNone;
-}
-
-static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile& dex_file) {
- // The DexFileMethodInliner should have checked whether the methods are agreeing with
- // what we expect, i.e., static methods are called as such. Add another check here for
- // our expectations:
- //
+static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) {
// Whenever the intrinsic is marked as static, report an error if we find an InvokeVirtual.
//
// Whenever the intrinsic is marked as direct and we find an InvokeVirtual, a devirtualization
@@ -532,9 +106,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile
// inline. If the precise type is known, however, the instruction will be sharpened to an
// InvokeStaticOrDirect.
InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
- InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
- invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() :
- invoke->IsInvokeVirtual() ? kVirtual : kSuper;
+ InvokeType invoke_type = invoke->GetInvokeType();
switch (intrinsic_type) {
case kStatic:
return (invoke_type == kStatic);
@@ -544,13 +116,9 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile
return true;
}
if (invoke_type == kVirtual) {
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ ArtMethod* art_method = invoke->GetResolvedMethod();
ScopedObjectAccess soa(Thread::Current());
- ArtMethod* art_method =
- class_linker->FindDexCache(soa.Self(), dex_file)->GetResolvedMethod(
- invoke->GetDexMethodIndex(), class_linker->GetImagePointerSize());
- return art_method != nullptr &&
- (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal());
+ return (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal());
}
return false;
@@ -563,35 +131,28 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile
}
}
-// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod.
void IntrinsicsRecognizer::Run() {
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ ScopedObjectAccess soa(Thread::Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
inst_it.Advance()) {
HInstruction* inst = inst_it.Current();
if (inst->IsInvoke()) {
HInvoke* invoke = inst->AsInvoke();
- InlineMethod method;
- const DexFile& dex_file = invoke->GetDexFile();
- DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file);
- DCHECK(inliner != nullptr);
- if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
- Intrinsics intrinsic = GetIntrinsic(method);
-
- if (intrinsic != Intrinsics::kNone) {
- if (!CheckInvokeType(intrinsic, invoke, dex_file)) {
- LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
- << intrinsic << " for "
- << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile())
- << invoke->DebugName();
- } else {
- invoke->SetIntrinsic(intrinsic,
- NeedsEnvironmentOrCache(intrinsic),
- GetSideEffects(intrinsic),
- GetExceptions(intrinsic));
- MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized);
- }
+ ArtMethod* art_method = invoke->GetResolvedMethod();
+ if (art_method != nullptr && art_method->IsIntrinsic()) {
+ Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic());
+ if (!CheckInvokeType(intrinsic, invoke)) {
+ LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
+ << intrinsic << " for "
+ << invoke->GetDexFile().PrettyMethod(invoke->GetDexMethodIndex())
+ << invoke->DebugName();
+ } else {
+ invoke->SetIntrinsic(intrinsic,
+ NeedsEnvironmentOrCache(intrinsic),
+ GetSideEffects(intrinsic),
+ GetExceptions(intrinsic));
+ MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized);
}
}
}
@@ -604,7 +165,7 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
case Intrinsics::kNone:
os << "None";
break;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
os << # Name; \
break;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 3429a8fdbb..1e73cf67df 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -27,9 +27,6 @@ namespace art {
class CompilerDriver;
class DexFile;
-// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
-static constexpr bool kRoundIsPlusPointFive = false;
-
// Positive floating-point infinities.
static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
@@ -37,17 +34,14 @@ static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000)
// Recognize intrinsics from HInvoke nodes.
class IntrinsicsRecognizer : public HOptimization {
public:
- IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats)
- : HOptimization(graph, kIntrinsicsRecognizerPassName, stats),
- driver_(driver) {}
+ IntrinsicsRecognizer(HGraph* graph, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kIntrinsicsRecognizerPassName, stats) {}
void Run() OVERRIDE;
static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition";
private:
- CompilerDriver* driver_;
-
DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer);
};
@@ -61,7 +55,7 @@ class IntrinsicVisitor : public ValueObject {
switch (invoke->GetIntrinsic()) {
case Intrinsics::kNone:
return;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, ...) \
case Intrinsics::k ## Name: \
Visit ## Name(invoke); \
return;
@@ -76,7 +70,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, ...) \
virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
#include "intrinsics_list.h"
@@ -246,6 +240,24 @@ UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence) \
UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence) \
UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)
+template <typename IntrinsicLocationsBuilder, typename Codegenerator>
+bool IsCallFreeIntrinsic(HInvoke* invoke, Codegenerator* codegen) {
+ if (invoke->GetIntrinsic() != Intrinsics::kNone) {
+ // This invoke may have intrinsic code generation defined. However, we must
+ // now also determine if this code generation is truly there and call-free
+ // (i.e., not unimplemented, not bailing out on missing instruction features,
+ // and not calling out to a slow path).
+ // This is done by actually calling the locations builder on the instruction
+ // and clearing out the locations once the result is known. We assume this
+ // call only has creating locations as side effects!
+ // TODO: Avoid wasting Arena memory.
+ IntrinsicLocationsBuilder builder(codegen);
+ bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
+ invoke->SetLocations(nullptr);
+ return success;
+ }
+ return false;
+}
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
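Aside: the trailing `...` added to the OPTIMIZING_INTRINSICS definitions above is the usual way to let the INTRINSICS_LIST X-macro grow extra columns without touching every expansion site; a site that only needs some of the columns swallows the rest as variadic arguments. A minimal self-contained sketch of that pattern follows; the list name, entries, and PRINT_NAME macro below are invented for illustration and are not the ART definitions.

#include <iostream>

// Each list entry carries several columns; different expansion sites need different subsets.
#define EXAMPLE_INTRINSICS_LIST(V)              \
  V(FloatFloatToRawIntBits, kStatic, kNoThrow)  \
  V(StringCharAt, kVirtual, kCanThrow)

// A site that only needs the name accepts the remaining columns via `...` and ignores them.
#define PRINT_NAME(Name, ...) std::cout << #Name << '\n';

int main() {
  EXAMPLE_INTRINSICS_LIST(PRINT_NAME)  // Prints each name on its own line.
  return 0;
}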
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index be061f53f7..8790c1e4f1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -41,6 +41,97 @@ ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
+ Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
+ Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
+ Register tmp = locations->GetTemp(3).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+ // Compute the base destination address in `dst_curr_addr`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(dst_curr_addr, dest, element_size * constant + offset);
+ } else {
+ __ add(dst_curr_addr,
+ dest,
+ ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(dst_curr_addr, offset);
+ }
+
+ Label loop;
+ __ Bind(&loop);
+ __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex));
+ __ MaybeUnpoisonHeapReference(tmp);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp = ReadBarrier::Mark(tmp);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp, SP);
+ DCHECK_NE(tmp, LR);
+ DCHECK_NE(tmp, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(src_curr_addr, IP);
+ DCHECK_NE(dst_curr_addr, IP);
+ DCHECK_NE(src_stop_addr, IP);
+ DCHECK_NE(tmp, IP);
+ DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(tmp);
+ __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex));
+ __ cmp(src_curr_addr, ShifterOperand(src_stop_addr));
+ __ b(&loop, NE);
+ __ b(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM);
+};
+
+#undef __
+
+IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen)
+ : arena_(codegen->GetGraph()->GetArena()),
+ assembler_(codegen->GetAssembler()),
+ features_(codegen->GetInstructionSetFeatures()) {}
+
bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
Dispatch(invoke);
LocationSummary* res = invoke->GetLocations();
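For orientation, the copy loop emitted by ReadBarrierSystemArrayCopySlowPathARM above moves one reference per iteration with post-indexed addressing, passing each element through the ReadBarrierMarkRegX runtime entry point before storing it. Stripped of registers and heap-reference poisoning, the loop has roughly the following shape; this is a conceptual sketch only, and Mark() is a stand-in stub, not the real runtime interface.

#include <cstdint>

using Ref = uint32_t;  // Compressed heap reference, as on 32-bit ARM.

// Stub standing in for the ReadBarrierMarkRegX entry point, which returns the
// to-space address of a reference; the identity function keeps the sketch self-contained.
static Ref Mark(Ref ref) { return ref; }

// Shape of the slow-path loop: it is only entered with at least one element left
// to copy, hence the do/while, matching the bind/branch structure above.
static void CopyWithReadBarrier(const Ref* src_curr, const Ref* src_stop, Ref* dst_curr) {
  do {
    Ref tmp = *src_curr++;  // ldr tmp, [src_curr], #element_size
    tmp = Mark(tmp);        // Runtime call in the generated code.
    *dst_curr++ = tmp;      // str tmp, [dst_curr], #element_size
  } while (src_curr != src_stop);
}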
@@ -561,15 +652,18 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
- can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
@@ -797,8 +891,13 @@ void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
HInvoke* invoke,
Primitive::Type type) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -807,36 +906,65 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
locations->SetInAt(4, Location::RequiresRegister());
// If heap poisoning is enabled, we don't want the unpoisoning
- // operations to potentially clobber the output.
- Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+ // operations to potentially clobber the output. Likewise when
+ // emitting a (Baker) read barrier, which may call.
+ Location::OutputOverlap overlaps =
+ ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
? Location::kOutputOverlap
: Location::kNoOutputOverlap;
locations->SetOut(Location::RequiresRegister(), overlaps);
+ // Temporary registers used in CAS. In the object case
+ // (UnsafeCASObject intrinsic), these are also used for
+ // card-marking, and possibly for (Baker) read barrier.
locations->AddTemp(Location::RequiresRegister()); // Pointer.
locations->AddTemp(Location::RequiresRegister()); // Temp 1.
}
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) {
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* codegen) {
DCHECK_NE(type, Primitive::kPrimLong);
ArmAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
- Register out = locations->Out().AsRegister<Register>(); // Boolean result.
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>(); // Boolean result.
- Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
- Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B).
- Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected.
- Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value.
+ Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
+ Location offset_loc = locations->InAt(2);
+ Register offset = offset_loc.AsRegisterPairLow<Register>(); // Offset (discard high 4B).
+ Register expected = locations->InAt(3).AsRegister<Register>(); // Expected.
+ Register value = locations->InAt(4).AsRegister<Register>(); // Value.
- Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory.
- Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory.
+ Location tmp_ptr_loc = locations->GetTemp(0);
+ Register tmp_ptr = tmp_ptr_loc.AsRegister<Register>(); // Pointer to actual memory.
+ Register tmp = locations->GetTemp(1).AsRegister<Register>(); // Value in memory.
if (type == Primitive::kPrimNot) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
// Mark card for object assuming new value is stored. Worst case we will mark an unchanged
// object and scan the receiver at the next GC for nothing.
bool value_can_be_null = true; // TODO: Worth finding out this information?
- codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo, value_can_be_null);
+ codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ /* offset */ 0u,
+ /* index */ offset_loc,
+ ScaleFactor::TIMES_1,
+ tmp_ptr_loc,
+ /* needs_null_check */ false,
+ /* always_update_field */ true,
+ &tmp);
+ }
}
// Prevent reordering with prior memory operations.
@@ -848,12 +976,12 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ add(tmp_ptr, base, ShifterOperand(offset));
if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
- codegen->GetAssembler()->PoisonHeapReference(expected_lo);
- if (value_lo == expected_lo) {
- // Do not poison `value_lo`, as it is the same register as
- // `expected_lo`, which has just been poisoned.
+ __ PoisonHeapReference(expected);
+ if (value == expected) {
+ // Do not poison `value`, as it is the same register as
+ // `expected`, which has just been poisoned.
} else {
- codegen->GetAssembler()->PoisonHeapReference(value_lo);
+ __ PoisonHeapReference(value);
}
}
@@ -865,37 +993,29 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
Label loop_head;
__ Bind(&loop_head);
- // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
- // the reference stored in the object before attempting the CAS,
- // similar to the one in the art::Unsafe_compareAndSwapObject JNI
- // implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject).
- DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
- __ ldrex(tmp_lo, tmp_ptr);
+ __ ldrex(tmp, tmp_ptr);
- __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
+ __ subs(tmp, tmp, ShifterOperand(expected));
__ it(EQ, ItState::kItT);
- __ strex(tmp_lo, value_lo, tmp_ptr, EQ);
- __ cmp(tmp_lo, ShifterOperand(1), EQ);
+ __ strex(tmp, value, tmp_ptr, EQ);
+ __ cmp(tmp, ShifterOperand(1), EQ);
__ b(&loop_head, EQ);
__ dmb(ISH);
- __ rsbs(out, tmp_lo, ShifterOperand(1));
+ __ rsbs(out, tmp, ShifterOperand(1));
__ it(CC);
__ mov(out, ShifterOperand(0), CC);
if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
- codegen->GetAssembler()->UnpoisonHeapReference(expected_lo);
- if (value_lo == expected_lo) {
- // Do not unpoison `value_lo`, as it is the same register as
- // `expected_lo`, which has just been unpoisoned.
+ __ UnpoisonHeapReference(expected);
+ if (value == expected) {
+ // Do not unpoison `value`, as it is the same register as
+ // `expected`, which has just been unpoisoned.
} else {
- codegen->GetAssembler()->UnpoisonHeapReference(value_lo);
+ __ UnpoisonHeapReference(value);
}
}
}
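The ldrex/strex retry loop above is the ARM idiom for a compare-and-exchange: load-exclusive the current value, compare it with `expected`, store-exclusive `value` only if they matched, and retry when the exclusive store is interrupted; the dmb instructions supply the fences on either side. As a rough portable analogue of the semantics the intrinsic implements (illustrative only, using std::atomic rather than the ART assembler):

#include <atomic>
#include <cstdint>

// Returns 1 if the swap happened, 0 otherwise, mirroring the boolean result the
// intrinsic materializes in `out`.
int32_t CompareAndSwapInt(std::atomic<int32_t>* addr, int32_t expected, int32_t value) {
  // compare_exchange_strong does the load/compare/conditional-store atomically;
  // its default sequentially consistent ordering plays the role of the dmb
  // fences around the assembly loop.
  return addr->compare_exchange_strong(expected, value) ? 1 : 0;
}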
@@ -904,33 +1024,23 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+ GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
- GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+ GenCas(invoke, Primitive::kPrimNot, codegen_);
}
void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
@@ -945,6 +1055,11 @@ void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ // Need temporary registers for the String compression feature.
+ if (mirror::kUseStringCompression) {
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}
@@ -959,10 +1074,16 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
Register temp0 = locations->GetTemp(0).AsRegister<Register>();
Register temp1 = locations->GetTemp(1).AsRegister<Register>();
Register temp2 = locations->GetTemp(2).AsRegister<Register>();
+ Register temp3, temp4;
+ if (mirror::kUseStringCompression) {
+ temp3 = locations->GetTemp(3).AsRegister<Register>();
+ temp4 = locations->GetTemp(4).AsRegister<Register>();
+ }
Label loop;
Label find_char_diff;
Label end;
+ Label different_compression;
// Get offsets of count and value fields within a string object.
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -983,20 +1104,40 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
// Reference equality check, return 0 if same reference.
__ subs(out, str, ShifterOperand(arg));
__ b(&end, EQ);
- // Load lengths of this and argument strings.
- __ ldr(temp2, Address(str, count_offset));
- __ ldr(temp1, Address(arg, count_offset));
+ if (mirror::kUseStringCompression) {
+ // Load lengths of this and argument strings.
+ __ ldr(temp3, Address(str, count_offset));
+ __ ldr(temp4, Address(arg, count_offset));
+ // Clean out compression flag from lengths.
+ __ bic(temp0, temp3, ShifterOperand(0x80000000));
+ __ bic(IP, temp4, ShifterOperand(0x80000000));
+ } else {
+ // Load lengths of this and argument strings.
+ __ ldr(temp0, Address(str, count_offset));
+ __ ldr(IP, Address(arg, count_offset));
+ }
// out = length diff.
- __ subs(out, temp2, ShifterOperand(temp1));
+ __ subs(out, temp0, ShifterOperand(IP));
// temp0 = min(len(str), len(arg)).
- __ it(Condition::LT, kItElse);
- __ mov(temp0, ShifterOperand(temp2), Condition::LT);
- __ mov(temp0, ShifterOperand(temp1), Condition::GE);
+ __ it(GT);
+ __ mov(temp0, ShifterOperand(IP), GT);
// Shorter string is empty?
__ CompareAndBranchIfZero(temp0, &end);
+ if (mirror::kUseStringCompression) {
+ // Check whether both strings use the same compression style; only then can this comparison loop be used.
+ __ eors(temp3, temp3, ShifterOperand(temp4));
+ __ b(&different_compression, MI);
+ }
// Store offset of string value in preparation for comparison loop.
__ mov(temp1, ShifterOperand(value_offset));
+ if (mirror::kUseStringCompression) {
+ // For string compression, calculate the number of bytes to compare (not chars).
+ // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
+ __ cmp(temp4, ShifterOperand(0));
+ __ it(GE);
+ __ add(temp0, temp0, ShifterOperand(temp0), GE);
+ }
// Assertions that must hold in order to compare multiple characters at a time.
CHECK_ALIGNED(value_offset, 8);
@@ -1006,6 +1147,7 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
DCHECK_EQ(char_size, 2u);
+ Label find_char_diff_2nd_cmp;
// Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
__ Bind(&loop);
__ ldr(IP, Address(str, temp1));
@@ -1013,43 +1155,113 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
__ cmp(IP, ShifterOperand(temp2));
__ b(&find_char_diff, NE);
__ add(temp1, temp1, ShifterOperand(char_size * 2));
- __ sub(temp0, temp0, ShifterOperand(2));
__ ldr(IP, Address(str, temp1));
__ ldr(temp2, Address(arg, temp1));
__ cmp(IP, ShifterOperand(temp2));
- __ b(&find_char_diff, NE);
+ __ b(&find_char_diff_2nd_cmp, NE);
__ add(temp1, temp1, ShifterOperand(char_size * 2));
- __ subs(temp0, temp0, ShifterOperand(2));
-
- __ b(&loop, GT);
+ // With string compression, we have compared 8 bytes, otherwise 4 chars.
+ __ subs(temp0, temp0, ShifterOperand(mirror::kUseStringCompression ? 8 : 4));
+ __ b(&loop, HI);
__ b(&end);
- // Find the single 16-bit character difference.
+ __ Bind(&find_char_diff_2nd_cmp);
+ if (mirror::kUseStringCompression) {
+ __ subs(temp0, temp0, ShifterOperand(4)); // 4 bytes previously compared.
+ __ b(&end, LS); // Was the second comparison fully beyond the end?
+ } else {
+ // Without string compression, we can start treating temp0 as signed
+ // and rely on the signed comparison below.
+ __ sub(temp0, temp0, ShifterOperand(2));
+ }
+
+ // Find the single character difference.
__ Bind(&find_char_diff);
// Get the bit position of the first character that differs.
__ eor(temp1, temp2, ShifterOperand(IP));
__ rbit(temp1, temp1);
__ clz(temp1, temp1);
- // temp0 = number of 16-bit characters remaining to compare.
- // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and
- // after the end of the shorter string data).
-
- // (temp1 >> 4) = character where difference occurs between the last two words compared, on the
- // interval [0,1] (0 for low half-word different, 1 for high half-word different).
-
- // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just
- // return length diff (out).
- __ cmp(temp0, ShifterOperand(temp1, LSR, 4));
- __ b(&end, LE);
+ // temp0 = number of characters remaining to compare.
+ // (Without string compression, it could be < 1 if a difference is found by the second CMP
+ // in the comparison loop, and after the end of the shorter string data).
+
+ // Without string compression, (temp1 >> 4) = character where the difference occurs between the last
+ // two words compared, in the interval [0,1]
+ // (0 for low half-word different, 1 for high half-word different).
+ // With string compression, (temp1 >> 3) = byte where the difference occurs,
+ // in the interval [0,3].
+
+ // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
+ // the remaining string data, so just return length diff (out).
+ // The comparison is unsigned for string compression, otherwise signed.
+ __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4));
+ __ b(&end, mirror::kUseStringCompression ? LS : LE);
// Extract the characters and calculate the difference.
+ Label uncompressed_string, continue_process;
+ if (mirror::kUseStringCompression) {
+ __ cmp(temp4, ShifterOperand(0));
+ __ b(&uncompressed_string, GE);
+ __ bic(temp1, temp1, ShifterOperand(0x7));
+ __ b(&continue_process);
+ }
+ __ Bind(&uncompressed_string);
__ bic(temp1, temp1, ShifterOperand(0xf));
+ __ Bind(&continue_process);
+
__ Lsr(temp2, temp2, temp1);
__ Lsr(IP, IP, temp1);
+ Label calculate_difference, uncompressed_string_extract_chars;
+ if (mirror::kUseStringCompression) {
+ __ cmp(temp4, ShifterOperand(0));
+ __ b(&uncompressed_string_extract_chars, GE);
+ __ ubfx(temp2, temp2, 0, 8);
+ __ ubfx(IP, IP, 0, 8);
+ __ b(&calculate_difference);
+ }
+ __ Bind(&uncompressed_string_extract_chars);
__ movt(temp2, 0);
__ movt(IP, 0);
+ __ Bind(&calculate_difference);
__ sub(out, IP, ShifterOperand(temp2));
+ __ b(&end);
+
+ if (mirror::kUseStringCompression) {
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ Label loop_arg_compressed, loop_this_compressed, find_diff;
+ // Comparison for different compression style.
+ // This part is when THIS is compressed and ARG is not.
+ __ Bind(&different_compression);
+ __ add(temp2, str, ShifterOperand(value_offset));
+ __ add(temp3, arg, ShifterOperand(value_offset));
+ __ cmp(temp4, ShifterOperand(0));
+ __ b(&loop_arg_compressed, LT);
+
+ __ Bind(&loop_this_compressed);
+ __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex));
+ __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex));
+ __ cmp(IP, ShifterOperand(temp4));
+ __ b(&find_diff, NE);
+ __ subs(temp0, temp0, ShifterOperand(1));
+ __ b(&loop_this_compressed, GT);
+ __ b(&end);
+
+ // This part is when THIS is not compressed and ARG is.
+ __ Bind(&loop_arg_compressed);
+ __ ldrh(IP, Address(temp2, char_size, Address::PostIndex));
+ __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex));
+ __ cmp(IP, ShifterOperand(temp4));
+ __ b(&find_diff, NE);
+ __ subs(temp0, temp0, ShifterOperand(1));
+ __ b(&loop_arg_compressed, GT);
+ __ b(&end);
+
+ // Calculate the difference.
+ __ Bind(&find_diff);
+ __ sub(out, IP, ShifterOperand(temp4));
+ }
__ Bind(&end);
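Reading the masking and sign tests above: with kUseStringCompression, the String count word carries the compression flag in its top bit (hence the BIC with 0x80000000 and the sign-based branches), the low 31 bits hold the character count, and compressed strings store 8-bit characters where uncompressed ones store 16-bit characters. A small sketch of that decoding, with the layout inferred from the code above and the constant and helper names invented for illustration:

#include <cstdint>

constexpr uint32_t kCompressionFlagBit = 0x80000000u;  // Top bit, cleared with BIC above.

inline bool IsCompressed(uint32_t count_word) {
  // The generated code branches on the sign of the count word (GE = uncompressed),
  // i.e. the flag lives in bit 31.
  return (count_word & kCompressionFlagBit) != 0;
}

inline uint32_t CharCount(uint32_t count_word) {
  return count_word & ~kCompressionFlagBit;
}

inline uint32_t DataSizeInBytes(uint32_t count_word) {
  // Compressed strings hold one byte per character, uncompressed ones two,
  // which is why the comparison loop doubles the count for uncompressed data.
  return CharCount(count_word) * (IsCompressed(count_word) ? 1u : 2u);
}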
@@ -1086,7 +1298,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
Register temp1 = locations->GetTemp(1).AsRegister<Register>();
Register temp2 = locations->GetTemp(2).AsRegister<Register>();
- Label loop;
+ Label loop, preloop;
Label end;
Label return_true;
Label return_false;
@@ -1120,11 +1332,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
__ ldr(temp, Address(str, count_offset));
__ ldr(temp1, Address(arg, count_offset));
// Check if lengths are equal, return false if they're not.
+ // This also compares the compression style; if it differs, return false.
__ cmp(temp, ShifterOperand(temp1));
__ b(&return_false, NE);
// Return true if both strings are empty.
+ if (mirror::kUseStringCompression) {
+ // Length needs to be masked out first because 0 is treated as compressed.
+ __ bic(temp, temp, ShifterOperand(0x80000000));
+ }
__ cbz(temp, &return_true);
-
// Reference equality check, return true if same reference.
__ cmp(str, ShifterOperand(arg));
__ b(&return_true, EQ);
@@ -1133,10 +1349,19 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
DCHECK_ALIGNED(value_offset, 4);
static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
- __ LoadImmediate(temp1, value_offset);
-
+ if (mirror::kUseStringCompression) {
+ // If not compressed, go directly to the fast compare. Otherwise, preprocess the length first.
+ __ cmp(temp1, ShifterOperand(0));
+ __ b(&preloop, GT);
+ // Mask out the compression flag and adjust the length of a compressed string (8-bit)
+ // as if it were 16-bit data: new_length = (length + 1) / 2.
+ __ add(temp, temp, ShifterOperand(1));
+ __ Lsr(temp, temp, 1);
+ __ Bind(&preloop);
+ }
// Loop to compare strings 2 characters at a time starting at the front of the string.
// Ok to do this because strings with an odd length are zero-padded.
+ __ LoadImmediate(temp1, value_offset);
__ Bind(&loop);
__ ldr(out, Address(str, temp1));
__ ldr(temp2, Address(arg, temp1));
@@ -1200,10 +1425,8 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke,
__ LoadImmediate(tmp_reg, 0);
}
- __ LoadFromOffset(kLoadWord, LR, TR,
- QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pIndexOf).Int32Value());
+ codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
- __ blx(LR);
if (slow_path != nullptr) {
__ Bind(slow_path->GetExitLabel());
@@ -1212,7 +1435,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke,
void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
// best to align the inputs accordingly.
@@ -1232,7 +1455,7 @@ void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
// best to align the inputs accordingly.
@@ -1250,7 +1473,7 @@ void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1270,13 +1493,8 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel(), EQ);
- __ LoadFromOffset(kLoadWord,
- LR,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromBytes).Int32Value());
+ codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
- __ blx(LR);
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -1292,26 +1510,19 @@ void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke
}
void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) {
- ArmAssembler* assembler = GetAssembler();
-
// No need to emit code checking whether `locations->InAt(2)` is a null
// pointer, as callers of the native method
//
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
//
// all include a null check on `data` before calling that method.
- __ LoadFromOffset(kLoadWord,
- LR,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromChars).Int32Value());
+ codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
- __ blx(LR);
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1328,18 +1539,16 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke)
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel(), EQ);
- __ LoadFromOffset(kLoadWord,
- LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromString).Int32Value());
+ codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
- __ blx(LR);
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+
__ Bind(slow_path->GetExitLabel());
}
void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1362,6 +1571,13 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
locations->SetInAt(4, Location::RequiresRegister());
}
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM (because that register
+ // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+ // temporary register from the register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
static void CheckPosition(ArmAssembler* assembler,
@@ -1427,9 +1643,9 @@ static void CheckPosition(ArmAssembler* assembler,
}
void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
ArmAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1438,18 +1654,22 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1465,7 +1685,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmp(src, ShifterOperand(dest));
- __ b(slow_path->GetEntryLabel(), EQ);
+ __ b(intrinsic_slow_path->GetEntryLabel(), EQ);
}
// Checked when building locations.
@@ -1477,7 +1697,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
__ b(&conditions_on_positions_validated, NE);
}
__ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
- __ b(slow_path->GetEntryLabel(), GT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), GT);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1490,19 +1710,19 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
} else {
__ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
}
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1511,7 +1731,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ cmp(length.AsRegister<Register>(), ShifterOperand(0));
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
// Validity checks: source.
@@ -1519,7 +1739,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1528,7 +1748,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1537,112 +1757,287 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
- __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ }
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+ }
+ } else {
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
- __ cmp(temp1, ShifterOperand(temp2));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- Label do_copy;
- __ b(&do_copy, EQ);
- if (!did_unpoison) {
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
__ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ // No need to unpoison the result, we're comparing against null.
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
- // No need to unpoison the result, we're comparing against null.
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ b(slow_path->GetEntryLabel(), NE);
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp3` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ }
+ // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
__ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
-
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ // Compute the base source address in `temp1`.
if (src_pos.IsConstant()) {
int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp1, src, element_size * constant + offset);
} else {
- __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2));
+ __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift));
__ AddConstant(temp1, offset);
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(temp2, dest, element_size * constant + offset);
- } else {
- __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2));
- __ AddConstant(temp2, offset);
- }
-
+ // Compute the end source address in `temp3`.
if (length.IsConstant()) {
int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp3, temp1, element_size * constant);
} else {
- __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2));
+ __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- Label loop, done;
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&done, EQ);
- __ Bind(&loop);
- __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
- __ str(IP, Address(temp2, element_size, Address::PostIndex));
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&loop, NE);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // The base destination address is computed later, as `temp2` is
+ // used for intermediate computations.
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `temp2`.
+ __ add(src, src, ShifterOperand(temp2, LSR, 32));
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS,
+ // which can be a 16-bit instruction, unlike the TST immediate.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+ // Carry flag is the last bit shifted out by LSRS.
+ __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+ // Fast-path copy.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ Label loop, done;
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1651,7 +2046,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
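
For reference, a minimal standalone sketch of the address arithmetic prepared above for the copy loops (plain C++; the struct, function and parameter names are illustrative and not part of ART):

    #include <cstddef>
    #include <cstdint>

    struct CopyAddresses {
      uint32_t* src_base;  // temp1: src + data_offset + src_pos * element_size
      uint32_t* dst_base;  // temp2: dest + data_offset + dest_pos * element_size
      uint32_t* src_end;   // temp3: src_base + length * element_size
    };

    // Assumes 32-bit heap references and data_offset == mirror::Array::DataOffset(4).
    static CopyAddresses ComputeCopyAddresses(uint8_t* src, uint8_t* dest,
                                              int32_t src_pos, int32_t dest_pos,
                                              int32_t length, size_t data_offset) {
      uint32_t* src_base = reinterpret_cast<uint32_t*>(src + data_offset) + src_pos;
      uint32_t* dst_base = reinterpret_cast<uint32_t*>(dest + data_offset) + dest_pos;
      return {src_base, dst_base, src_base + length};
    }
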
static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1722,13 +2117,11 @@ static void GenFPToFPCall(HInvoke* invoke,
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0)));
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1)));
- __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value());
// Native code uses the soft float ABI.
__ vmovrrd(calling_convention.GetRegisterAt(0),
calling_convention.GetRegisterAt(1),
FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
- __ blx(LR);
- codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
__ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
calling_convention.GetRegisterAt(0),
calling_convention.GetRegisterAt(1));
@@ -1748,7 +2141,6 @@ static void GenFPFPToFPCall(HInvoke* invoke,
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(2)));
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(3)));
- __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value());
// Native code uses the soft float ABI.
__ vmovrrd(calling_convention.GetRegisterAt(0),
calling_convention.GetRegisterAt(1),
@@ -1756,8 +2148,7 @@ static void GenFPFPToFPCall(HInvoke* invoke,
__ vmovrrd(calling_convention.GetRegisterAt(2),
calling_convention.GetRegisterAt(3),
FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()));
- __ blx(LR);
- codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
__ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
calling_convention.GetRegisterAt(0),
calling_convention.GetRegisterAt(1));
@@ -2070,22 +2461,31 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register src_ptr = locations->GetTemp(1).AsRegister<Register>();
Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
- // src range to copy.
- __ add(src_ptr, srcObj, ShifterOperand(value_offset));
- __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
-
+ Label done, compressed_string_loop;
// dst to be copied.
__ add(dst_ptr, dstObj, ShifterOperand(data_offset));
__ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
__ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
-
- // Do the copy.
- Label loop, remainder, done;
-
// Early out for valid zero-length retrievals.
__ b(&done, EQ);
+ // src range to copy.
+ __ add(src_ptr, srcObj, ShifterOperand(value_offset));
+ Label compressed_string_preloop;
+ if (mirror::kUseStringCompression) {
+ // Location of count in string.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ // String's length.
+ __ ldr(IP, Address(srcObj, count_offset));
+ __ cmp(IP, ShifterOperand(0));
+ __ b(&compressed_string_preloop, LT);
+ }
+ __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
+
+ // Do the copy.
+ Label loop, remainder;
+
// Save repairing the value of num_chr on the < 4 character path.
__ subs(IP, num_chr, ShifterOperand(4));
__ b(&remainder, LT);
@@ -2114,6 +2514,20 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ subs(num_chr, num_chr, ShifterOperand(1));
__ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
__ b(&remainder, GT);
+ __ b(&done);
+
+ if (mirror::kUseStringCompression) {
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ // Copy loop for compressed src, copying one 8-bit character to a 16-bit slot at a time.
+ __ Bind(&compressed_string_preloop);
+ __ add(src_ptr, src_ptr, ShifterOperand(srcBegin));
+ __ Bind(&compressed_string_loop);
+ __ ldrb(IP, Address(src_ptr, c_char_size, Address::PostIndex));
+ __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
+ __ subs(num_chr, num_chr, ShifterOperand(1));
+ __ b(&compressed_string_loop, GT);
+ }
__ Bind(&done);
}
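
A minimal sketch of the two copy shapes in the routine above, assuming this revision's layout where compressed strings store 8-bit characters (function names are illustrative):

    #include <cstdint>

    // Uncompressed source: 16-bit characters are copied as-is.
    static void CopyUncompressedChars(const uint16_t* src, uint16_t* dst, int32_t n) {
      while (n-- > 0) *dst++ = *src++;
    }

    // Compressed source: each 8-bit character is widened to 16 bits (ldrb/strh above).
    static void CopyCompressedChars(const uint8_t* src, uint16_t* dst, int32_t n) {
      while (n-- > 0) *dst++ = *src++;
    }
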
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index e01b6fffb8..7f20ea4b1f 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -33,14 +33,11 @@ class CodeGeneratorARM;
class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor {
public:
- IntrinsicLocationsBuilderARM(ArenaAllocator* arena,
- ArmAssembler* assembler,
- const ArmInstructionSetFeatures& features)
- : arena_(arena), assembler_(assembler), features_(features) {}
+ explicit IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen);
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -67,7 +64,7 @@ class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index e3a9d27a53..db1c022868 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -29,11 +29,11 @@
using namespace vixl::aarch64; // NOLINT(build/namespaces)
-// TODO: make vixl clean wrt -Wshadow.
+// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
-#include "a64/disasm-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop
namespace art {
@@ -144,6 +144,73 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
};
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
+ : SlowPathCodeARM64(instruction), tmp_(tmp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
+ Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
+ Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ Register tmp_reg = WRegisterFrom(tmp_);
+
+ __ Bind(GetEntryLabel());
+ vixl::aarch64::Label slow_copy_loop;
+ __ Bind(&slow_copy_loop);
+ __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
+ codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp_reg = ReadBarrier::Mark(tmp_reg);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp_.reg(), LR);
+ DCHECK_NE(tmp_.reg(), WSP);
+ DCHECK_NE(tmp_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
+ DCHECK_NE(tmp_.reg(), IP0);
+ DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
+ // This runtime call does not require a stack map.
+ codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
+ __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&slow_copy_loop, ne);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
+
+ private:
+ Location tmp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
+};
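
A minimal sketch of what this slow path does per element, with Mark() standing in for the ReadBarrierMarkRegX runtime entry point (a placeholder name; heap-reference poisoning is omitted):

    #include <cstdint>

    extern "C" uint32_t Mark(uint32_t ref);  // placeholder for the runtime entry point

    static void SlowPathCopy(uint32_t* src_ptr, uint32_t* end_ptr, uint32_t* dst_ptr) {
      do {
        *dst_ptr++ = Mark(*src_ptr++);  // Ldr + Mark + Str with post-index above
      } while (src_ptr != end_ptr);
    }
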
#undef __
bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
@@ -796,9 +863,9 @@ static void GenUnsafeGet(HInvoke* invoke,
codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
trg_loc,
base,
- /* offset */ 0U,
+ /* offset */ 0u,
/* index */ offset_loc,
- /* scale_factor */ 0U,
+ /* scale_factor */ 0u,
temp,
/* needs_null_check */ false,
is_volatile);
@@ -813,7 +880,7 @@ static void GenUnsafeGet(HInvoke* invoke,
if (type == Primitive::kPrimNot) {
DCHECK(trg.IsW());
- codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+ codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
}
}
}
@@ -823,15 +890,18 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
- can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1016,8 +1086,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
HInvoke* invoke,
Primitive::Type type) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -1026,20 +1101,29 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
locations->SetInAt(4, Location::RequiresRegister());
// If heap poisoning is enabled, we don't want the unpoisoning
- // operations to potentially clobber the output.
- Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+ // operations to potentially clobber the output. Likewise when
+ // emitting a (Baker) read barrier, which may call.
+ Location::OutputOverlap overlaps =
+ ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
? Location::kOutputOverlap
: Location::kNoOutputOverlap;
locations->SetOut(Location::RequiresRegister(), overlaps);
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register for (Baker) read barrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) {
MacroAssembler* masm = codegen->GetVIXLAssembler();
+ LocationSummary* locations = invoke->GetLocations();
- Register out = WRegisterFrom(locations->Out()); // Boolean result.
+ Location out_loc = locations->Out();
+ Register out = WRegisterFrom(out_loc); // Boolean result.
Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
- Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
+ Location offset_loc = locations->InAt(2);
+ Register offset = XRegisterFrom(offset_loc); // Long offset.
Register expected = RegisterFrom(locations->InAt(3), type); // Expected.
Register value = RegisterFrom(locations->InAt(4), type); // Value.
@@ -1048,6 +1132,27 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// Mark card for object assuming new value is stored.
bool value_can_be_null = true; // TODO: Worth finding out this information?
codegen->MarkGCCard(base, value, value_can_be_null);
+
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Register temp = WRegisterFrom(locations->GetTemp(0));
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS, or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ /* offset */ 0u,
+ /* index */ offset_loc,
+ /* scale_factor */ 0u,
+ temp,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false,
+ /* always_update_field */ true);
+ }
}
UseScratchRegisterScope temps(masm);
@@ -1075,14 +1180,6 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
vixl::aarch64::Label loop_head, exit_loop;
__ Bind(&loop_head);
- // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
- // the reference stored in the object before attempting the CAS,
- // similar to the one in the art::Unsafe_compareAndSwapObject JNI
- // implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
- DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
__ Ldaxr(tmp_value, MemOperand(tmp_ptr));
__ Cmp(tmp_value, expected);
__ B(&exit_loop, ne);
@@ -1109,14 +1206,9 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1124,22 +1216,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+ GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+ GenCas(invoke, Primitive::kPrimLong, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
- GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+ GenCas(invoke, Primitive::kPrimNot, codegen_);
}
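
A rough sketch of the reasoning behind the pre-CAS read barrier above (purely illustrative; the helper name and the sequencing are simplifications, not the actual mechanism):

    #include <atomic>
    #include <cstdint>

    // mark() stands in for the Baker read barrier with always_update_field: it
    // returns the to-space reference and, in the real implementation, updates
    // the field in place if it held a from-space reference.
    static bool CasObjectField(std::atomic<uint32_t>* field,
                               uint32_t expected,  // a to-space reference
                               uint32_t value,
                               uint32_t (*mark)(uint32_t)) {
      mark(field->load(std::memory_order_relaxed));
      // Without that step, the field could still hold the from-space reference
      // of the same object and the CAS below would fail spuriously.
      return field->compare_exchange_strong(expected, value);
    }
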
void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
@@ -1153,6 +1240,11 @@ void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ // Need temporary registers for the String compression feature.
+ if (mirror::kUseStringCompression) {
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}
@@ -1160,17 +1252,25 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
- Register str = XRegisterFrom(locations->InAt(0));
- Register arg = XRegisterFrom(locations->InAt(1));
+ Register str = InputRegisterAt(invoke, 0);
+ Register arg = InputRegisterAt(invoke, 1);
+ DCHECK(str.IsW());
+ DCHECK(arg.IsW());
Register out = OutputRegister(invoke);
Register temp0 = WRegisterFrom(locations->GetTemp(0));
Register temp1 = WRegisterFrom(locations->GetTemp(1));
Register temp2 = WRegisterFrom(locations->GetTemp(2));
+ Register temp3, temp5;
+ if (mirror::kUseStringCompression) {
+ temp3 = WRegisterFrom(locations->GetTemp(3));
+ temp5 = WRegisterFrom(locations->GetTemp(4));
+ }
vixl::aarch64::Label loop;
vixl::aarch64::Label find_char_diff;
vixl::aarch64::Label end;
+ vixl::aarch64::Label different_compression;
// Get offsets of count and value fields within a string object.
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1191,9 +1291,18 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
// Reference equality check, return 0 if same reference.
__ Subs(out, str, arg);
__ B(&end, eq);
- // Load lengths of this and argument strings.
- __ Ldr(temp0, MemOperand(str.X(), count_offset));
- __ Ldr(temp1, MemOperand(arg.X(), count_offset));
+ if (mirror::kUseStringCompression) {
+ // Load lengths of this and argument strings.
+ __ Ldr(temp3, HeapOperand(str, count_offset));
+ __ Ldr(temp5, HeapOperand(arg, count_offset));
+ // Clean out compression flag from lengths.
+ __ Bic(temp0, temp3, Operand(static_cast<int32_t>(0x80000000)));
+ __ Bic(temp1, temp5, Operand(static_cast<int32_t>(0x80000000)));
+ } else {
+ // Load lengths of this and argument strings.
+ __ Ldr(temp0, HeapOperand(str, count_offset));
+ __ Ldr(temp1, HeapOperand(arg, count_offset));
+ }
// Return zero if both strings are empty.
__ Orr(out, temp0, temp1);
__ Cbz(out, &end);
@@ -1204,8 +1313,22 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
// Shorter string is empty?
__ Cbz(temp2, &end);
+ if (mirror::kUseStringCompression) {
+ // Check if both strings use the same compression style before using this comparison loop.
+ __ Eor(temp3.W(), temp3, Operand(temp5));
+ __ Tbnz(temp3.W(), kWRegSize - 1, &different_compression);
+ }
// Store offset of string value in preparation for comparison loop.
__ Mov(temp1, value_offset);
+ if (mirror::kUseStringCompression) {
+ // For string compression, calculate the number of bytes to compare (not chars).
+ // In theory this could exceed INT32_MAX, so treat temp2 as unsigned.
+ vixl::aarch64::Label let_it_signed;
+ __ Cmp(temp5, Operand(0));
+ __ B(lt, &let_it_signed);
+ __ Add(temp2, temp2, Operand(temp2));
+ __ Bind(&let_it_signed);
+ }
UseScratchRegisterScope scratch_scope(masm);
Register temp4 = scratch_scope.AcquireX();
@@ -1222,34 +1345,95 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
// Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
__ Bind(&loop);
- __ Ldr(temp4, MemOperand(str.X(), temp1));
- __ Ldr(temp0, MemOperand(arg.X(), temp1));
+ __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
+ __ Ldr(temp0, MemOperand(arg.X(), temp1.X()));
__ Cmp(temp4, temp0);
__ B(ne, &find_char_diff);
__ Add(temp1, temp1, char_size * 4);
- __ Subs(temp2, temp2, 4);
- __ B(gt, &loop);
+ // With string compression, we have compared 8 bytes, otherwise 4 chars.
+ __ Subs(temp2, temp2, (mirror::kUseStringCompression) ? 8 : 4);
+ __ B(hi, &loop);
__ B(&end);
// Promote temp1 to an X reg, ready for EOR.
temp1 = temp1.X();
- // Find the single 16-bit character difference.
+ // Find the single character difference.
__ Bind(&find_char_diff);
// Get the bit position of the first character that differs.
__ Eor(temp1, temp0, temp4);
__ Rbit(temp1, temp1);
__ Clz(temp1, temp1);
- // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
+ // If the number of chars remaining <= the index where the difference occurs (0-3), then
// the difference occurs outside the remaining string data, so just return length diff (out).
- __ Cmp(temp2, Operand(temp1, LSR, 4));
- __ B(le, &end);
+ // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
+ // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
+ // unsigned when string compression is disabled.
+ // When it's enabled, the comparison must be unsigned.
+ __ Cmp(temp2, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
+ __ B(ls, &end);
// Extract the characters and calculate the difference.
+ vixl::aarch64::Label uncompressed_string, continue_process;
+ if (mirror::kUseStringCompression) {
+ __ Tbz(temp5, kWRegSize - 1, &uncompressed_string);
+ __ Bic(temp1, temp1, 0x7);
+ __ B(&continue_process);
+ }
+ __ Bind(&uncompressed_string);
__ Bic(temp1, temp1, 0xf);
+ __ Bind(&continue_process);
+
__ Lsr(temp0, temp0, temp1);
__ Lsr(temp4, temp4, temp1);
+ vixl::aarch64::Label uncompressed_string_extract_chars;
+ if (mirror::kUseStringCompression) {
+ __ Tbz(temp5, kWRegSize - 1, &uncompressed_string_extract_chars);
+ __ And(temp4, temp4, 0xff);
+ __ Sub(out, temp4.W(), Operand(temp0.W(), UXTB));
+ __ B(&end);
+ }
+ __ Bind(&uncompressed_string_extract_chars);
__ And(temp4, temp4, 0xffff);
- __ Sub(out, temp4, Operand(temp0, UXTH));
+ __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
+ __ B(&end);
+
+ if (mirror::kUseStringCompression) {
+ vixl::aarch64::Label loop_this_compressed, loop_arg_compressed, find_diff;
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ temp0 = temp0.W();
+ temp1 = temp1.W();
+ // Comparison for different compression style.
+ // This part is when THIS is compressed and ARG is not.
+ __ Bind(&different_compression);
+ __ Add(temp0, str, Operand(value_offset));
+ __ Add(temp1, arg, Operand(value_offset));
+ __ Cmp(temp5, Operand(0));
+ __ B(lt, &loop_arg_compressed);
+
+ __ Bind(&loop_this_compressed);
+ __ Ldrb(temp3, MemOperand(temp0.X(), c_char_size, PostIndex));
+ __ Ldrh(temp5, MemOperand(temp1.X(), char_size, PostIndex));
+ __ Cmp(temp3, Operand(temp5));
+ __ B(ne, &find_diff);
+ __ Subs(temp2, temp2, 1);
+ __ B(gt, &loop_this_compressed);
+ __ B(&end);
+
+ // This part is when THIS is not compressed and ARG is.
+ __ Bind(&loop_arg_compressed);
+ __ Ldrh(temp3, MemOperand(temp0.X(), char_size, PostIndex));
+ __ Ldrb(temp5, MemOperand(temp1.X(), c_char_size, PostIndex));
+ __ Cmp(temp3, Operand(temp5));
+ __ B(ne, &find_diff);
+ __ Subs(temp2, temp2, 1);
+ __ B(gt, &loop_arg_compressed);
+ __ B(&end);
+
+ // Calculate the difference.
+ __ Bind(&find_diff);
+ __ Sub(out, temp3.W(), Operand(temp5.W(), UXTH));
+ }
__ Bind(&end);
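
A small sketch of how the count word appears to be interpreted in this revision, as implied by the Bic/Tbnz/Cmp sequences above (helper names are illustrative):

    #include <cstdint>

    static int32_t StringLength(int32_t count) {
      return count & 0x7fffffff;  // Bic(..., 0x80000000): strip the compression flag
    }

    static bool IsCompressed(int32_t count) {
      return count < 0;           // sign bit set => 8-bit (compressed) characters
    }
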
@@ -1284,7 +1468,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
Register temp1 = WRegisterFrom(locations->GetTemp(0));
Register temp2 = WRegisterFrom(locations->GetTemp(1));
- vixl::aarch64::Label loop;
+ vixl::aarch64::Label loop, preloop;
vixl::aarch64::Label end;
vixl::aarch64::Label return_true;
vixl::aarch64::Label return_false;
@@ -1322,22 +1506,37 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
__ Ldr(temp, MemOperand(str.X(), count_offset));
__ Ldr(temp1, MemOperand(arg.X(), count_offset));
// Check if lengths are equal, return false if they're not.
+ // This also compares the compression style; if it differs, return false.
__ Cmp(temp, temp1);
__ B(&return_false, ne);
- // Store offset of string value in preparation for comparison loop
- __ Mov(temp1, value_offset);
// Return true if both strings are empty.
+ if (mirror::kUseStringCompression) {
+ // The compression flag needs to be masked out of the count first, because a zero-length string is treated as compressed.
+ __ Bic(temp, temp, Operand(static_cast<int32_t>(0x80000000)));
+ }
__ Cbz(temp, &return_true);
// Assertions that must hold in order to compare strings 4 characters at a time.
DCHECK_ALIGNED(value_offset, 8);
static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+ if (mirror::kUseStringCompression) {
+ // If not compressed, go directly to the fast compare. Otherwise, preprocess the length.
+ __ Cmp(temp1, Operand(0));
+ __ B(&preloop, gt);
+ // Mask out the compression flag and adjust the length of a compressed (8-bit) string
+ // as if it were 16-bit data: new_length = (length + 1) / 2.
+ __ Add(temp, temp, 1);
+ __ Lsr(temp, temp, 1);
+ }
+
temp1 = temp1.X();
temp2 = temp2.X();
-
// Loop to compare strings 4 characters at a time starting at the beginning of the string.
// Ok to do this because strings are zero-padded to be 8-byte aligned.
+ // Store offset of string value in preparation for comparison loop
+ __ Bind(&preloop);
+ __ Mov(temp1, value_offset);
__ Bind(&loop);
__ Ldr(out, MemOperand(str.X(), temp1));
__ Ldr(temp2, MemOperand(arg.X(), temp1));
@@ -1397,9 +1596,8 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke,
__ Mov(tmp_reg, 0);
}
- __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pIndexOf).Int32Value()));
+ codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
- __ Blr(lr);
if (slow_path != nullptr) {
__ Bind(slow_path->GetExitLabel());
@@ -1408,7 +1606,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke,
void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
// best to align the inputs accordingly.
@@ -1428,7 +1626,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
// best to align the inputs accordingly.
@@ -1446,7 +1644,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1466,12 +1664,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke)
codegen_->AddSlowPath(slow_path);
__ B(eq, slow_path->GetEntryLabel());
- __ Ldr(lr,
- MemOperand(tr,
- QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromBytes).Int32Value()));
+ codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
- __ Blr(lr);
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -1487,25 +1681,19 @@ void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invo
}
void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
- MacroAssembler* masm = GetVIXLAssembler();
-
// No need to emit code checking whether `locations->InAt(2)` is a null
// pointer, as callers of the native method
//
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
//
// all include a null check on `data` before calling that method.
- __ Ldr(lr,
- MemOperand(tr,
- QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromChars).Int32Value()));
+ codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
- __ Blr(lr);
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
@@ -1522,12 +1710,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke
codegen_->AddSlowPath(slow_path);
__ B(eq, slow_path->GetEntryLabel());
- __ Ldr(lr,
- MemOperand(tr,
- QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromString).Int32Value()));
+ codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
- __ Blr(lr);
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -1562,13 +1746,9 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke)
}
static void GenFPToFPCall(HInvoke* invoke,
- MacroAssembler* masm,
CodeGeneratorARM64* codegen,
QuickEntrypointEnum entry) {
- __ Ldr(lr, MemOperand(tr,
- GetThreadOffset<kArm64PointerSize>(entry).Int32Value()));
- __ Blr(lr);
- codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
}
void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
@@ -1576,7 +1756,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCos);
+ GenFPToFPCall(invoke, codegen_, kQuickCos);
}
void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
@@ -1584,7 +1764,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSin);
+ GenFPToFPCall(invoke, codegen_, kQuickSin);
}
void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
@@ -1592,7 +1772,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAcos);
+ GenFPToFPCall(invoke, codegen_, kQuickAcos);
}
void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
@@ -1600,7 +1780,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAsin);
+ GenFPToFPCall(invoke, codegen_, kQuickAsin);
}
void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
@@ -1608,7 +1788,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan);
+ GenFPToFPCall(invoke, codegen_, kQuickAtan);
}
void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
@@ -1616,7 +1796,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCbrt);
+ GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}
void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
@@ -1624,7 +1804,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCosh);
+ GenFPToFPCall(invoke, codegen_, kQuickCosh);
}
void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
@@ -1632,7 +1812,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExp);
+ GenFPToFPCall(invoke, codegen_, kQuickExp);
}
void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
@@ -1640,7 +1820,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExpm1);
+ GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}
void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
@@ -1648,7 +1828,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog);
+ GenFPToFPCall(invoke, codegen_, kQuickLog);
}
void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
@@ -1656,7 +1836,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog10);
+ GenFPToFPCall(invoke, codegen_, kQuickLog10);
}
void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
@@ -1664,7 +1844,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSinh);
+ GenFPToFPCall(invoke, codegen_, kQuickSinh);
}
void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
@@ -1672,7 +1852,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTan);
+ GenFPToFPCall(invoke, codegen_, kQuickTan);
}
void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
@@ -1680,7 +1860,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTanh);
+ GenFPToFPCall(invoke, codegen_, kQuickTanh);
}
void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
@@ -1688,7 +1868,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan2);
+ GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
@@ -1696,7 +1876,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickHypot);
+ GenFPToFPCall(invoke, codegen_, kQuickHypot);
}
void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
@@ -1704,7 +1884,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
- GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickNextAfter);
+ GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}
void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1720,6 +1900,10 @@ void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke)
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ // Need a temporary register for the String compression feature.
+ if (mirror::kUseStringCompression) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1747,29 +1931,41 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register src_ptr = XRegisterFrom(locations->GetTemp(0));
Register num_chr = XRegisterFrom(locations->GetTemp(1));
Register tmp1 = XRegisterFrom(locations->GetTemp(2));
+ Register tmp3;
+ if (mirror::kUseStringCompression) {
+ tmp3 = WRegisterFrom(locations->GetTemp(3));
+ }
UseScratchRegisterScope temps(masm);
Register dst_ptr = temps.AcquireX();
Register tmp2 = temps.AcquireX();
- // src address to copy from.
- __ Add(src_ptr, srcObj, Operand(value_offset));
- __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
+ vixl::aarch64::Label done;
+ vixl::aarch64::Label compressed_string_loop;
+ __ Sub(num_chr, srcEnd, srcBegin);
+ // Early out for valid zero-length retrievals.
+ __ Cbz(num_chr, &done);
// dst address start to copy to.
__ Add(dst_ptr, dstObj, Operand(data_offset));
__ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
- __ Sub(num_chr, srcEnd, srcBegin);
+ // src address to copy from.
+ __ Add(src_ptr, srcObj, Operand(value_offset));
+ vixl::aarch64::Label compressed_string_preloop;
+ if (mirror::kUseStringCompression) {
+ // Location of count in string.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ // String's length.
+ __ Ldr(tmp3, MemOperand(srcObj, count_offset));
+ __ Tbnz(tmp3, kWRegSize - 1, &compressed_string_preloop);
+ }
+ __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
// Do the copy.
vixl::aarch64::Label loop;
- vixl::aarch64::Label done;
vixl::aarch64::Label remainder;
- // Early out for valid zero-length retrievals.
- __ Cbz(num_chr, &done);
-
// Save repairing the value of num_chr on the < 8 character path.
__ Subs(tmp1, num_chr, 8);
__ B(lt, &remainder);
@@ -1795,6 +1991,20 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Subs(num_chr, num_chr, 1);
__ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
__ B(gt, &remainder);
+ __ B(&done);
+
+ if (mirror::kUseStringCompression) {
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ __ Bind(&compressed_string_preloop);
+ __ Add(src_ptr, src_ptr, Operand(srcBegin));
+ // Copy loop for compressed src, copying one 8-bit character to a 16-bit slot at a time.
+ __ Bind(&compressed_string_loop);
+ __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
+ __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
+ __ Subs(num_chr, num_chr, Operand(1));
+ __ B(gt, &compressed_string_loop);
+ }
__ Bind(&done);
}
@@ -2033,9 +2243,9 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128;
// We want to use two temporary registers in order to reduce the register pressure in arm64.
// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2088,12 +2298,20 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch register
+ // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
+ // (because that register is clobbered by ReadBarrierMarkRegX
+ // entry points). Get an extra temporary register from the
+ // register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2102,6 +2320,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = XRegisterFrom(locations->InAt(0));
Location src_pos = locations->InAt(1);
@@ -2109,10 +2328,12 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
Register temp1 = WRegisterFrom(locations->GetTemp(0));
+ Location temp1_loc = LocationFrom(temp1);
Register temp2 = WRegisterFrom(locations->GetTemp(1));
+ Location temp2_loc = LocationFrom(temp2);
- SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
vixl::aarch64::Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -2128,7 +2349,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ Cmp(src, dest);
- __ B(slow_path->GetEntryLabel(), eq);
+ __ B(intrinsic_slow_path->GetEntryLabel(), eq);
}
// Checked when building locations.
DCHECK(!optimizations.GetDestinationIsSource()
@@ -2139,7 +2360,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ B(&conditions_on_positions_validated, ne);
}
__ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
- __ B(slow_path->GetEntryLabel(), gt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), gt);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2148,19 +2369,19 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
}
__ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
- __ B(slow_path->GetEntryLabel(), lt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), lt);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ Cbz(src, slow_path->GetEntryLabel());
+ __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ Cbz(dest, slow_path->GetEntryLabel());
+ __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
}
// We have already checked in the LocationsBuilder for the constant case.
@@ -2168,17 +2389,17 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
// If the length is negative, bail out.
- __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+ __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel());
// If the length >= 128 then (currently) prefer native implementation.
__ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
- __ B(slow_path->GetEntryLabel(), ge);
+ __ B(intrinsic_slow_path->GetEntryLabel(), ge);
}
// Validity checks: source.
CheckSystemArrayCopyPosition(masm,
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2187,90 +2408,236 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
{
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
UseScratchRegisterScope temps(masm);
+ // Note: Because it is acquired from VIXL's scratch register pool,
+    // `temp3` might be IP0, and thus cannot be used as the `ref` argument
+ // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+ // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
Register temp3 = temps.AcquireW();
+
if (!optimizations.GetDoesNotNeedTypeCheck()) {
// Check whether all elements of the source array are assignable to the component
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ Ldr(temp1, MemOperand(dest, class_offset));
- __ Ldr(temp2, MemOperand(src, class_offset));
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+ __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ }
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ Ldr(temp3, HeapOperand(temp2, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ dest.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+      // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
- __ Cmp(temp1, temp2);
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ src.W(),
+ class_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+ }
+ } else {
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ Ldr(temp1, MemOperand(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ Ldr(temp2, MemOperand(src, class_offset));
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ Ldr(temp3, HeapOperand(temp1, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- vixl::aarch64::Label do_copy;
- __ B(&do_copy, eq);
- if (!did_unpoison) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ Ldr(temp3, HeapOperand(temp2, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ if (!did_unpoison) {
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ Ldr(temp1, HeapOperand(temp1, component_offset));
codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ // No need to unpoison the result, we're comparing against null.
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ Ldr(temp1, HeapOperand(temp1, component_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ Ldr(temp1, HeapOperand(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ Cbnz(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ B(slow_path->GetEntryLabel(), ne);
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ Ldr(temp1, HeapOperand(src.W(), class_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ Ldr(temp2, HeapOperand(temp1, component_offset));
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ }
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
}
Register src_curr_addr = temp1.X();
Register dst_curr_addr = temp2.X();
- Register src_stop_addr = temp3.X();
+ Register src_stop_addr;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch
+ // register pool as `temp3`, cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM64 (because that
+ // register is clobbered by ReadBarrierMarkRegX entry points).
+    // So another temporary register, allocated by the register
+    // allocator, is used instead.
+ DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
+ src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ } else {
+ src_stop_addr = temp3.X();
+ }
GenSystemArrayCopyAddresses(masm,
Primitive::kPrimNot,
@@ -2283,25 +2650,98 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dst_curr_addr,
src_stop_addr);
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- vixl::aarch64::Label loop, done;
const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- __ Bind(&loop);
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
- {
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+  // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+  //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ vixl::aarch64::Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+
Register tmp = temps.AcquireW();
+ // Make sure `tmp` is not IP0, as it is clobbered by
+ // ReadBarrierMarkRegX entry points in
+ // ReadBarrierSystemArrayCopySlowPathARM64.
+ DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `tmp`.
+ __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
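In C-like terms, the dependency trick above amounts to the following (a sketch of the intent only, assuming the 32-bit lock word is zero-extended into `tmp.X()`, so the shifted value is always zero):

  // `monitor` is the lock word just loaded into `tmp`.
  src = src + (static_cast<uint64_t>(monitor) >> 32);  // Always adds zero, but the later
                                                       // loads through `src` now carry a
                                                       // data dependency on the lock word.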
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCodeARM64* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
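Given the static_asserts above, the Tbnz boils down to testing a single bit of the lock word; roughly (a sketch of the condition being tested, not additional emitted code):

  if (((monitor >> LockWord::kReadBarrierStateShift) & 1) != 0) {
    // rb_state == ReadBarrier::gray_ptr_: take the slow-path copy.
  }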
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
__ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
__ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&loop, ne);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ vixl::aarch64::Label loop, done;
+ __ Bind(&loop);
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+ {
+ Register tmp = temps.AcquireW();
+ __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+ __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ }
+ __ B(&loop);
+ __ Bind(&done);
}
- __ B(&loop);
- __ Bind(&done);
}
// We only need one card marking on the destination array.
codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
static void GenIsInfinite(LocationSummary* locations,
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 525153621b..28e41cb086 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -42,7 +42,7 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -66,7 +66,7 @@ class IntrinsicCodeGeneratorARM64 FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
deleted file mode 100644
index db60238fb4..0000000000
--- a/compiler/optimizing/intrinsics_list.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
-#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
-
-// All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected
-// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual), then whether it requires an
-// environment, may have side effects, or may throw exceptions.
-
-#define INTRINSICS_LIST(V) \
- V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(DoubleIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(DoubleIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(FloatFloatToIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(FloatIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(FloatIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(IntegerSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(LongSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathAbsLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathAbsInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathMinDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathMinFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathMinLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathMinIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathMaxDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathCos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathSin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathAcos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathAsin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathAtan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathAtan2, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathCbrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathCosh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathExp, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathExpm1, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathHypot, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathLog, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathLog10, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathSinh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathTan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathTanh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathSqrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathCeil, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathFloor, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathRint, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
- V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(MemoryPeekLongNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(MemoryPeekShortNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(MemoryPokeByte, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
- V(MemoryPokeIntNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
- V(MemoryPokeLongNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
- V(MemoryPokeShortNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \
- V(StringCharAt, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(StringCompareTo, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(StringEquals, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
- V(StringIsEmpty, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
- V(StringLength, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
- V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeCASInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeCASLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeCASObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGet, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafePut, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafePutOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafePutVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafePutObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetAndAddInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetAndAddLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetAndSetInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetAndSetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeGetAndSetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeLoadFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeStoreFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(UnsafeFullFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
- V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow)
-
-#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
-#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // #define is only for lint.
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 9449f79169..5239f8f020 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -634,7 +634,7 @@ static void GenBitCount(LocationSummary* locations,
// For 64-bit quantities, this algorithm gets executed twice, (once
// for in_lo, and again for in_hi), but saves a few instructions
// because the mask values only have to be loaded once. Using this
- // algorithm the count for a 64-bit operand can be performed in 33
+ // algorithm the count for a 64-bit operand can be performed in 29
// instructions compared to a loop-based algorithm which required 47
// instructions.
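For reference, the masks and the final multiply used below follow the classic SWAR population count; a self-contained C++ sketch of the 32-bit version (illustrative only, the function name is made up):

  #include <cstdint>

  // Counts set bits in one 32-bit word; the MIPS code applies the same masks
  // to the low and high words of a 64-bit value and merges the subtotals.
  uint32_t PopCount32(uint32_t x) {
    x = x - ((x >> 1) & 0x55555555u);                  // 2-bit subtotals
    x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit subtotals
    x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit subtotals
    return (x * 0x01010101u) >> 24;                    // sum of the four bytes
  }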
@@ -687,37 +687,36 @@ static void GenBitCount(LocationSummary* locations,
__ Srl(tmp_lo, tmp_lo, 2);
__ And(tmp_lo, tmp_lo, AT);
__ Addu(tmp_lo, out_lo, tmp_lo);
- __ Srl(out_lo, tmp_lo, 4);
- __ Addu(out_lo, out_lo, tmp_lo);
__ And(out_hi, tmp_hi, AT);
__ Srl(tmp_hi, tmp_hi, 2);
__ And(tmp_hi, tmp_hi, AT);
__ Addu(tmp_hi, out_hi, tmp_hi);
- __ Srl(out_hi, tmp_hi, 4);
- __ Addu(out_hi, out_hi, tmp_hi);
+  // Here we deviate from the original algorithm a bit. We've reached
+  // the stage where the bitfields holding the subtotals are large
+  // enough to hold the combined subtotals for both the low word and
+  // the high word. This means that we can add the subtotals for the
+  // high and low words into a single word, and compute the final
+  // result for both of them using fewer instructions.
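Expressed as plain arithmetic, the merged finish is roughly the following (a sketch, assuming each 4-bit field of `tmp_lo`/`tmp_hi` holds a subtotal of at most 4, so a sum of at most 8 still fits in the field):

  combined = tmp_hi + tmp_lo;                                      // per-nibble subtotals
  combined = (combined & 0x0F0F0F0F) + ((combined >> 4) & 0x0F0F0F0F);
  out      = (combined * 0x01010101) >> 24;                        // sum of the four bytes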
__ LoadConst32(AT, 0x0F0F0F0F);
- __ And(out_lo, out_lo, AT);
- __ And(out_hi, out_hi, AT);
+ __ Addu(TMP, tmp_hi, tmp_lo);
+
+ __ Srl(out, TMP, 4);
+ __ And(out, out, AT);
+ __ And(TMP, TMP, AT);
+ __ Addu(out, out, TMP);
__ LoadConst32(AT, 0x01010101);
if (isR6) {
- __ MulR6(out_lo, out_lo, AT);
-
- __ MulR6(out_hi, out_hi, AT);
+ __ MulR6(out, out, AT);
} else {
- __ MulR2(out_lo, out_lo, AT);
-
- __ MulR2(out_hi, out_hi, AT);
+ __ MulR2(out, out, AT);
}
- __ Srl(out_lo, out_lo, 24);
- __ Srl(out_hi, out_hi, 24);
-
- __ Addu(out, out_hi, out_lo);
+ __ Srl(out, out, 24);
}
}
@@ -1875,7 +1874,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
// int java.lang.String.compareTo(String anotherString)
void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1895,13 +1894,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) {
SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
codegen_->AddSlowPath(slow_path);
__ Beqz(argument, slow_path->GetEntryLabel());
-
- __ LoadFromOffset(kLoadWord,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pStringCompareTo).Int32Value());
- __ Jalr(T9);
- __ Nop();
+ codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
__ Bind(slow_path->GetExitLabel());
}
@@ -2055,13 +2048,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
__ Clear(tmp_reg);
}
- __ LoadFromOffset(kLoadWord,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pIndexOf).Int32Value());
- __ Jalr(T9);
- __ Nop();
-
+ codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
if (slow_path != nullptr) {
__ Bind(slow_path->GetExitLabel());
}
@@ -2070,7 +2057,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
// int java.lang.String.indexOf(int ch)
void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime
// calling convention. So it's best to align the inputs accordingly.
@@ -2095,7 +2082,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) {
// int java.lang.String.indexOf(int ch, int fromIndex)
void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime
// calling convention. So it's best to align the inputs accordingly.
@@ -2121,7 +2108,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2140,14 +2127,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke)
SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
codegen_->AddSlowPath(slow_path);
__ Beqz(byte_array, slow_path->GetEntryLabel());
-
- __ LoadFromOffset(kLoadWord,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromBytes).Int32Value());
- __ Jalr(T9);
- __ Nop();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
__ Bind(slow_path->GetExitLabel());
}
@@ -2165,28 +2145,19 @@ void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invok
}
void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke) {
- MipsAssembler* assembler = GetAssembler();
-
// No need to emit code checking whether `locations->InAt(2)` is a null
// pointer, as callers of the native method
//
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
//
// all include a null check on `data` before calling that method.
-
- __ LoadFromOffset(kLoadWord,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromChars).Int32Value());
- __ Jalr(T9);
- __ Nop();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
}
// java.lang.StringFactory.newStringFromString(String toCopy)
void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2202,14 +2173,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromString(HInvoke* invoke)
SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
codegen_->AddSlowPath(slow_path);
__ Beqz(string_to_copy, slow_path->GetEntryLabel());
-
- __ LoadFromOffset(kLoadWord,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromString).Int32Value());
- __ Jalr(T9);
- __ Nop();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -2456,16 +2420,18 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
__ FloorWS(FTMP, in);
__ Mfc1(out, FTMP);
- __ LoadConst32(TMP, 1);
+ if (!IsR6()) {
+ __ LoadConst32(TMP, -1);
+ }
- // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0;
+  // TMP = (out == java.lang.Integer.MAX_VALUE) ? -1 : 0;
__ LoadConst32(AT, std::numeric_limits<int32_t>::max());
__ Bne(AT, out, &finite);
__ Mtc1(ZERO, FTMP);
if (IsR6()) {
__ CmpLtS(FTMP, in, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
} else {
__ ColtS(in, FTMP);
}
@@ -2474,28 +2440,26 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
__ Bind(&finite);
- // TMP = (0.5f <= (in - out)) ? 1 : 0;
+ // TMP = (0.5f <= (in - out)) ? -1 : 0;
__ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
__ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
__ SubS(FTMP, in, FTMP);
__ Mtc1(AT, half);
if (IsR6()) {
__ CmpLeS(FTMP, half, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
} else {
__ ColeS(half, FTMP);
}
__ Bind(&add);
- if (IsR6()) {
- __ Selnez(TMP, TMP, AT);
- } else {
+ if (!IsR6()) {
__ Movf(TMP, ZERO);
}
- // Return out += TMP.
- __ Addu(out, out, TMP);
+ // Return out -= TMP.
+ __ Subu(out, out, TMP);
__ Bind(&done);
}
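The switch from +1/Addu to -1/Subu reflects how the comparison result is materialized: the R6 CmpLtS/CmpLeS instructions write all-ones into the FPU register when the condition holds, so after Mfc1 the integer register holds -1 or 0 (on pre-R6, TMP is preloaded with -1 and cleared by Movf when the condition flag is false). A sketch of the final adjustment:

  // TMP is -1 when the rounding condition held, 0 otherwise.
  out = out - TMP;  // i.e. out += 1 exactly when the condition held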
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index 575a7d0a23..e134cb882e 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -60,7 +60,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8d4d3e5e91..1d153e2e18 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1519,7 +1519,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
// int java.lang.String.compareTo(String anotherString)
void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1540,12 +1540,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) {
codegen_->AddSlowPath(slow_path);
__ Beqzc(argument, slow_path->GetEntryLabel());
- __ LoadFromOffset(kLoadDoubleword,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pStringCompareTo).Int32Value());
- __ Jalr(T9);
- __ Nop();
+ codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
__ Bind(slow_path->GetExitLabel());
}
@@ -1691,13 +1686,8 @@ static void GenerateStringIndexOf(HInvoke* invoke,
__ Clear(tmp_reg);
}
- __ LoadFromOffset(kLoadDoubleword,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pIndexOf).Int32Value());
+ codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
- __ Jalr(T9);
- __ Nop();
if (slow_path != nullptr) {
__ Bind(slow_path->GetExitLabel());
@@ -1707,7 +1697,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
// int java.lang.String.indexOf(int ch)
void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime
// calling convention. So it's best to align the inputs accordingly.
@@ -1728,7 +1718,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) {
// int java.lang.String.indexOf(int ch, int fromIndex)
void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
// We have a hand-crafted assembly stub that follows the runtime
// calling convention. So it's best to align the inputs accordingly.
@@ -1748,7 +1738,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1768,15 +1758,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke
codegen_->AddSlowPath(slow_path);
__ Beqzc(byte_array, slow_path->GetEntryLabel());
- __ LoadFromOffset(kLoadDoubleword,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
- pAllocStringFromBytes).Int32Value());
+ codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
- __ Jalr(T9);
- __ Nop();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -1794,29 +1777,20 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* inv
}
void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) {
- Mips64Assembler* assembler = GetAssembler();
-
// No need to emit code checking whether `locations->InAt(2)` is a null
// pointer, as callers of the native method
//
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
//
// all include a null check on `data` before calling that method.
- __ LoadFromOffset(kLoadDoubleword,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
- pAllocStringFromChars).Int32Value());
+ codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
- __ Jalr(T9);
- __ Nop();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
// java.lang.StringFactory.newStringFromString(String toCopy)
void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1833,15 +1807,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromString(HInvoke* invok
codegen_->AddSlowPath(slow_path);
__ Beqzc(string_to_copy, slow_path->GetEntryLabel());
- __ LoadFromOffset(kLoadDoubleword,
- T9,
- TR,
- QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize,
- pAllocStringFromString).Int32Value());
+ codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
- __ Jalr(T9);
- __ Nop();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -1890,11 +1857,11 @@ static void GenHighestOneBit(LocationSummary* locations,
if (type == Primitive::kPrimLong) {
__ Dclz(TMP, in);
__ LoadConst64(AT, INT64_C(0x8000000000000000));
- __ Dsrlv(out, AT, TMP);
+ __ Dsrlv(AT, AT, TMP);
} else {
__ Clz(TMP, in);
__ LoadConst32(AT, 0x80000000);
- __ Srlv(out, AT, TMP);
+ __ Srlv(AT, AT, TMP);
}
// For either value of "type", when "in" is zero, "out" should also
// be zero. Without this extra "and" operation, when "in" is zero,
@@ -1902,7 +1869,7 @@ static void GenHighestOneBit(LocationSummary* locations,
// the MIPS logical shift operations "dsrlv", and "srlv" don't use
// the shift amount (TMP) directly; they use either (TMP % 64) or
// (TMP % 32), respectively.
- __ And(out, out, in);
+ __ And(out, AT, in);
}
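A sketch of the computation this now performs for the 32-bit case (the 64-bit case is analogous), relying on the mod-32 shift behavior described in the comment above:

  // out = highestOneBit(in)
  out = (0x80000000u >> (clz(in) % 32)) & in;  // when in == 0, clz == 32, the shift amount
                                               // becomes 0 and the AND forces out to 0.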
// int java.lang.Integer.highestOneBit(int)
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 4137fbd1b6..5b95c26a21 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -60,7 +60,7 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 65f4def48b..aae3899847 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -70,6 +70,105 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register src = locations->InAt(0).AsRegister<Register>();
+ Location src_pos = locations->InAt(1);
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+    // In this code path, registers `temp1`, `temp2`, and `temp3` are not
+    // used for the base source address, the base destination address, and
+    // the end source address (respectively), as in other SystemArrayCopy
+    // intrinsic code paths. Instead they are used (respectively) for:
+ // - the loop index (`i`);
+ // - the source index (`src_index`) and the loaded (source)
+ // reference (`value`); and
+ // - the destination index (`dest_index`).
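Put together, the code emitted below corresponds roughly to the following copy-with-marking loop (a C-like sketch assembled from the comments, not the exact generated sequence):

  for (int32_t i = 0; i != length; ++i) {
    ref value = src_array[src_pos + i];   // load (and unpoison) the reference
    value = ReadBarrier::Mark(value);     // via the ReadBarrierMarkRegX entrypoint
    dest_array[dest_pos + i] = value;     // (re-)poison and store the reference
  }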
+
+ // i = 0
+ __ xorl(temp1, temp1);
+ NearLabel loop;
+ __ Bind(&loop);
+ // value = src_array[i + src_pos]
+ if (src_pos.IsConstant()) {
+ int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
+ } else {
+ __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
+ }
+ __ MaybeUnpoisonHeapReference(temp2);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // value = ReadBarrier::Mark(value)
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(temp2, ESP);
+ DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
+ // This runtime call does not require a stack map.
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(temp2);
+ // dest_array[i + dest_pos] = value
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
+ } else {
+ __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
+ }
+ // ++i
+ __ addl(temp1, Immediate(1));
+ // if (i != length) goto loop
+ x86_codegen->GenerateIntCompare(temp1_loc, length);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
+};
+
+#undef __
+
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
@@ -752,20 +851,20 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}
-// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
-// as it needs 64 bit instructions.
void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
- // See intrinsics.h.
- if (!kRoundIsPlusPointFive) {
- return;
- }
-
// Do we have instruction support?
if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
+ HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+ DCHECK(static_or_direct != nullptr);
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
+ if (static_or_direct->HasSpecialInput() &&
+ invoke->InputAt(
+ static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->SetOut(Location::RequiresRegister());
locations->AddTemp(Location::RequiresFpuRegister());
locations->AddTemp(Location::RequiresFpuRegister());
@@ -774,7 +873,7 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
// We have to fall back to a call to the intrinsic.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly);
+ LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
locations->SetOut(Location::RegisterLocation(EAX));
@@ -784,47 +883,54 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
- if (locations->WillCall()) {
+ if (locations->WillCall()) { // TODO: can we reach this?
InvokeOutOfLineIntrinsic(codegen_, invoke);
return;
}
- // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
Register out = locations->Out().AsRegister<Register>();
- XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
- NearLabel done, nan;
+ NearLabel skip_incr, done;
X86Assembler* assembler = GetAssembler();
- // Generate 0.5 into inPlusPointFive.
- __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
- __ movd(inPlusPointFive, out);
-
- // Add in the input.
- __ addss(inPlusPointFive, in);
-
- // And truncate to an integer.
- __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
+ // Since no direct x86 rounding instruction matches the required semantics,
+ // this intrinsic is implemented as follows:
+ // result = floor(in);
+ // if (in - result >= 0.5f)
+ // result = result + 1.0f;
+ __ movss(t2, in);
+ __ roundss(t1, in, Immediate(1));
+ __ subss(t2, t1);
+ if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
+ // Direct constant area available.
+ Register constant_area = locations->InAt(1).AsRegister<Register>();
+ __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
+ __ Bind(&skip_incr);
+ } else {
+ // No constant area: go through stack.
+ __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
+ __ comiss(t2, Address(ESP, 4));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, Address(ESP, 0));
+ __ Bind(&skip_incr);
+ __ addl(ESP, Immediate(8));
+ }
+
+ // Final conversion to an integer. Unfortunately this also does not have a
+ // direct x86 instruction, since NaN should map to 0 and large positive
+ // values need to be clipped to the extreme value.
__ movl(out, Immediate(kPrimIntMax));
- // maxInt = int-to-float(out)
- __ cvtsi2ss(maxInt, out);
-
- // if inPlusPointFive >= maxInt goto done
- __ comiss(inPlusPointFive, maxInt);
- __ j(kAboveEqual, &done);
-
- // if input == NaN goto nan
- __ j(kUnordered, &nan);
-
- // output = float-to-int-truncate(input)
- __ cvttss2si(out, inPlusPointFive);
- __ jmp(&done);
- __ Bind(&nan);
-
- // output = 0
- __ xorl(out, out);
+ __ cvtsi2ss(t2, out);
+ __ comiss(t1, t2);
+ __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
+ __ movl(out, Immediate(0)); // does not change flags
+ __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
+ __ cvttss2si(out, t1);
__ Bind(&done);
}
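Putting the pieces together, the sequence above implements roughly the following semantics (a self-contained C++ sketch based on the comments, with a made-up function name; it is not the emitted instruction sequence):

  #include <cmath>
  #include <cstdint>
  #include <limits>

  int32_t RoundFloat(float in) {
    float result = std::floor(in);
    if (in - result >= 0.5f) {
      result += 1.0f;
    }
    if (std::isnan(in)) {
      return 0;  // NaN maps to 0.
    }
    if (result >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
      return std::numeric_limits<int32_t>::max();  // Clip large positive values.
    }
    return static_cast<int32_t>(result);  // Truncating conversion.
  }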
@@ -857,7 +963,7 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry
}
// Now do the actual call.
- __ fs()->call(Address::Absolute(GetThreadOffset<kX86PointerSize>(entry)));
+ codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
// Extract the return value from the FP stack.
__ fstpl(Address(ESP, 0));
@@ -866,8 +972,6 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry
// And clean up the stack.
__ addl(ESP, Immediate(16));
__ cfi().AdjustCFAOffset(-16);
-
- codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}
void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
@@ -1216,7 +1320,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1237,7 +1341,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pStringCompareTo)));
+ codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
__ Bind(slow_path->GetExitLabel());
}
@@ -1297,23 +1401,39 @@ void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
__ cmpl(str, arg);
__ j(kEqual, &return_true);
- // Load length of receiver string.
+ // Load length and compression flag of receiver string.
__ movl(ecx, Address(str, count_offset));
- // Check if lengths are equal, return false if they're not.
+ // Check if lengths and compression flags are equal, return false if they're not.
+  // Two identical strings will always have the same compression style, since
+  // the compression style is decided at allocation time.
__ cmpl(ecx, Address(arg, count_offset));
__ j(kNotEqual, &return_false);
- // Return true if both strings are empty.
- __ jecxz(&return_true);
+ if (mirror::kUseStringCompression) {
+ NearLabel string_uncompressed;
+    // Differentiate the cases of both strings being compressed or both being
+    // uncompressed; strings with different compression styles have already
+    // been ruled out above.
+ __ cmpl(ecx, Immediate(0));
+ __ j(kGreaterEqual, &string_uncompressed);
+ // Divide string length by 2, rounding up, and continue as if uncompressed.
+ // Merge clearing the compression flag (+0x80000000) with +1 for rounding.
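+    // For example, a compressed string of length 5 has ecx = 0x80000005;
+    // adding 0x80000001 wraps around to 6, and the shift yields 3 = ceil(5 / 2).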
+ __ addl(ecx, Immediate(0x80000001));
+ __ shrl(ecx, Immediate(1));
+ __ Bind(&string_uncompressed);
+ }
+ // Return true if strings are empty.
+ __ jecxz(&return_true);
// Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
__ leal(esi, Address(str, value_offset));
__ leal(edi, Address(arg, value_offset));
- // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths.
+ // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
+ // divisible by 2.
__ addl(ecx, Immediate(1));
__ shrl(ecx, Immediate(1));
- // Assertions that must hold in order to compare strings 2 characters at a time.
+ // Assertions that must hold in order to compare strings 2 characters (uncompressed)
+ // or 4 characters (compressed) at a time.
DCHECK_ALIGNED(value_offset, 4);
static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
@@ -1357,6 +1477,10 @@ static void CreateStringIndexOfLocations(HInvoke* invoke,
locations->AddTemp(Location::RegisterLocation(ECX));
// Need another temporary to be able to compute the result.
locations->AddTemp(Location::RequiresRegister());
+ if (mirror::kUseStringCompression) {
+ // Need another temporary to be able to save unflagged string length.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
static void GenerateStringIndexOf(HInvoke* invoke,
@@ -1374,6 +1498,8 @@ static void GenerateStringIndexOf(HInvoke* invoke,
Register counter = locations->GetTemp(0).AsRegister<Register>();
Register string_length = locations->GetTemp(1).AsRegister<Register>();
Register out = locations->Out().AsRegister<Register>();
+  // Only used when the string compression feature is on.
+ Register string_length_flagged;
// Check our assumptions for registers.
DCHECK_EQ(string_obj, EDI);
@@ -1411,6 +1537,12 @@ static void GenerateStringIndexOf(HInvoke* invoke,
// Load string length, i.e., the count field of the string.
__ movl(string_length, Address(string_obj, count_offset));
+ if (mirror::kUseStringCompression) {
+ string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
+ __ movl(string_length_flagged, string_length);
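+    // The count field keeps the compression flag in its sign bit, so a
+    // negative value means the string is compressed.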
+ // Mask out first bit used as compression flag.
+ __ andl(string_length, Immediate(INT32_MAX));
+ }
// Do a zero-length check.
// TODO: Support jecxz.
@@ -1436,20 +1568,50 @@ static void GenerateStringIndexOf(HInvoke* invoke,
__ cmpl(start_index, Immediate(0));
__ cmovl(kGreater, counter, start_index);
- // Move to the start of the string: string_obj + value_offset + 2 * start_index.
- __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
-
- // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
- // compare.
+ if (mirror::kUseStringCompression) {
+ NearLabel modify_counter, offset_uncompressed_label;
+ __ cmpl(string_length_flagged, Immediate(0));
+ __ j(kGreaterEqual, &offset_uncompressed_label);
+ // Move to the start of the string: string_obj + value_offset + start_index.
+ __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
+ __ jmp(&modify_counter);
+
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ Bind(&offset_uncompressed_label);
+ __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+ // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
+ // compare.
+ __ Bind(&modify_counter);
+ } else {
+ __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+ }
__ negl(counter);
__ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
}
- // Everything is set up for repne scasw:
- // * Comparison address in EDI.
- // * Counter in ECX.
- __ repne_scasw();
-
+ if (mirror::kUseStringCompression) {
+ NearLabel uncompressed_string_comparison;
+ NearLabel comparison_done;
+ __ cmpl(string_length_flagged, Immediate(0));
+ __ j(kGreater, &uncompressed_string_comparison);
+
+ // Check if EAX (search_value) is ASCII.
+ __ cmpl(search_value, Immediate(127));
+ __ j(kGreater, &not_found_label);
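+    // A compressed string only holds characters in the ASCII range, so a
+    // search value above 127 can never be present in it.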
+ // Comparing byte-per-byte.
+ __ repne_scasb();
+ __ jmp(&comparison_done);
+
+ // Everything is set up for repne scasw:
+ // * Comparison address in EDI.
+ // * Counter in ECX.
+ __ Bind(&uncompressed_string_comparison);
+ __ repne_scasw();
+ __ Bind(&comparison_done);
+ } else {
+ __ repne_scasw();
+ }
// Did we find a match?
__ j(kNotEqual, &not_found_label);
@@ -1490,7 +1652,7 @@ void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1510,9 +1672,8 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromBytes)));
+ codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -1528,22 +1689,19 @@ void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke
}
void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
- X86Assembler* assembler = GetAssembler();
-
// No need to emit code checking whether `locations->InAt(2)` is a null
// pointer, as callers of the native method
//
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
//
// all include a null check on `data` before calling that method.
- __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromChars)));
+ codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1560,10 +1718,8 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke)
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ fs()->call(
- Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromString)));
+ codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -1608,38 +1764,64 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
DCHECK_EQ(char_size, 2u);
- // Compute the address of the destination buffer.
- __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
-
- // Compute the address of the source string.
- if (srcBegin.IsConstant()) {
- // Compute the address of the source string by adding the number of chars from
- // the source beginning to the value offset of a string.
- __ leal(ESI, Address(obj, srcBegin_value * char_size + value_offset));
- } else {
- __ leal(ESI, Address(obj, srcBegin.AsRegister<Register>(),
- ScaleFactor::TIMES_2, value_offset));
- }
-
// Compute the number of chars (words) to move.
- // Now is the time to save ECX, since we don't know if it will be used later.
+ // Save ECX, since we don't know if it will be used later.
__ pushl(ECX);
int stack_adjust = kX86WordSize;
__ cfi().AdjustCFAOffset(stack_adjust);
DCHECK_EQ(srcEnd, ECX);
if (srcBegin.IsConstant()) {
- if (srcBegin_value != 0) {
- __ subl(ECX, Immediate(srcBegin_value));
- }
+ __ subl(ECX, Immediate(srcBegin_value));
} else {
DCHECK(srcBegin.IsRegister());
__ subl(ECX, srcBegin.AsRegister<Register>());
}
- // Do the move.
+ NearLabel done;
+ if (mirror::kUseStringCompression) {
+    // Location of the count field in the String object.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ __ pushl(EAX);
+ __ cfi().AdjustCFAOffset(stack_adjust);
+
+ NearLabel copy_loop, copy_uncompressed;
+ __ cmpl(Address(obj, count_offset), Immediate(0));
+ __ j(kGreaterEqual, &copy_uncompressed);
+ // Compute the address of the source string by adding the number of chars from
+ // the source beginning to the value offset of a string.
+ __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
+
+ // Start the loop to copy String's value to Array of Char.
+ __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
+ __ Bind(&copy_loop);
+ __ jecxz(&done);
+ // Use EAX temporary (convert byte from ESI to word).
+ // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
+ __ movzxb(EAX, Address(ESI, 0));
+ __ movw(Address(EDI, 0), EAX);
+ __ leal(EDI, Address(EDI, char_size));
+ __ leal(ESI, Address(ESI, c_char_size));
+ // TODO: Add support for LOOP to X86Assembler.
+ __ subl(ECX, Immediate(1));
+ __ jmp(&copy_loop);
+ __ Bind(&copy_uncompressed);
+ }
+
+ // Do the copy for uncompressed string.
+ // Compute the address of the destination buffer.
+ __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
+ __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
__ rep_movsw();
- // And restore ECX.
+ __ Bind(&done);
+ if (mirror::kUseStringCompression) {
+ // Restore EAX.
+ __ popl(EAX);
+ __ cfi().AdjustCFAOffset(-stack_adjust);
+ }
+ // Restore ECX.
__ popl(ECX);
__ cfi().AdjustCFAOffset(-stack_adjust);
}
@@ -1828,10 +2010,9 @@ static void GenUnsafeGet(HInvoke* invoke,
Register output = output_loc.AsRegister<Register>();
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, output_loc, base, src, temp, /* needs_null_check */ false);
+ invoke, output_loc, base, src, /* needs_null_check */ false);
} else {
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
codegen->GenerateReadBarrierSlow(
@@ -1875,10 +2056,13 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
@@ -1892,12 +2076,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
}
} else {
locations->SetOut(Location::RequiresRegister(),
- can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
- }
- if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}
}
@@ -2076,10 +2255,16 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
+ Primitive::Type type,
HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -2099,7 +2284,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty
// Force a byte register for the output.
locations->SetOut(Location::RegisterLocation(EAX));
if (type == Primitive::kPrimNot) {
- // Need temp registers for card-marking.
+ // Need temporary registers for card-marking, and possibly for
+ // (Baker) read barrier.
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
// Need a byte register for marking.
locations->AddTemp(Location::RegisterLocation(ECX));
@@ -2115,14 +2301,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2138,7 +2319,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
Location out = locations->Out();
DCHECK_EQ(out.AsRegister<Register>(), EAX);
+ // The address of the field within the holding object.
+ Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
+
if (type == Primitive::kPrimNot) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+
Register expected = locations->InAt(3).AsRegister<Register>();
// Ensure `expected` is in EAX (required by the CMPXCHG instruction).
DCHECK_EQ(expected, EAX);
@@ -2146,11 +2338,20 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
// Mark card for object assuming new value is stored.
bool value_can_be_null = true; // TODO: Worth finding out this information?
- codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
- locations->GetTemp(1).AsRegister<Register>(),
- base,
- value,
- value_can_be_null);
+ codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Need to make sure the reference stored in the field is a to-space
+    // one before attempting the CAS, or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ temp1_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ field_addr,
+ /* needs_null_check */ false,
+ /* always_update_field */ true,
+ &temp2);
+ }
bool base_equals_value = (base == value);
if (kPoisonHeapReferences) {
@@ -2158,7 +2359,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
// If `base` and `value` are the same register location, move
// `value` to a temporary register. This way, poisoning
// `value` won't invalidate `base`.
- value = locations->GetTemp(0).AsRegister<Register>();
+ value = temp1;
__ movl(value, base);
}
@@ -2177,19 +2378,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
__ PoisonHeapReference(value);
}
- // TODO: Add a read barrier for the reference stored in the object
- // before attempting the CAS, similar to the one in the
- // art::Unsafe_compareAndSwapObject JNI implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
- DCHECK(!kEmitCompilerReadBarrier);
- __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+ __ LockCmpxchgl(field_addr, value);
// LOCK CMPXCHG has full barrier semantics, and we don't need
// scheduling barriers at this time.
- // Convert ZF into the boolean result.
+ // Convert ZF into the Boolean result.
__ setb(kZero, out.AsRegister<Register>());
__ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
@@ -2213,8 +2407,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
// Ensure the expected value is in EAX (required by the CMPXCHG
// instruction).
DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
- __ LockCmpxchgl(Address(base, offset, TIMES_1, 0),
- locations->InAt(4).AsRegister<Register>());
+ __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
} else if (type == Primitive::kPrimLong) {
// Ensure the expected value is in EAX:EDX and that the new
// value is in EBX:ECX (required by the CMPXCHG8B instruction).
@@ -2222,7 +2415,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
- __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
+ __ LockCmpxchg8b(field_addr);
} else {
LOG(FATAL) << "Unexpected CAS type " << type;
}
@@ -2230,7 +2423,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
// LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
// don't need scheduling barriers at this time.
- // Convert ZF into the boolean result.
+ // Convert ZF into the Boolean result.
__ setb(kZero, out.AsRegister<Register>());
__ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
}
@@ -2245,14 +2438,9 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
GenCAS(Primitive::kPrimNot, invoke, codegen_);
}
@@ -2671,9 +2859,9 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1)
}
void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2703,9 +2891,9 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2714,17 +2902,21 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
- Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Location length_arg = locations->InAt(4);
+ Location length = length_arg;
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -2740,7 +2932,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2748,7 +2940,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2758,10 +2950,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -2770,16 +2962,17 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
if (length.IsStackSlot()) {
__ movl(temp3, Address(ESP, length.GetStackIndex()));
length = Location::RegisterLocation(temp3);
@@ -2791,7 +2984,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -2799,7 +2992,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2808,7 +3001,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -2817,72 +3010,159 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
+
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp2 = temp1->component_type_
- __ movl(temp2, Address(temp1, component_offset));
- __ testl(temp2, temp2);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp2);
- __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- // Re-poison the heap reference to make the compare instruction below
- // compare two poisoned references.
- __ PoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (length.Equals(Location::RegisterLocation(temp3))) {
+ // When Baker read barriers are enabled, register `temp3`,
+ // which in the present case contains the `length` parameter,
+ // will be overwritten below. Make the `length` location
+ // reference the original stack location; it will be moved
+ // back to `temp3` later if necessary.
+ DCHECK(length_arg.IsStackSlot());
+ length = length_arg;
+ }
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+      // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmpl(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
} else {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- }
+ // Non read barrier code.
- // Note: if poisoning is on, we are here comparing two poisoned references.
- __ cmpl(temp1, Address(src, class_offset));
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ movl(temp2, Address(temp1, component_offset));
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp2);
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ // Re-poison the heap reference to make the compare instruction below
+ // compare two poisoned references.
+ __ PoisonHeapReference(temp1);
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- NearLabel do_copy;
- __ j(kEqual, &do_copy);
+ // Note: if heap poisoning is on, we are comparing two poisoned references here.
+ __ cmpl(temp1, Address(src, class_offset));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+ }
+ } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+ // Bail out if the source is not a non primitive array.
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
__ MaybeUnpoisonHeapReference(temp1);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
__ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
__ MaybeUnpoisonHeapReference(temp1);
- __ cmpl(Address(temp1, super_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
}
- } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
+ // Compute the base source address in `temp1`.
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
DCHECK_EQ(element_size, 4);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
@@ -2893,35 +3173,138 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp2, Address(dest, element_size * constant + offset));
- } else {
- __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // If it is needed (in the case of the fast-path loop), the base
+ // destination address is computed later, as `temp2` is used for
+ // intermediate computations.
- if (length.IsConstant()) {
- int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp3, Address(temp1, element_size * constant));
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ if (length.IsStackSlot()) {
+ // Location `length` is again pointing at a stack slot, as
+          // register `temp3` (which contained the length parameter
+          // earlier) has been overwritten; restore it now.
+ DCHECK(length.Equals(length_arg));
+ __ movl(temp3, Address(ESP, length.GetStackIndex()));
+ length = Location::RegisterLocation(temp3);
+ }
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+  //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // for (size_t i = 0; i != length; ++i) {
+ // dest_array[dest_pos + i] =
+ // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
+ // }
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+ constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+ constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // goto slow_path;
+ // At this point, just do the "if" and make sure that flags are preserved until the branch.
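+      // The testb below reads only the byte of the lock word that holds the
+      // read barrier state and tests its gray bit, so no full 32-bit load of
+      // the monitor word is needed.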
+ __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // We have done the "if" of the gray bit check above, now branch based on the flags.
+ __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+
+ // Set the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
} else {
- __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
- }
-
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ pushl(Address(temp1, 0));
- __ cfi().AdjustCFAOffset(4);
- __ popl(Address(temp2, 0));
- __ cfi().AdjustCFAOffset(-4);
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -2930,7 +3313,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index 08bd197400..3743cb1371 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7e0d72930c..cdef22f6de 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -64,6 +64,65 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+ __ Bind(GetEntryLabel());
+ NearLabel loop;
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ // TODO: Inline the mark bit check before calling the runtime?
+ // TMP = ReadBarrier::Mark(TMP);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
+ // This runtime call does not require a stack map.
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(CpuRegister(TMP));
+ __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
+ __ addl(src_curr_addr, Immediate(element_size));
+ __ addl(dst_curr_addr, Immediate(element_size));
+ __ cmpl(src_curr_addr, src_stop_addr);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
+};
+
+#undef __
+
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -583,6 +642,7 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
return;
}
@@ -597,10 +657,7 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
}
void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
- // See intrinsics.h.
- if (kRoundIsPlusPointFive) {
- CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
- }
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
@@ -610,47 +667,41 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
return;
}
- // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- NearLabel done, nan;
+ XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ NearLabel skip_incr, done;
X86_64Assembler* assembler = GetAssembler();
- // Load 0.5 into inPlusPointFive.
- __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));
-
- // Add in the input.
- __ addss(inPlusPointFive, in);
-
- // And truncate to an integer.
- __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
-
- // Load maxInt into out.
- codegen_->Load64BitValue(out, kPrimIntMax);
-
- // if inPlusPointFive >= maxInt goto done
- __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
- __ j(kAboveEqual, &done);
-
- // if input == NaN goto nan
- __ j(kUnordered, &nan);
-
- // output = float-to-int-truncate(input)
- __ cvttss2si(out, inPlusPointFive);
- __ jmp(&done);
- __ Bind(&nan);
-
- // output = 0
- __ xorl(out, out);
+ // Since no direct x86 rounding instruction matches the required semantics,
+ // this intrinsic is implemented as follows:
+ // result = floor(in);
+ // if (in - result >= 0.5f)
+ // result = result + 1.0f;
+ __ movss(t2, in);
+ __ roundss(t1, in, Immediate(1));
+ __ subss(t2, t1);
+ __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
+ __ Bind(&skip_incr);
+
+ // Final conversion to an integer. Unfortunately this also does not have a
+ // direct x86 instruction, since NaN should map to 0 and large positive
+ // values need to be clipped to the extreme value.
+ codegen_->Load32BitValue(out, kPrimIntMax);
+ __ cvtsi2ss(t2, out);
+ __ comiss(t1, t2);
+ __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
+ __ movl(out, Immediate(0)); // does not change flags
+ __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
+ __ cvttss2si(out, t1);
__ Bind(&done);
}
void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
- // See intrinsics.h.
- if (kRoundIsPlusPointFive) {
- CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
- }
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
@@ -660,39 +711,36 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
return;
}
- // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- NearLabel done, nan;
+ XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ NearLabel skip_incr, done;
X86_64Assembler* assembler = GetAssembler();
- // Load 0.5 into inPlusPointFive.
- __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));
-
- // Add in the input.
- __ addsd(inPlusPointFive, in);
-
- // And truncate to an integer.
- __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
-
- // Load maxLong into out.
+ // Since no direct x86 rounding instruction matches the required semantics,
+ // this intrinsic is implemented as follows:
+ // result = floor(in);
+ // if (in - result >= 0.5)
+  //     result = result + 1.0;
+ __ movsd(t2, in);
+ __ roundsd(t1, in, Immediate(1));
+ __ subsd(t2, t1);
+ __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
+ __ j(kBelow, &skip_incr);
+ __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
+ __ Bind(&skip_incr);
+
+ // Final conversion to an integer. Unfortunately this also does not have a
+ // direct x86 instruction, since NaN should map to 0 and large positive
+ // values need to be clipped to the extreme value.
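+  // For example, Math.round(Double.NaN) == 0L and
+  // Math.round(Double.MAX_VALUE) == Long.MAX_VALUE.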
codegen_->Load64BitValue(out, kPrimLongMax);
-
- // if inPlusPointFive >= maxLong goto done
- __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
- __ j(kAboveEqual, &done);
-
- // if input == NaN goto nan
- __ j(kUnordered, &nan);
-
- // output = double-to-long-truncate(input)
- __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
- __ jmp(&done);
- __ Bind(&nan);
-
- // output = 0
- __ xorl(out, out);
+ __ cvtsi2sd(t2, out, /* is64bit */ true);
+ __ comisd(t1, t2);
+ __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
+ __ movl(out, Immediate(0)); // does not change flags, implicit zero extension to 64-bit
+ __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
+ __ cvttsd2si(out, t1, /* is64bit */ true);
__ Bind(&done);
}
@@ -718,10 +766,8 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
LocationSummary* locations = invoke->GetLocations();
DCHECK(locations->WillCall());
DCHECK(invoke->IsInvokeStaticOrDirect());
- X86_64Assembler* assembler = codegen->GetAssembler();
- __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64PointerSize>(entry), true));
- codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
}
void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
@@ -1064,9 +1110,9 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1074,9 +1120,9 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1085,18 +1131,23 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
Location src_pos = locations->InAt(1);
CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
- CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
- CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+ Location temp1_loc = locations->GetTemp(0);
+ CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
+ Location temp2_loc = locations->GetTemp(1);
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+ Location temp3_loc = locations->GetTemp(2);
+ CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
+ Location TMP_loc = Location::RegisterLocation(TMP);
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1112,7 +1163,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1120,7 +1171,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1130,10 +1181,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -1142,13 +1193,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1157,7 +1208,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -1165,7 +1216,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1174,7 +1225,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1183,38 +1234,80 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ movl(temp1, Address(dest, class_offset));
- __ movl(temp2, Address(src, class_offset));
+
bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ // If heap poisoning is enabled, `temp1` and `temp2` have been
+ // unpoisoned by the previous calls to
+ // GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ movl(temp2, Address(src, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
}
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp2->component_type_
- __ movl(CpuRegister(TMP), Address(temp2, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
__ cmpl(temp1, temp2);
@@ -1222,34 +1315,56 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (optimizations.GetDestinationIsTypedObjectArray()) {
NearLabel do_copy;
__ j(kEqual, &do_copy);
- if (!did_unpoison) {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as the `super_class_` reference loaded
+ // from `temp1` is only compared against null below and is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ } else {
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
__ MaybeUnpoisonHeapReference(temp1);
+ // No need to unpoison the following heap reference load, as
+ // we're comparing against null.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ movl(temp1, Address(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ testl(temp1, temp1);
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
__ Bind(&do_copy);
} else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ // No need to unpoison `TMP` now, as we're comparing against null.
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
// Compute base source address, base destination address, and end source address.
@@ -1277,19 +1392,88 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ movl(CpuRegister(TMP), Address(temp1, 0));
- __ movl(Address(temp2, 0), CpuRegister(TMP));
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+ constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+ constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // goto slow_path;
+ // At this point, just do the "if" and make sure that flags are preserved until the branch.
+ __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86-64 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // We have done the "if" of the gray bit check above, now branch based on the flags.
+ __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
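
The pseudo-code comment above can also be read as ordinary C++. The sketch below only restates that comment; `mark` stands in for the runtime's ReadBarrier::Mark step and the element width is fixed to 32-bit heap references, both assumptions made purely for illustration.

#include <atomic>
#include <cstdint>

// Restatement of the copy strategy: probe the source's read-barrier state once,
// then run either the raw fast copy or the marking slow copy.
void CopyRange(const uint32_t* src_ptr, const uint32_t* end_ptr, uint32_t* dest_ptr,
               bool src_is_gray, uint32_t (*mark)(uint32_t)) {
  if (src_ptr == end_ptr) {
    return;  // length == 0: skip the loop entirely
  }
  // Load fence / artificial dependency; a no-op on x86-64, as noted above.
  std::atomic_thread_fence(std::memory_order_acquire);
  if (src_is_gray) {
    do {
      *dest_ptr++ = mark(*src_ptr++);  // slow path: mark every copied reference
    } while (src_ptr != end_ptr);
  } else {
    do {
      *dest_ptr++ = *src_ptr++;        // fast path: raw copy
    } while (src_ptr != end_ptr);
  }
}
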
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1298,12 +1482,12 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
CpuRegister(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1324,8 +1508,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pStringCompareTo),
- /* no_rip */ true));
+ codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
__ Bind(slow_path->GetExitLabel());
}
@@ -1385,14 +1568,27 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
__ cmpl(str, arg);
__ j(kEqual, &return_true);
- // Load length of receiver string.
+ // Load length and compression flag of receiver string.
__ movl(rcx, Address(str, count_offset));
- // Check if lengths are equal, return false if they're not.
+ // Check if lengths and compression flags are equal, return false if they're not.
+ // Two identical strings will always have the same compression style since
+ // compression style is decided at allocation time.
__ cmpl(rcx, Address(arg, count_offset));
__ j(kNotEqual, &return_false);
+
+ if (mirror::kUseStringCompression) {
+ NearLabel string_uncompressed;
+ // Both strings are compressed if the count field is negative.
+ __ cmpl(rcx, Immediate(0));
+ __ j(kGreaterEqual, &string_uncompressed);
+ // Divide string length by 2, rounding up, and continue as if uncompressed.
+ // Merge clearing the compression flag with +1 for rounding.
+ __ addl(rcx, Immediate(static_cast<int32_t>(0x80000001)));
+ __ shrl(rcx, Immediate(1));
+ __ Bind(&string_uncompressed);
+ }
// Return true if both strings are empty.
__ jrcxz(&return_true);
-
// Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
__ leal(rsi, Address(str, value_offset));
__ leal(rdi, Address(arg, value_offset));
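
The `addl`/`shrl` pair in the compressed-string branch above packs two steps into one constant. Assuming the compression flag occupies the sign bit of the count field (which is what the preceding compare-with-zero tests), adding 0x80000001 clears that bit modulo 2^32 and adds 1 for rounding, so the shift that follows produces ceil(length / 2):

#include <cstdint>

// Sketch of the count-field arithmetic (flag assumed to be the sign bit).
uint32_t HalfLengthRoundedUp(uint32_t count_with_flag) {
  uint32_t t = count_with_flag + 0x80000001u;  // clear the flag bit (mod 2^32) and add 1
  return t >> 1;                               // ceil(length / 2)
}
// Example: count 0x80000005 (compressed, length 5) -> t = 0x00000006 -> result 3.
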
@@ -1401,7 +1597,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
__ addl(rcx, Immediate(3));
__ shrl(rcx, Immediate(2));
- // Assertions that must hold in order to compare strings 4 characters at a time.
+ // Assertions that must hold in order to compare strings 4 characters (uncompressed)
+ // or 8 characters (compressed) at a time.
DCHECK_ALIGNED(value_offset, 8);
static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
@@ -1491,7 +1688,8 @@ static void GenerateStringIndexOf(HInvoke* invoke,
__ j(kAbove, slow_path->GetEntryLabel());
}
- // From here down, we know that we are looking for a char that fits in 16 bits.
+ // From here down, we know that we are looking for a char that fits in
+ // 16 bits (uncompressed) or 8 bits (compressed).
// Location of reference to data array within the String object.
int32_t value_offset = mirror::String::ValueOffset().Int32Value();
// Location of count within the String object.
@@ -1499,6 +1697,12 @@ static void GenerateStringIndexOf(HInvoke* invoke,
// Load string length, i.e., the count field of the string.
__ movl(string_length, Address(string_obj, count_offset));
+ if (mirror::kUseStringCompression) {
+ // Keep the length together with its compression flag (string_length_flagged) in TMP.
+ __ movl(CpuRegister(TMP), string_length);
+ // Mask out the most significant bit, which is used as the compression flag.
+ __ andl(string_length, Immediate(INT32_MAX));
+ }
// Do a length check.
// TODO: Support jecxz.
@@ -1509,7 +1713,6 @@ static void GenerateStringIndexOf(HInvoke* invoke,
if (start_at_zero) {
// Number of chars to scan is the same as the string length.
__ movl(counter, string_length);
-
// Move to the start of the string.
__ addq(string_obj, Immediate(value_offset));
} else {
@@ -1524,19 +1727,44 @@ static void GenerateStringIndexOf(HInvoke* invoke,
__ cmpl(start_index, Immediate(0));
__ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough.
- // Move to the start of the string: string_obj + value_offset + 2 * start_index.
- __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
-
+ if (mirror::kUseStringCompression) {
+ NearLabel modify_counter, offset_uncompressed_label;
+ __ cmpl(CpuRegister(TMP), Immediate(0));
+ __ j(kGreaterEqual, &offset_uncompressed_label);
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
+ __ jmp(&modify_counter);
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ Bind(&offset_uncompressed_label);
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+ __ Bind(&modify_counter);
+ } else {
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+ }
// Now update ecx, the work counter: it's gonna be string.length - start_index.
__ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
__ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
}
- // Everything is set up for repne scasw:
- // * Comparison address in RDI.
- // * Counter in ECX.
- __ repne_scasw();
-
+ if (mirror::kUseStringCompression) {
+ NearLabel uncompressed_string_comparison;
+ NearLabel comparison_done;
+ __ cmpl(CpuRegister(TMP), Immediate(0));
+ __ j(kGreater, &uncompressed_string_comparison);
+ // Check if RAX (search_value) is ASCII.
+ __ cmpl(search_value, Immediate(127));
+ __ j(kGreater, &not_found_label);
+ // Comparing byte-per-byte.
+ __ repne_scasb();
+ __ jmp(&comparison_done);
+ // Everything is set up for repne scasw:
+ // * Comparison address in RDI.
+ // * Counter in ECX.
+ __ Bind(&uncompressed_string_comparison);
+ __ repne_scasw();
+ __ Bind(&comparison_done);
+ } else {
+ __ repne_scasw();
+ }
// Did we find a match?
__ j(kNotEqual, &not_found_label);
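
The compressed branch above boils down to two observations: a search value outside the 7-bit ASCII range can never occur in a compressed string, and otherwise the scan runs byte-per-byte instead of char-per-char. A compact sketch, with made-up names and without the start-index bookkeeping of the real code:

#include <cstdint>

// Sketch of the scan-width selection for String.indexOf (illustrative only).
int IndexOfChar(const void* data, uint32_t count, uint16_t ch, bool compressed) {
  if (compressed) {
    if (ch > 127) {
      return -1;  // non-ASCII values cannot appear in a compressed string
    }
    const uint8_t* p = static_cast<const uint8_t*>(data);
    for (uint32_t i = 0; i < count; ++i) {
      if (p[i] == ch) return static_cast<int>(i);  // repne scasb
    }
  } else {
    const uint16_t* p = static_cast<const uint16_t*>(data);
    for (uint32_t i = 0; i < count; ++i) {
      if (p[i] == ch) return static_cast<int>(i);  // repne scasw
    }
  }
  return -1;
}
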
@@ -1577,7 +1805,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1597,11 +1825,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
- pAllocStringFromBytes),
- /* no_rip */ true));
+ codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -1617,24 +1842,19 @@ void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* inv
}
void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
- X86_64Assembler* assembler = GetAssembler();
-
// No need to emit code checking whether `locations->InAt(2)` is a null
// pointer, as callers of the native method
//
// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
//
// all include a null check on `data` before calling that method.
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
- pAllocStringFromChars),
- /* no_rip */ true));
+ codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kCallOnMainAndSlowPath,
kIntrinsified);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -1651,11 +1871,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invok
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize,
- pAllocStringFromString),
- /* no_rip */ true));
+ codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -1699,32 +1916,54 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
DCHECK_EQ(char_size, 2u);
- // Compute the address of the destination buffer.
- __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
-
- // Compute the address of the source string.
- if (srcBegin.IsConstant()) {
- // Compute the address of the source string by adding the number of chars from
- // the source beginning to the value offset of a string.
- __ leaq(CpuRegister(RSI), Address(obj, srcBegin_value * char_size + value_offset));
- } else {
- __ leaq(CpuRegister(RSI), Address(obj, srcBegin.AsRegister<CpuRegister>(),
- ScaleFactor::TIMES_2, value_offset));
- }
-
+ NearLabel done;
// Compute the number of chars (words) to move.
__ movl(CpuRegister(RCX), srcEnd);
if (srcBegin.IsConstant()) {
- if (srcBegin_value != 0) {
- __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
- }
+ __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
} else {
DCHECK(srcBegin.IsRegister());
__ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
}
+ if (mirror::kUseStringCompression) {
+ NearLabel copy_uncompressed, copy_loop;
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ // Location of count in string.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ __ cmpl(Address(obj, count_offset), Immediate(0));
+ __ j(kGreaterEqual, &copy_uncompressed);
+ // Compute the address of the source string by adding the number of chars from
+ // the source beginning to the value offset of a string.
+ __ leaq(CpuRegister(RSI),
+ CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
+ // Compute the address of the destination buffer and start the byte-to-char copy loop.
+ __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
+
+ __ Bind(&copy_loop);
+ __ jrcxz(&done);
+ // Use TMP as temporary (convert byte from RSI to word).
+ // TODO: Select RAX as the temporary and use LODSB/STOSW instead.
+ __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
+ __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
+ __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
+ __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
+ // TODO: Add support for LOOP to X86_64Assembler.
+ __ subl(CpuRegister(RCX), Immediate(1));
+ __ jmp(&copy_loop);
+
+ __ Bind(&copy_uncompressed);
+ }
+
+ __ leaq(CpuRegister(RSI),
+ CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
+ // Compute the address of the destination buffer.
+ __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
// Do the move.
__ rep_movsw();
+
+ __ Bind(&done);
}
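
The compressed path above is a byte-to-char widening copy; the uncompressed path falls through to `rep movsw`. A sketch of the two cases (the compression check and field offsets are abstracted away; names are illustrative):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Sketch of StringGetCharsNoCheck's copy step (illustrative, not the emitted code).
void CopyStringChars(const void* value, uint16_t* dst, size_t count, bool compressed) {
  if (compressed) {
    const uint8_t* src = static_cast<const uint8_t*>(value);
    while (count != 0) {     // jrcxz-style zero check
      *dst++ = *src++;       // movzxb + movw: widen byte to 16-bit char
      --count;
    }
  } else {
    std::memcpy(dst, value, count * sizeof(uint16_t));  // rep movsw equivalent
  }
}
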
static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
@@ -1903,10 +2142,9 @@ static void GenUnsafeGet(HInvoke* invoke,
case Primitive::kPrimNot: {
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, output_loc, base, src, temp, /* needs_null_check */ false);
+ invoke, output_loc, base, src, /* needs_null_check */ false);
} else {
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
codegen->GenerateReadBarrierSlow(
@@ -1929,46 +2167,42 @@ static void GenUnsafeGet(HInvoke* invoke,
}
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
- HInvoke* invoke,
- Primitive::Type type) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
bool can_call = kEmitCompilerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
- can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
- if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
- }
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
@@ -2099,10 +2333,16 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
+ Primitive::Type type,
HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -2113,7 +2353,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty
locations->SetOut(Location::RequiresRegister());
if (type == Primitive::kPrimNot) {
- // Need temp registers for card-marking.
+ // Need temporary registers for card-marking, and possibly for
+ // (Baker) read barrier.
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
locations->AddTemp(Location::RequiresRegister());
}
@@ -2128,14 +2369,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2152,16 +2388,37 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
// Ensure `expected` is in RAX (required by the CMPXCHG instruction).
DCHECK_EQ(expected.AsRegister(), RAX);
CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ Location out_loc = locations->Out();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (type == Primitive::kPrimNot) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+
// Mark card for object assuming new value is stored.
bool value_can_be_null = true; // TODO: Worth finding out this information?
- codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
- locations->GetTemp(1).AsRegister<CpuRegister>(),
- base,
- value,
- value_can_be_null);
+ codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
+
+ // The address of the field within the holding object.
+ Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Not used afterwards; serves only as a "temporary" within the read barrier.
+ base,
+ field_addr,
+ /* needs_null_check */ false,
+ /* always_update_field */ true,
+ &temp1,
+ &temp2);
+ }
bool base_equals_value = (base.AsRegister() == value.AsRegister());
Register value_reg = value.AsRegister();
@@ -2170,7 +2427,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
// If `base` and `value` are the same register location, move
// `value_reg` to a temporary register. This way, poisoning
// `value_reg` won't invalidate `base`.
- value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
+ value_reg = temp1.AsRegister();
__ movl(CpuRegister(value_reg), base);
}
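
The pre-CAS read barrier added above matters because, with a concurrent copying collector, the field may still hold a from-space reference while `expected` is its to-space copy; a bit-for-bit CMPXCHG would then fail even though both denote the same object. A rough sketch of the intended sequence, with `to_space` standing in for the mark/forward step (not an actual runtime API):

#include <atomic>

// Sketch: make the field hold a to-space reference before the real CAS
// (the emitted code folds the update into the read barrier's
// `always_update_field` path rather than issuing a separate CAS).
template <typename T>
bool CasObjectField(std::atomic<T*>* field, T* expected, T* desired, T* (*to_space)(T*)) {
  T* current = field->load(std::memory_order_relaxed);
  T* fixed = to_space(current);                        // read barrier: forward to to-space
  if (fixed != current) {
    field->compare_exchange_strong(current, fixed);    // heal the field in place
  }
  return field->compare_exchange_strong(expected, desired);
}
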
@@ -2189,19 +2446,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
__ PoisonHeapReference(CpuRegister(value_reg));
}
- // TODO: Add a read barrier for the reference stored in the object
- // before attempting the CAS, similar to the one in the
- // art::Unsafe_compareAndSwapObject JNI implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
- DCHECK(!kEmitCompilerReadBarrier);
- __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
+ __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
// LOCK CMPXCHG has full barrier semantics, and we don't need
// scheduling barriers at this time.
- // Convert ZF into the boolean result.
+ // Convert ZF into the Boolean result.
__ setcc(kZero, out);
__ movzxb(out, out);
@@ -2234,7 +2484,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
// LOCK CMPXCHG has full barrier semantics, and we don't need
// scheduling barriers at this time.
- // Convert ZF into the boolean result.
+ // Convert ZF into the Boolean result.
__ setcc(kZero, out);
__ movzxb(out, out);
}
@@ -2249,14 +2499,9 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
GenCAS(Primitive::kPrimNot, invoke, codegen_);
}
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index 155ff6548b..97404aa568 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
@@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor {
// Define visitor methods.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) OVERRIDE;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index a0ded74d6d..eb2d18dd88 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -15,6 +15,7 @@
*/
#include "licm.h"
+
#include "side_effects_analysis.h"
namespace art {
@@ -90,8 +91,7 @@ void LICM::Run() {
}
// Post order visit to visit inner loops before outer loops.
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
if (!block->IsLoopHeader()) {
// Only visit the loop when we reach the header.
continue;
diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc
new file mode 100644
index 0000000000..80cecd41dc
--- /dev/null
+++ b/compiler/optimizing/linear_order.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linear_order.h"
+
+namespace art {
+
+static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) {
+ return first_loop == second_loop;
+}
+
+static bool IsLoop(HLoopInformation* info) {
+ return info != nullptr;
+}
+
+static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) {
+ return (inner != outer)
+ && (inner != nullptr)
+ && (outer != nullptr)
+ && inner->IsIn(*outer);
+}
+
+// Helper method to update work list for linear order.
+static void AddToListForLinearization(ArenaVector<HBasicBlock*>* worklist, HBasicBlock* block) {
+ HLoopInformation* block_loop = block->GetLoopInformation();
+ auto insert_pos = worklist->rbegin(); // insert_pos.base() will be the actual position.
+ for (auto end = worklist->rend(); insert_pos != end; ++insert_pos) {
+ HBasicBlock* current = *insert_pos;
+ HLoopInformation* current_loop = current->GetLoopInformation();
+ if (InSameLoop(block_loop, current_loop)
+ || !IsLoop(current_loop)
+ || IsInnerLoop(current_loop, block_loop)) {
+ // The block can be processed immediately.
+ break;
+ }
+ }
+ worklist->insert(insert_pos.base(), block);
+}
+
+// Helper method to validate linear order.
+static bool IsLinearOrderWellFormed(const HGraph* graph, ArenaVector<HBasicBlock*>* linear_order) {
+ for (HBasicBlock* header : graph->GetBlocks()) {
+ if (header == nullptr || !header->IsLoopHeader()) {
+ continue;
+ }
+ HLoopInformation* loop = header->GetLoopInformation();
+ size_t num_blocks = loop->GetBlocks().NumSetBits();
+ size_t found_blocks = 0u;
+ for (HBasicBlock* block : *linear_order) {
+ if (loop->Contains(*block)) {
+ found_blocks++;
+ if (found_blocks == 1u && block != header) {
+ // First block is not the header.
+ return false;
+ } else if (found_blocks == num_blocks && !loop->IsBackEdge(*block)) {
+ // Last block is not a back edge.
+ return false;
+ }
+ } else if (found_blocks != 0u && found_blocks != num_blocks) {
+ // Blocks are not adjacent.
+ return false;
+ }
+ }
+ DCHECK_EQ(found_blocks, num_blocks);
+ }
+ return true;
+}
+
+void LinearizeGraph(const HGraph* graph,
+ ArenaAllocator* allocator,
+ ArenaVector<HBasicBlock*>* linear_order) {
+ DCHECK(linear_order->empty());
+ // Create a reverse post ordering with the following properties:
+ // - Blocks in a loop are consecutive,
+ // - Back-edge is the last block before loop exits.
+ //
+ // (1): Record the number of forward predecessors for each block. This is to
+ // ensure the resulting order is reverse post order. We could use the
+ // current reverse post order in the graph, but it would require making
+ // order queries to a GrowableArray, which is not the best data structure
+ // for it.
+ ArenaVector<uint32_t> forward_predecessors(graph->GetBlocks().size(),
+ allocator->Adapter(kArenaAllocLinearOrder));
+ for (HBasicBlock* block : graph->GetReversePostOrder()) {
+ size_t number_of_forward_predecessors = block->GetPredecessors().size();
+ if (block->IsLoopHeader()) {
+ number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
+ }
+ forward_predecessors[block->GetBlockId()] = number_of_forward_predecessors;
+ }
+ // (2): Following a worklist approach, first start with the entry block, and
+ // iterate over the successors. When all non-back edge predecessors of a
+ // successor block have been visited, the successor block is added to the
+ // worklist, following an order that satisfies the requirements of the linear order.
+ linear_order->reserve(graph->GetReversePostOrder().size());
+ ArenaVector<HBasicBlock*> worklist(allocator->Adapter(kArenaAllocLinearOrder));
+ worklist.push_back(graph->GetEntryBlock());
+ do {
+ HBasicBlock* current = worklist.back();
+ worklist.pop_back();
+ linear_order->push_back(current);
+ for (HBasicBlock* successor : current->GetSuccessors()) {
+ int block_id = successor->GetBlockId();
+ size_t number_of_remaining_predecessors = forward_predecessors[block_id];
+ if (number_of_remaining_predecessors == 1) {
+ AddToListForLinearization(&worklist, successor);
+ }
+ forward_predecessors[block_id] = number_of_remaining_predecessors - 1;
+ }
+ } while (!worklist.empty());
+
+ DCHECK(graph->HasIrreducibleLoops() || IsLinearOrderWellFormed(graph, linear_order));
+}
+
+} // namespace art
diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h
new file mode 100644
index 0000000000..7122d67be9
--- /dev/null
+++ b/compiler/optimizing/linear_order.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_
+#define ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_
+
+#include "nodes.h"
+
+namespace art {
+
+// Linearizes the 'graph' such that:
+// (1): a block is always after its dominator,
+// (2): blocks of loops are contiguous.
+//
+// Storage is obtained through 'allocator' and the linear order is computed
+// into 'linear_order'. Once computed, iteration can be expressed as:
+//
+// for (HBasicBlock* block : linear_order) // linear order
+//
+// for (HBasicBlock* block : ReverseRange(linear_order)) // linear post order
+//
+void LinearizeGraph(const HGraph* graph,
+ ArenaAllocator* allocator,
+ ArenaVector<HBasicBlock*>* linear_order);
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_
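
A hypothetical caller, to show how the header above is meant to be used; the pass name and everything beyond linear_order.h are placeholders following the conventions already visible in this patch:

#include "linear_order.h"

// Sketch of a pass consuming the computed linear order (names illustrative).
void RunLinearPass(const HGraph* graph, ArenaAllocator* allocator) {
  ArenaVector<HBasicBlock*> linear_order(allocator->Adapter(kArenaAllocLinearOrder));
  LinearizeGraph(graph, allocator, &linear_order);
  for (HBasicBlock* block : linear_order) {
    // ... forward walk in linear order ...
    (void)block;
  }
  for (HBasicBlock* block : ReverseRange(linear_order)) {
    // ... backward walk in linear post order ...
    (void)block;
  }
}
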
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index bd74368e17..37b58ded59 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -56,8 +56,7 @@ static void TestCode(const uint16_t* data, const char* expected) {
liveness.Analyze();
std::ostringstream buffer;
- for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph->GetBlocks()) {
buffer << "Block " << block->GetBlockId() << std::endl;
size_t ssa_values = liveness.GetNumberOfSsaValues();
BitVector* live_in = liveness.GetLiveInSet(*block);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 7347686830..b91e9e6868 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -168,7 +168,9 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> {
const int16_t declaring_class_def_index_; // declaring class's def's dex index.
bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop
// side effects because this location is stored
- // into inside a loop.
+ // into inside a loop. This gives
+ // more precise information on whether a singleton's
+ // location value may be killed by loop side effects.
DISALLOW_COPY_AND_ASSIGN(HeapLocation);
};
@@ -420,8 +422,26 @@ class HeapLocationCollector : public HGraphVisitor {
void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
has_heap_stores_ = true;
- if (instruction->GetBlock()->GetLoopInformation() != nullptr) {
- location->SetValueKilledByLoopSideEffects(true);
+ if (location->GetReferenceInfo()->IsSingleton()) {
+ // A singleton's location value may be killed by loop side effects if it's
+ // defined before that loop, and it's stored into inside that loop.
+ HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+ if (loop_info != nullptr) {
+ HInstruction* ref = location->GetReferenceInfo()->GetReference();
+ DCHECK(ref->IsNewInstance());
+ if (loop_info->IsDefinedOutOfTheLoop(ref)) {
+ // ref's location value may be killed by this loop's side effects.
+ location->SetValueKilledByLoopSideEffects(true);
+ } else {
+ // ref is defined inside this loop so this loop's side effects cannot
+ // kill its location value at the loop header since ref/its location doesn't
+ // exist yet at the loop header.
+ }
+ }
+ } else {
+ // For non-singletons, value_killed_by_loop_side_effects_ is initialized to
+ // true.
+ DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true);
}
}
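
To make the new singleton heuristic concrete, here is the shape of the two cases in source form (written as a C++ analog purely for illustration; the pass itself reasons about HIR, not source code):

struct Point { int x; };

int Demo(int n) {
  Point* before_loop = new Point{0};   // singleton defined before the loop
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    before_loop->x = i;                // store inside the loop: its value at the loop
                                       // header may be killed by loop side effects
    Point* in_loop = new Point{i};     // singleton defined inside the loop: it does not
    sum += in_loop->x;                 // exist at the loop header, so it is unaffected
    delete in_loop;
  }
  sum += before_loop->x;
  delete before_loop;
  return sum;
}
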
@@ -810,9 +830,6 @@ class LSEVisitor : public HGraphVisitor {
if (loop_info != nullptr) {
// instruction is a store in the loop so the loop must do a write.
DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
- // If it's a singleton, IsValueKilledByLoopSideEffects() must be true.
- DCHECK(!ref_info->IsSingleton() ||
- heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects());
if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
@@ -1029,8 +1046,8 @@ void LoadStoreElimination::Run() {
return;
}
HeapLocationCollector heap_location_collector(graph_);
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- heap_location_collector.VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ heap_location_collector.VisitBasicBlock(block);
}
if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
// Bail out if there are too many heap locations to deal with.
@@ -1048,8 +1065,8 @@ void LoadStoreElimination::Run() {
}
heap_location_collector.BuildAliasingMatrix();
LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- lse_visitor.VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ lse_visitor.VisitBasicBlock(block);
}
lse_visitor.RemoveInstructions();
}
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 83596da41a..d157509758 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -27,12 +27,14 @@ LocationSummary::LocationSummary(HInstruction* instruction,
: inputs_(instruction->InputCount(),
instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)),
temps_(instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)),
- output_overlaps_(Location::kOutputOverlap),
call_kind_(call_kind),
+ intrinsified_(intrinsified),
+ has_custom_slow_path_calling_convention_(false),
+ output_overlaps_(Location::kOutputOverlap),
stack_mask_(nullptr),
register_mask_(0),
- live_registers_(),
- intrinsified_(intrinsified) {
+ live_registers_(RegisterSet::Empty()),
+ custom_slow_path_caller_saves_(RegisterSet::Empty()) {
instruction->SetLocations(this);
if (NeedsSafepoint()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 7a78bfdc8d..da27928ef2 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -20,6 +20,7 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
+#include "base/bit_utils.h"
#include "base/bit_vector.h"
#include "base/value_object.h"
@@ -376,6 +377,10 @@ class Location : public ValueObject {
return PolicyField::Decode(GetPayload());
}
+ bool RequiresRegisterKind() const {
+ return GetPolicy() == kRequiresRegister || GetPolicy() == kRequiresFpuRegister;
+ }
+
uintptr_t GetEncoding() const {
return GetPayload();
}
@@ -415,7 +420,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs);
class RegisterSet : public ValueObject {
public:
- RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+ static RegisterSet Empty() { return RegisterSet(); }
void Add(Location loc) {
if (loc.IsRegister()) {
@@ -448,7 +453,7 @@ class RegisterSet : public ValueObject {
}
size_t GetNumberOfRegisters() const {
- return __builtin_popcount(core_registers_) + __builtin_popcount(floating_point_registers_);
+ return POPCOUNT(core_registers_) + POPCOUNT(floating_point_registers_);
}
uint32_t GetCoreRegisters() const {
@@ -460,10 +465,10 @@ class RegisterSet : public ValueObject {
}
private:
+ RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+
uint32_t core_registers_;
uint32_t floating_point_registers_;
-
- DISALLOW_COPY_AND_ASSIGN(RegisterSet);
};
static constexpr bool kIntrinsified = true;
@@ -480,13 +485,14 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
public:
enum CallKind {
kNoCall,
+ kCallOnMainAndSlowPath,
kCallOnSlowPath,
kCallOnMainOnly
};
- LocationSummary(HInstruction* instruction,
- CallKind call_kind = kNoCall,
- bool intrinsified = false);
+ explicit LocationSummary(HInstruction* instruction,
+ CallKind call_kind = kNoCall,
+ bool intrinsified = false);
void SetInAt(uint32_t at, Location location) {
inputs_[at] = location;
@@ -540,10 +546,44 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
Location Out() const { return output_; }
- bool CanCall() const { return call_kind_ != kNoCall; }
- bool WillCall() const { return call_kind_ == kCallOnMainOnly; }
- bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
- bool NeedsSafepoint() const { return CanCall(); }
+ bool CanCall() const {
+ return call_kind_ != kNoCall;
+ }
+
+ bool WillCall() const {
+ return call_kind_ == kCallOnMainOnly || call_kind_ == kCallOnMainAndSlowPath;
+ }
+
+ bool CallsOnSlowPath() const {
+ return call_kind_ == kCallOnSlowPath || call_kind_ == kCallOnMainAndSlowPath;
+ }
+
+ bool OnlyCallsOnSlowPath() const {
+ return call_kind_ == kCallOnSlowPath;
+ }
+
+ bool CallsOnMainAndSlowPath() const {
+ return call_kind_ == kCallOnMainAndSlowPath;
+ }
+
+ bool NeedsSafepoint() const {
+ return CanCall();
+ }
+
+ void SetCustomSlowPathCallerSaves(const RegisterSet& caller_saves) {
+ DCHECK(OnlyCallsOnSlowPath());
+ has_custom_slow_path_calling_convention_ = true;
+ custom_slow_path_caller_saves_ = caller_saves;
+ }
+
+ bool HasCustomSlowPathCallingConvention() const {
+ return has_custom_slow_path_calling_convention_;
+ }
+
+ const RegisterSet& GetCustomSlowPathCallerSaves() const {
+ DCHECK(HasCustomSlowPathCallingConvention());
+ return custom_slow_path_caller_saves_;
+ }
void SetStackBit(uint32_t index) {
stack_mask_->SetBit(index);
@@ -604,18 +644,18 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
return intrinsified_;
}
- void SetIntrinsified(bool intrinsified) {
- intrinsified_ = intrinsified;
- }
-
private:
ArenaVector<Location> inputs_;
ArenaVector<Location> temps_;
+ const CallKind call_kind_;
+ // Whether these are locations for an intrinsified call.
+ const bool intrinsified_;
+ // Whether the slow path has default or custom calling convention.
+ bool has_custom_slow_path_calling_convention_;
// Whether the output overlaps with any of the inputs. If it overlaps, then it cannot
// share the same register as the inputs.
Location::OutputOverlap output_overlaps_;
Location output_;
- const CallKind call_kind_;
// Mask of objects that live in the stack.
BitVector* stack_mask_;
@@ -626,11 +666,10 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
// Registers that are in use at this position.
RegisterSet live_registers_;
- // Whether these are locations for an intrinsified call.
- bool intrinsified_;
+ // Custom slow path caller saves. Valid only if has_custom_slow_path_calling_convention_ is true.
+ RegisterSet custom_slow_path_caller_saves_;
- ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
- ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
+ friend class RegisterAllocatorTest;
DISALLOW_COPY_AND_ASSIGN(LocationSummary);
};
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
new file mode 100644
index 0000000000..51be1d1e91
--- /dev/null
+++ b/compiler/optimizing/loop_optimization.cc
@@ -0,0 +1,377 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loop_optimization.h"
+
+#include "linear_order.h"
+
+namespace art {
+
+// Remove the instruction from the graph. A bit more elaborate than the usual
+// instruction removal, since there may be a cycle in the use structure.
+static void RemoveFromCycle(HInstruction* instruction) {
+ instruction->RemoveAsUserOfAllInputs();
+ instruction->RemoveEnvironmentUsers();
+ instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false);
+}
+
+//
+// Class methods.
+//
+
+HLoopOptimization::HLoopOptimization(HGraph* graph,
+ HInductionVarAnalysis* induction_analysis)
+ : HOptimization(graph, kLoopOptimizationPassName),
+ induction_range_(induction_analysis),
+ loop_allocator_(nullptr),
+ top_loop_(nullptr),
+ last_loop_(nullptr),
+ iset_(nullptr),
+ induction_simplification_count_(0) {
+}
+
+void HLoopOptimization::Run() {
+ // Well-behaved loops only.
+ // TODO: make this less of a sledgehammer.
+ if (graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
+ return;
+ }
+
+ // Phase-local allocator that draws from the global pool. Since the allocator
+ // itself resides on the stack, it is destructed on exiting Run(), which
+ // implies its underlying memory is released immediately.
+ ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+ loop_allocator_ = &allocator;
+
+ // Perform loop optimizations.
+ LocalRun();
+
+ // Detach.
+ loop_allocator_ = nullptr;
+ last_loop_ = top_loop_ = nullptr;
+}
+
+void HLoopOptimization::LocalRun() {
+ // Build the linear order using the phase-local allocator. This step enables building
+ // a loop hierarchy that properly reflects the outer-inner and previous-next relation.
+ ArenaVector<HBasicBlock*> linear_order(loop_allocator_->Adapter(kArenaAllocLinearOrder));
+ LinearizeGraph(graph_, loop_allocator_, &linear_order);
+
+ // Build the loop hierarchy.
+ for (HBasicBlock* block : linear_order) {
+ if (block->IsLoopHeader()) {
+ AddLoop(block->GetLoopInformation());
+ }
+ }
+
+ // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
+ // a temporary set that stores instructions using the phase-local allocator.
+ if (top_loop_ != nullptr) {
+ ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ iset_ = &iset;
+ TraverseLoopsInnerToOuter(top_loop_);
+ iset_ = nullptr; // detach
+ }
+}
+
+void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
+ DCHECK(loop_info != nullptr);
+ LoopNode* node = new (loop_allocator_) LoopNode(loop_info); // phase-local allocator
+ if (last_loop_ == nullptr) {
+ // First loop.
+ DCHECK(top_loop_ == nullptr);
+ last_loop_ = top_loop_ = node;
+ } else if (loop_info->IsIn(*last_loop_->loop_info)) {
+ // Inner loop.
+ node->outer = last_loop_;
+ DCHECK(last_loop_->inner == nullptr);
+ last_loop_ = last_loop_->inner = node;
+ } else {
+ // Subsequent loop.
+ while (last_loop_->outer != nullptr && !loop_info->IsIn(*last_loop_->outer->loop_info)) {
+ last_loop_ = last_loop_->outer;
+ }
+ node->outer = last_loop_->outer;
+ node->previous = last_loop_;
+ DCHECK(last_loop_->next == nullptr);
+ last_loop_ = last_loop_->next = node;
+ }
+}
+
+void HLoopOptimization::RemoveLoop(LoopNode* node) {
+ DCHECK(node != nullptr);
+ DCHECK(node->inner == nullptr);
+ if (node->previous != nullptr) {
+ // Within sequence.
+ node->previous->next = node->next;
+ if (node->next != nullptr) {
+ node->next->previous = node->previous;
+ }
+ } else {
+ // First of sequence.
+ if (node->outer != nullptr) {
+ node->outer->inner = node->next;
+ } else {
+ top_loop_ = node->next;
+ }
+ if (node->next != nullptr) {
+ node->next->outer = node->outer;
+ node->next->previous = nullptr;
+ }
+ }
+}
+
+void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
+ for ( ; node != nullptr; node = node->next) {
+ int current_induction_simplification_count = induction_simplification_count_;
+ if (node->inner != nullptr) {
+ TraverseLoopsInnerToOuter(node->inner);
+ }
+ // Visit loop after its inner loops have been visited. If the induction of any inner
+ // loop has been simplified, recompute the induction information of this loop first.
+ if (current_induction_simplification_count != induction_simplification_count_) {
+ induction_range_.ReVisit(node->loop_info);
+ }
+ SimplifyBlocks(node);
+ SimplifyInduction(node);
+ SimplifyBlocks(node);
+ if (node->inner == nullptr) {
+ RemoveIfEmptyInnerLoop(node);
+ }
+ }
+}
+
+void HLoopOptimization::SimplifyInduction(LoopNode* node) {
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+ // Scan the phis in the header to find opportunities to simplify an induction
+ // cycle that is only used outside the loop. Replace these uses, if any, with
+ // the last value and remove the induction cycle.
+ // Examples: for (int i = 0; x != null; i++) { .... no i .... }
+ // for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
+ for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* phi = it.Current()->AsPhi();
+ iset_->clear();
+ int32_t use_count = 0;
+ if (IsPhiInduction(phi) &&
+ IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) &&
+ TryReplaceWithLastValue(phi, use_count, preheader)) {
+ for (HInstruction* i : *iset_) {
+ RemoveFromCycle(i);
+ }
+ induction_simplification_count_++;
+ }
+ }
+}
+
+void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
+ for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ // Remove instructions that are dead.
+ for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
+ HInstruction* instruction = i.Current();
+ if (instruction->IsDeadAndRemovable()) {
+ block->RemoveInstruction(instruction);
+ }
+ }
+ // Remove trivial control flow blocks from the loop-body.
+ if (block->GetPredecessors().size() == 1 &&
+ block->GetSuccessors().size() == 1 &&
+ block->GetFirstInstruction()->IsGoto()) {
+ HBasicBlock* pred = block->GetSinglePredecessor();
+ HBasicBlock* succ = block->GetSingleSuccessor();
+ if (succ->GetPredecessors().size() == 1) {
+ pred->ReplaceSuccessor(block, succ);
+ block->ClearDominanceInformation();
+ block->SetDominator(pred); // needed by next disconnect.
+ block->DisconnectAndDelete();
+ pred->AddDominatedBlock(succ);
+ succ->SetDominator(pred);
+ }
+ }
+ }
+}
+
+void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) {
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+ // Ensure loop header logic is finite.
+ if (!induction_range_.IsFinite(node->loop_info)) {
+ return;
+ }
+ // Ensure there is only a single loop-body (besides the header).
+ HBasicBlock* body = nullptr;
+ for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
+ if (it.Current() != header) {
+ if (body != nullptr) {
+ return;
+ }
+ body = it.Current();
+ }
+ }
+ // Ensure there is only a single exit point.
+ if (header->GetSuccessors().size() != 2) {
+ return;
+ }
+ HBasicBlock* exit = (header->GetSuccessors()[0] == body)
+ ? header->GetSuccessors()[1]
+ : header->GetSuccessors()[0];
+ // Ensure exit can only be reached by exiting loop.
+ if (exit->GetPredecessors().size() != 1) {
+ return;
+ }
+ // Detect an empty loop: no side effects other than plain iteration. Replace
+ // subsequent index uses, if any, with the last value and remove the loop.
+ iset_->clear();
+ int32_t use_count = 0;
+ if (IsEmptyHeader(header) &&
+ IsEmptyBody(body) &&
+ IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) &&
+ TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) {
+ body->DisconnectAndDelete();
+ exit->RemovePredecessor(header);
+ header->RemoveSuccessor(exit);
+ header->ClearDominanceInformation();
+ header->SetDominator(preheader); // needed by next disconnect.
+ header->DisconnectAndDelete();
+ preheader->AddSuccessor(exit);
+ preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator
+ preheader->AddDominatedBlock(exit);
+ exit->SetDominator(preheader);
+ // Update hierarchy.
+ RemoveLoop(node);
+ }
+}
+
+bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
+ ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
+ if (set != nullptr) {
+ for (HInstruction* i : *set) {
+ // Check that, other than the phi, the instructions are removable, with uses contained in the cycle.
+ // TODO: investigate what cases are no longer in the graph.
+ if (i != phi) {
+ if (!i->IsInBlock() || !i->IsRemovable()) {
+ return false;
+ }
+ for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
+ if (set->find(use.GetUser()) == set->end()) {
+ return false;
+ }
+ }
+ }
+ }
+ DCHECK(iset_->empty());
+ iset_->insert(set->begin(), set->end()); // copy
+ return true;
+ }
+ return false;
+}
+
+// Find: phi: Phi(init, addsub)
+// s: SuspendCheck
+// c: Condition(phi, bound)
+// i: If(c)
+// TODO: Find a less pattern-matching-based approach?
+bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
+ DCHECK(iset_->empty());
+ HInstruction* phi = block->GetFirstPhi();
+ if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) {
+ HInstruction* s = block->GetFirstInstruction();
+ if (s != nullptr && s->IsSuspendCheck()) {
+ HInstruction* c = s->GetNext();
+ if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
+ HInstruction* i = c->GetNext();
+ if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
+ iset_->insert(c);
+ iset_->insert(s);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
+ if (block->GetFirstPhi() == nullptr) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
+bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
+ HInstruction* instruction,
+ /*out*/ int32_t* use_count) {
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (iset_->find(user) == iset_->end()) { // not excluded?
+ HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation();
+ if (other_loop_info != nullptr && other_loop_info->IsIn(*loop_info)) {
+ return false;
+ }
+ ++*use_count;
+ }
+ }
+ return true;
+}
+
+void HLoopOptimization::ReplaceAllUses(HInstruction* instruction, HInstruction* replacement) {
+ const HUseList<HInstruction*>& uses = instruction->GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end;) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ ++it; // increment before replacing
+ if (iset_->find(user) == iset_->end()) { // not excluded?
+ user->ReplaceInput(replacement, index);
+ induction_range_.Replace(user, instruction, replacement); // update induction
+ }
+ }
+ const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
+ for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) {
+ HEnvironment* user = it->GetUser();
+ size_t index = it->GetIndex();
+ ++it; // increment before replacing
+ if (iset_->find(user->GetHolder()) == iset_->end()) { // not excluded?
+ user->RemoveAsUserOfInput(index);
+ user->SetRawEnvAt(index, replacement);
+ replacement->AddEnvUseAt(user, index);
+ }
+ }
+}
+
+bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction,
+ int32_t use_count,
+ HBasicBlock* block) {
+ // If true uses appear after the loop, replace these uses with the last value. Environment
+ // uses can consume this value too, since any first true use is outside the loop (although
+ // this may imply that de-opting looks "ahead" a bit on the phi value). If there are only
+ // environment uses, the value is dropped altogether, since the computations have no effect.
+ if (use_count > 0) {
+ if (!induction_range_.CanGenerateLastValue(instruction)) {
+ return false;
+ }
+ ReplaceAllUses(instruction, induction_range_.GenerateLastValue(instruction, graph_, block));
+ }
+ return true;
+}
+
+} // namespace art
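// A minimal scheduling sketch, assuming the usual pattern of running induction
// variable analysis before the new pass; `graph` and the surrounding pass-manager
// plumbing are assumptions, the constructors and Run() come from this change and
// from induction_var_analysis.h:
HInductionVarAnalysis* induction = new (graph->GetArena()) HInductionVarAnalysis(graph);
HLoopOptimization* loop_opt = new (graph->GetArena()) HLoopOptimization(graph, induction);
induction->Run();
loop_opt->Run();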
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
new file mode 100644
index 0000000000..e18d17531e
--- /dev/null
+++ b/compiler/optimizing/loop_optimization.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
+#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
+
+#include "induction_var_range.h"
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+/**
+ * Loop optimizations. Builds a loop hierarchy and applies optimizations to
+ * the detected nested loops, such as removal of dead induction and empty loops.
+ */
+class HLoopOptimization : public HOptimization {
+ public:
+ HLoopOptimization(HGraph* graph, HInductionVarAnalysis* induction_analysis);
+
+ void Run() OVERRIDE;
+
+ static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
+
+ private:
+ /**
+ * A single loop inside the loop hierarchy representation.
+ */
+ struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> {
+ explicit LoopNode(HLoopInformation* lp_info)
+ : loop_info(lp_info),
+ outer(nullptr),
+ inner(nullptr),
+ previous(nullptr),
+ next(nullptr) {}
+ HLoopInformation* const loop_info;
+ LoopNode* outer;
+ LoopNode* inner;
+ LoopNode* previous;
+ LoopNode* next;
+ };
+
+ void LocalRun();
+
+ void AddLoop(HLoopInformation* loop_info);
+ void RemoveLoop(LoopNode* node);
+
+ void TraverseLoopsInnerToOuter(LoopNode* node);
+
+ void SimplifyInduction(LoopNode* node);
+ void SimplifyBlocks(LoopNode* node);
+ void RemoveIfEmptyInnerLoop(LoopNode* node);
+
+ bool IsPhiInduction(HPhi* phi);
+ bool IsEmptyHeader(HBasicBlock* block);
+ bool IsEmptyBody(HBasicBlock* block);
+
+ bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
+ HInstruction* instruction,
+ /*out*/ int32_t* use_count);
+ void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement);
+ bool TryReplaceWithLastValue(HInstruction* instruction,
+ int32_t use_count,
+ HBasicBlock* block);
+
+ // Range information based on prior induction variable analysis.
+ InductionVarRange induction_range_;
+
+ // Phase-local heap memory allocator for the loop optimizer. Storage obtained
+ // through this allocator is immediately released when the loop optimizer is done.
+ ArenaAllocator* loop_allocator_;
+
+ // Entries into the loop hierarchy representation. The hierarchy resides
+ // in phase-local heap memory.
+ LoopNode* top_loop_;
+ LoopNode* last_loop_;
+
+ // Temporary bookkeeping of a set of instructions.
+ // Contents reside in phase-local heap memory.
+ ArenaSet<HInstruction*>* iset_;
+
+ // Counter that tracks how many induction cycles have been simplified. Useful
+ // to trigger incremental updates of induction variable analysis of outer loops
+ // when the induction of inner loops has changed.
+ int32_t induction_simplification_count_;
+
+ friend class LoopOptimizationTest;
+
+ DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
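// A hypothetical source-level example of what the pass targets, mirroring the
// comment in SimplifyInduction() above (names and numbers are illustrative only):
int ExampleLastValue(int k) {
  for (int i = 0; i < 10; i++) {
    k++;  // `k` is an induction cycle that is never read inside the loop
  }
  return k;  // dead induction removal rewrites this to use k + 10; the body then
             // becomes empty and the whole loop can be deleted
}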
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
new file mode 100644
index 0000000000..7805a69a06
--- /dev/null
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loop_optimization.h"
+#include "optimizing_unit_test.h"
+
+namespace art {
+
+/**
+ * Fixture class for the loop optimization tests. These unit tests focus on
+ * constructing the loop hierarchy. Actual optimizations are tested
+ * through the checker tests.
+ */
+class LoopOptimizationTest : public CommonCompilerTest {
+ public:
+ LoopOptimizationTest()
+ : pool_(),
+ allocator_(&pool_),
+ graph_(CreateGraph(&allocator_)),
+ iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
+ loop_opt_(new (&allocator_) HLoopOptimization(graph_, iva_)) {
+ BuildGraph();
+ }
+
+ ~LoopOptimizationTest() { }
+
+ /** Constructs bare minimum graph. */
+ void BuildGraph() {
+ graph_->SetNumberOfVRegs(1);
+ entry_block_ = new (&allocator_) HBasicBlock(graph_);
+ return_block_ = new (&allocator_) HBasicBlock(graph_);
+ exit_block_ = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry_block_);
+ graph_->AddBlock(return_block_);
+ graph_->AddBlock(exit_block_);
+ graph_->SetEntryBlock(entry_block_);
+ graph_->SetExitBlock(exit_block_);
+ parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+ entry_block_->AddInstruction(parameter_);
+ return_block_->AddInstruction(new (&allocator_) HReturnVoid());
+ exit_block_->AddInstruction(new (&allocator_) HExit());
+ entry_block_->AddSuccessor(return_block_);
+ return_block_->AddSuccessor(exit_block_);
+ }
+
+ /** Adds a loop nest at given position before successor. */
+ HBasicBlock* AddLoop(HBasicBlock* position, HBasicBlock* successor) {
+ HBasicBlock* header = new (&allocator_) HBasicBlock(graph_);
+ HBasicBlock* body = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(header);
+ graph_->AddBlock(body);
+ // Control flow.
+ position->ReplaceSuccessor(successor, header);
+ header->AddSuccessor(body);
+ header->AddSuccessor(successor);
+ header->AddInstruction(new (&allocator_) HIf(parameter_));
+ body->AddSuccessor(header);
+ body->AddInstruction(new (&allocator_) HGoto());
+ return header;
+ }
+
+ /** Performs analysis. */
+ void PerformAnalysis() {
+ graph_->BuildDominatorTree();
+ iva_->Run();
+ // Do not release the loop hierarchy.
+ loop_opt_->loop_allocator_ = &allocator_;
+ loop_opt_->LocalRun();
+ }
+
+ /** Constructs string representation of computed loop hierarchy. */
+ std::string LoopStructure() {
+ return LoopStructureRecurse(loop_opt_->top_loop_);
+ }
+
+ // Helper method
+ std::string LoopStructureRecurse(HLoopOptimization::LoopNode* node) {
+ std::string s;
+ for ( ; node != nullptr; node = node->next) {
+ s.append("[");
+ s.append(LoopStructureRecurse(node->inner));
+ s.append("]");
+ }
+ return s;
+ }
+
+ // General building fields.
+ ArenaPool pool_;
+ ArenaAllocator allocator_;
+ HGraph* graph_;
+ HInductionVarAnalysis* iva_;
+ HLoopOptimization* loop_opt_;
+
+ HBasicBlock* entry_block_;
+ HBasicBlock* return_block_;
+ HBasicBlock* exit_block_;
+
+ HInstruction* parameter_;
+};
+
+//
+// The actual tests.
+//
+
+TEST_F(LoopOptimizationTest, NoLoops) {
+ PerformAnalysis();
+ EXPECT_EQ("", LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, SingleLoop) {
+ AddLoop(entry_block_, return_block_);
+ PerformAnalysis();
+ EXPECT_EQ("[]", LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, LoopNest10) {
+ HBasicBlock* b = entry_block_;
+ HBasicBlock* s = return_block_;
+ for (int i = 0; i < 10; i++) {
+ s = AddLoop(b, s);
+ b = s->GetSuccessors()[0];
+ }
+ PerformAnalysis();
+ EXPECT_EQ("[[[[[[[[[[]]]]]]]]]]", LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, LoopSequence10) {
+ HBasicBlock* b = entry_block_;
+ HBasicBlock* s = return_block_;
+ for (int i = 0; i < 10; i++) {
+ b = AddLoop(b, s);
+ s = b->GetSuccessors()[1];
+ }
+ PerformAnalysis();
+ EXPECT_EQ("[][][][][][][][][][]", LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, LoopSequenceOfNests) {
+ HBasicBlock* b = entry_block_;
+ HBasicBlock* s = return_block_;
+ for (int i = 0; i < 10; i++) {
+ b = AddLoop(b, s);
+ s = b->GetSuccessors()[1];
+ HBasicBlock* bi = b->GetSuccessors()[0];
+ HBasicBlock* si = b;
+ for (int j = 0; j < i; j++) {
+ si = AddLoop(bi, si);
+ bi = si->GetSuccessors()[0];
+ }
+ }
+ PerformAnalysis();
+ EXPECT_EQ("[]"
+ "[[]]"
+ "[[[]]]"
+ "[[[[]]]]"
+ "[[[[[]]]]]"
+ "[[[[[[]]]]]]"
+ "[[[[[[[]]]]]]]"
+ "[[[[[[[[]]]]]]]]"
+ "[[[[[[[[[]]]]]]]]]"
+ "[[[[[[[[[[]]]]]]]]]]",
+ LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, LoopNestWithSequence) {
+ HBasicBlock* b = entry_block_;
+ HBasicBlock* s = return_block_;
+ for (int i = 0; i < 10; i++) {
+ s = AddLoop(b, s);
+ b = s->GetSuccessors()[0];
+ }
+ b = s;
+ s = b->GetSuccessors()[1];
+ for (int i = 0; i < 9; i++) {
+ b = AddLoop(b, s);
+ s = b->GetSuccessors()[1];
+ }
+ PerformAnalysis();
+ EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure());
+}
+
+} // namespace art
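// A reading aid for the expected strings above (assumed mapping, derived from
// LoopStructureRecurse): each "[]" pair is one loop at a given level, and inner
// loops nest inside their enclosing brackets, e.g.
//   while (c) { while (c) { } }   // -> "[[]]"
//   while (c) { }                 // -> "[]"
// so one depth-2 nest followed by a sibling loop renders as "[[]][]".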
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 2808e1b5fc..45c7eb1a46 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -25,7 +25,7 @@
#include "base/stl_util.h"
#include "intrinsics.h"
#include "mirror/class-inl.h"
-#include "scoped_thread_state_change.h"
+#include "scoped_thread_state_change-inl.h"
namespace art {
@@ -35,7 +35,7 @@ namespace art {
// double).
static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD == 0);
-void HGraph::InitializeInexactObjectRTI(StackHandleScopeCollection* handles) {
+void HGraph::InitializeInexactObjectRTI(VariableSizedHandleScope* handles) {
ScopedObjectAccess soa(Thread::Current());
// Create the inexact Object reference type and store it in the HGraph.
ClassLinker* linker = Runtime::Current()->GetClassLinker();
@@ -179,16 +179,16 @@ GraphAnalysisResult HGraph::BuildDominatorTree() {
}
void HGraph::ClearDominanceInformation() {
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- it.Current()->ClearDominanceInformation();
+ for (HBasicBlock* block : GetReversePostOrder()) {
+ block->ClearDominanceInformation();
}
reverse_post_order_.clear();
}
void HGraph::ClearLoopInformation() {
SetHasIrreducibleLoops(false);
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- it.Current()->SetLoopInformation(nullptr);
+ for (HBasicBlock* block : GetReversePostOrder()) {
+ block->SetLoopInformation(nullptr);
}
}
@@ -275,8 +275,7 @@ void HGraph::ComputeDominanceInformation() {
bool update_occurred = true;
while (update_occurred) {
update_occurred = false;
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetReversePostOrder()) {
for (HBasicBlock* successor : block->GetSuccessors()) {
update_occurred |= UpdateDominatorOfSuccessor(block, successor);
}
@@ -287,8 +286,7 @@ void HGraph::ComputeDominanceInformation() {
// Make sure that there are no remaining blocks whose dominator information
// needs to be updated.
if (kIsDebugBuild) {
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetReversePostOrder()) {
for (HBasicBlock* successor : block->GetSuccessors()) {
DCHECK(!UpdateDominatorOfSuccessor(block, successor));
}
@@ -297,8 +295,7 @@ void HGraph::ComputeDominanceInformation() {
// Populate `dominated_blocks_` information after computing all dominators.
// The potential presence of irreducible loops requires to do it after.
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetReversePostOrder()) {
if (!block->IsEntryBlock()) {
block->GetDominator()->AddDominatedBlock(block);
}
@@ -375,8 +372,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
void HGraph::ComputeTryBlockInformation() {
// Iterate in reverse post order to propagate try membership information from
// predecessors to their successors.
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetReversePostOrder()) {
if (block->IsEntryBlock() || block->IsCatchBlock()) {
// Catch blocks after simplification have only exceptional predecessors
// and hence are never in tries.
@@ -446,8 +442,7 @@ GraphAnalysisResult HGraph::AnalyzeLoops() const {
// We iterate post order to ensure we visit inner loops before outer loops.
// `PopulateRecursive` needs this guarantee to know whether a natural loop
// contains an irreducible loop.
- for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetPostOrder()) {
if (block->IsLoopHeader()) {
if (block->IsCatchBlock()) {
// TODO: Dealing with exceptional back edges could be tricky because
@@ -1134,8 +1129,8 @@ void HGraphVisitor::VisitInsertionOrder() {
}
void HGraphVisitor::VisitReversePostOrder() {
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ VisitBasicBlock(block);
}
}
@@ -1986,10 +1981,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// Update the environments in this graph to have the invoke's environment
// as parent.
{
- HReversePostOrderIterator it(*this);
- it.Advance(); // Skip the entry block, we do not need to update the entry's suspend check.
- for (; !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ // Skip the entry block, we do not need to update the entry's suspend check.
+ for (HBasicBlock* block : GetReversePostOrderSkipEntryBlock()) {
for (HInstructionIterator instr_it(block->GetInstructions());
!instr_it.Done();
instr_it.Advance()) {
@@ -2070,8 +2063,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// Do a reverse post order of the blocks in the callee and do (1), (2), (3)
// and (4) to the blocks that apply.
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
+ for (HBasicBlock* current : GetReversePostOrder()) {
if (current != exit_block_ && current != entry_block_ && current != first) {
DCHECK(current->GetTryCatchInformation() == nullptr);
DCHECK(current->GetGraph() == this);
@@ -2242,7 +2234,7 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
}
static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
if (rti.IsValid()) {
DCHECK(upper_bound_rti.IsSupertypeOf(rti))
<< " upper_bound_rti: " << upper_bound_rti
@@ -2295,7 +2287,7 @@ std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
ScopedObjectAccess soa(Thread::Current());
os << "["
<< " is_valid=" << rhs.IsValid()
- << " type=" << (!rhs.IsValid() ? "?" : PrettyClass(rhs.GetTypeHandle().Get()))
+ << " type=" << (!rhs.IsValid() ? "?" : mirror::Class::PrettyClass(rhs.GetTypeHandle().Get()))
<< " is_exact=" << rhs.IsExact()
<< " ]";
return os;
@@ -2500,12 +2492,8 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
LoadKind load_kind = GetLoadKind();
if (HasAddress(load_kind)) {
return GetAddress() == other_load_string->GetAddress();
- } else if (HasStringReference(load_kind)) {
- return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile());
} else {
- DCHECK(HasDexCacheReference(load_kind)) << load_kind;
- // If the string indexes and dex files are the same, dex cache element offsets
- // must also be the same, so we don't need to compare them.
+ DCHECK(HasStringReference(load_kind)) << load_kind;
return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile());
}
}
@@ -2535,8 +2523,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
return os << "BootImageAddress";
case HLoadString::LoadKind::kDexCacheAddress:
return os << "DexCacheAddress";
- case HLoadString::LoadKind::kDexCachePcRelative:
- return os << "DexCachePcRelative";
+ case HLoadString::LoadKind::kBssEntry:
+ return os << "BssEntry";
case HLoadString::LoadKind::kDexCacheViaMethod:
return os << "DexCacheViaMethod";
default:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index dfa8276651..6a45149509 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -24,20 +24,22 @@
#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
+#include "base/array_ref.h"
+#include "base/iteration_range.h"
#include "base/stl_util.h"
+#include "base/transform_array_ref.h"
#include "dex_file.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "handle.h"
#include "handle_scope.h"
#include "invoke_type.h"
+#include "intrinsics_enum.h"
#include "locations.h"
#include "method_reference.h"
#include "mirror/class.h"
#include "offsets.h"
#include "primitive.h"
-#include "utils/array_ref.h"
#include "utils/intrusive_forward_list.h"
-#include "utils/transform_array_ref.h"
namespace art {
@@ -109,6 +111,9 @@ enum IfCondition {
kCondBE, // <=
kCondA, // >
kCondAE, // >=
+ // First and last aliases.
+ kCondFirst = kCondEQ,
+ kCondLast = kCondAE,
};
enum GraphAnalysisResult {
@@ -171,7 +176,7 @@ class ReferenceTypeInfo : ValueObject {
static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact);
- static ReferenceTypeInfo Create(TypeHandle type_handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+ static ReferenceTypeInfo Create(TypeHandle type_handle) REQUIRES_SHARED(Locks::mutator_lock_) {
return Create(type_handle, type_handle->CannotBeAssignedFromOtherTypes());
}
@@ -191,49 +196,49 @@ class ReferenceTypeInfo : ValueObject {
bool IsExact() const { return is_exact_; }
- bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsObjectClass() const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
return GetTypeHandle()->IsObjectClass();
}
- bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsStringClass() const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
return GetTypeHandle()->IsStringClass();
}
- bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsObjectArray() const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
}
- bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsInterface() const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
return GetTypeHandle()->IsInterface();
}
- bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsArrayClass() const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
return GetTypeHandle()->IsArrayClass();
}
- bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsPrimitiveArrayClass() const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
return GetTypeHandle()->IsPrimitiveArray();
}
- bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsNonPrimitiveArrayClass() const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
}
- bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool CanArrayHold(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
if (!IsExact()) return false;
if (!IsArrayClass()) return false;
return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
}
- bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
if (!IsExact()) return false;
if (!IsArrayClass()) return false;
@@ -244,13 +249,13 @@ class ReferenceTypeInfo : ValueObject {
Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
- bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsSupertypeOf(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
DCHECK(rti.IsValid());
return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
}
- bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
DCHECK(rti.IsValid());
return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
@@ -260,7 +265,7 @@ class ReferenceTypeInfo : ValueObject {
// Returns true if the type information provide the same amount of details.
// Note that it does not mean that the instructions have the same actual type
// (because the type can be the result of a merge).
- bool IsEqual(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ bool IsEqual(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) {
if (!IsValid() && !rti.IsValid()) {
// Invalid types are equal.
return true;
@@ -332,7 +337,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
}
// Acquires and stores RTI of inexact Object to be used when creating HNullConstant.
- void InitializeInexactObjectRTI(StackHandleScopeCollection* handles);
+ void InitializeInexactObjectRTI(VariableSizedHandleScope* handles);
ArenaAllocator* GetArena() const { return arena_; }
const ArenaVector<HBasicBlock*>& GetBlocks() const { return blocks_; }
@@ -456,10 +461,23 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
return reverse_post_order_;
}
+ ArrayRef<HBasicBlock* const> GetReversePostOrderSkipEntryBlock() {
+ DCHECK(GetReversePostOrder()[0] == entry_block_);
+ return ArrayRef<HBasicBlock* const>(GetReversePostOrder()).SubArray(1);
+ }
+
+ IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetPostOrder() const {
+ return ReverseRange(GetReversePostOrder());
+ }
+
const ArenaVector<HBasicBlock*>& GetLinearOrder() const {
return linear_order_;
}
+ IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetLinearPostOrder() const {
+ return ReverseRange(GetLinearOrder());
+ }
+
bool HasBoundsChecks() const {
return has_bounds_checks_;
}
@@ -575,7 +593,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// List of blocks to perform a reverse post order tree traversal.
ArenaVector<HBasicBlock*> reverse_post_order_;
- // List of blocks to perform a linear order tree traversal.
+ // List of blocks to perform a linear order tree traversal. Unlike the reverse
+ // post order, this order is not incrementally kept up-to-date.
ArenaVector<HBasicBlock*> linear_order_;
HBasicBlock* entry_block_;
@@ -827,7 +846,7 @@ static constexpr uint32_t kInvalidBlockId = static_cast<uint32_t>(-1);
class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
public:
- HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc)
+ explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc)
: graph_(graph),
predecessors_(graph->GetArena()->Adapter(kArenaAllocPredecessors)),
successors_(graph->GetArena()->Adapter(kArenaAllocSuccessors)),
@@ -1311,7 +1330,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \
M(MipsComputeBaseMethodAddress, Instruction) \
- M(MipsDexCacheArraysBase, Instruction)
+ M(MipsDexCacheArraysBase, Instruction) \
+ M(MipsPackedSwitch, Instruction)
#endif
#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M)
@@ -1925,6 +1945,22 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return !HasEnvironmentUses() && GetUses().HasExactlyOneElement();
}
+ bool IsRemovable() const {
+ return
+ !HasSideEffects() &&
+ !CanThrow() &&
+ !IsSuspendCheck() &&
+ !IsControlFlow() &&
+ !IsNativeDebugInfo() &&
+ !IsParameterValue() &&
+ // If we added an explicit barrier then we should keep it.
+ !IsMemoryBarrier();
+ }
+
+ bool IsDeadAndRemovable() const {
+ return IsRemovable() && !HasUses();
+ }
+
// Does this instruction strictly dominate `other_instruction`?
// Returns false if this instruction and `other_instruction` are the same.
// Aborts if this instruction and `other_instruction` are both phis.
@@ -2074,10 +2110,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
// to the current method. Such instructions are:
// (1): Instructions that require an environment, as calling the runtime requires
// to walk the stack and have the current method stored at a specific stack address.
- // (2): Object literals like classes and strings, that are loaded from the dex cache
- // fields of the current method.
+ // (2): HCurrentMethod, potentially used by HInvokeStaticOrDirect, HLoadString, or HLoadClass
+ // to access the dex cache.
bool NeedsCurrentMethod() const {
- return NeedsEnvironment() || IsLoadClass() || IsLoadString();
+ return NeedsEnvironment() || IsCurrentMethod();
}
// Returns whether the code generation of the instruction will require to have access
@@ -3679,17 +3715,6 @@ class HNewInstance FINAL : public HExpression<2> {
DISALLOW_COPY_AND_ASSIGN(HNewInstance);
};
-enum class Intrinsics {
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \
- k ## Name,
-#include "intrinsics_list.h"
- kNone,
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
-#undef OPTIMIZING_INTRINSICS
-};
-std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic);
-
enum IntrinsicNeedsEnvironmentOrCache {
kNoEnvironmentOrCache, // Intrinsic does not require an environment or dex cache.
kNeedsEnvironmentOrCache // Intrinsic requires an environment or requires a dex cache.
@@ -3731,8 +3756,8 @@ class HInvoke : public HInstruction {
uint32_t GetDexMethodIndex() const { return dex_method_index_; }
const DexFile& GetDexFile() const { return GetEnvironment()->GetDexFile(); }
- InvokeType GetOriginalInvokeType() const {
- return GetPackedField<OriginalInvokeTypeField>();
+ InvokeType GetInvokeType() const {
+ return GetPackedField<InvokeTypeField>();
}
Intrinsics GetIntrinsic() const {
@@ -3766,21 +3791,22 @@ class HInvoke : public HInstruction {
bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
+ ArtMethod* GetResolvedMethod() const { return resolved_method_; }
+
DECLARE_ABSTRACT_INSTRUCTION(Invoke);
protected:
- static constexpr size_t kFieldOriginalInvokeType = kNumberOfGenericPackedBits;
- static constexpr size_t kFieldOriginalInvokeTypeSize =
+ static constexpr size_t kFieldInvokeType = kNumberOfGenericPackedBits;
+ static constexpr size_t kFieldInvokeTypeSize =
MinimumBitsToStore(static_cast<size_t>(kMaxInvokeType));
static constexpr size_t kFieldReturnType =
- kFieldOriginalInvokeType + kFieldOriginalInvokeTypeSize;
+ kFieldInvokeType + kFieldInvokeTypeSize;
static constexpr size_t kFieldReturnTypeSize =
MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize;
static constexpr size_t kNumberOfInvokePackedBits = kFlagCanThrow + 1;
static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
- using OriginalInvokeTypeField =
- BitField<InvokeType, kFieldOriginalInvokeType, kFieldOriginalInvokeTypeSize>;
+ using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>;
using ReturnTypeField = BitField<Primitive::Type, kFieldReturnType, kFieldReturnTypeSize>;
HInvoke(ArenaAllocator* arena,
@@ -3789,23 +3815,26 @@ class HInvoke : public HInstruction {
Primitive::Type return_type,
uint32_t dex_pc,
uint32_t dex_method_index,
- InvokeType original_invoke_type)
+ ArtMethod* resolved_method,
+ InvokeType invoke_type)
: HInstruction(
SideEffects::AllExceptGCDependency(), dex_pc), // Assume write/read on all fields/arrays.
number_of_arguments_(number_of_arguments),
+ resolved_method_(resolved_method),
inputs_(number_of_arguments + number_of_other_inputs,
arena->Adapter(kArenaAllocInvokeInputs)),
dex_method_index_(dex_method_index),
intrinsic_(Intrinsics::kNone),
intrinsic_optimizations_(0) {
SetPackedField<ReturnTypeField>(return_type);
- SetPackedField<OriginalInvokeTypeField>(original_invoke_type);
+ SetPackedField<InvokeTypeField>(invoke_type);
SetPackedFlag<kFlagCanThrow>(true);
}
void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); }
uint32_t number_of_arguments_;
+ ArtMethod* const resolved_method_;
ArenaVector<HUserRecord<HInstruction*>> inputs_;
const uint32_t dex_method_index_;
Intrinsics intrinsic_;
@@ -3831,6 +3860,7 @@ class HInvokeUnresolved FINAL : public HInvoke {
return_type,
dex_pc,
dex_method_index,
+ nullptr,
invoke_type) {
}
@@ -3924,10 +3954,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
Primitive::Type return_type,
uint32_t dex_pc,
uint32_t method_index,
- MethodReference target_method,
+ ArtMethod* resolved_method,
DispatchInfo dispatch_info,
- InvokeType original_invoke_type,
- InvokeType optimized_invoke_type,
+ InvokeType invoke_type,
+ MethodReference target_method,
ClinitCheckRequirement clinit_check_requirement)
: HInvoke(arena,
number_of_arguments,
@@ -3939,10 +3969,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
return_type,
dex_pc,
method_index,
- original_invoke_type),
+ resolved_method,
+ invoke_type),
target_method_(target_method),
dispatch_info_(dispatch_info) {
- SetPackedField<OptimizedInvokeTypeField>(optimized_invoke_type);
SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement);
}
@@ -4006,14 +4036,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
bool HasSpecialInput() const { return GetNumberOfArguments() != InputCount(); }
- InvokeType GetOptimizedInvokeType() const {
- return GetPackedField<OptimizedInvokeTypeField>();
- }
-
- void SetOptimizedInvokeType(InvokeType invoke_type) {
- SetPackedField<OptimizedInvokeTypeField>(invoke_type);
- }
-
MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
@@ -4035,12 +4057,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
}
}
bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
- MethodReference GetTargetMethod() const { return target_method_; }
- void SetTargetMethod(MethodReference method) { target_method_ = method; }
- int32_t GetStringInitOffset() const {
+ QuickEntrypointEnum GetStringInitEntryPoint() const {
DCHECK(IsStringInit());
- return dispatch_info_.method_load_data;
+ return static_cast<QuickEntrypointEnum>(dispatch_info_.method_load_data);
}
uint64_t GetMethodAddress() const {
@@ -4064,7 +4084,11 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
// Is this instruction a call to a static method?
bool IsStatic() const {
- return GetOriginalInvokeType() == kStatic;
+ return GetInvokeType() == kStatic;
+ }
+
+ MethodReference GetTargetMethod() const {
+ return target_method_;
}
// Remove the HClinitCheck or the replacement HLoadClass (set as last input by
@@ -4106,26 +4130,18 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
void RemoveInputAt(size_t index);
private:
- static constexpr size_t kFieldOptimizedInvokeType = kNumberOfInvokePackedBits;
- static constexpr size_t kFieldOptimizedInvokeTypeSize =
- MinimumBitsToStore(static_cast<size_t>(kMaxInvokeType));
- static constexpr size_t kFieldClinitCheckRequirement =
- kFieldOptimizedInvokeType + kFieldOptimizedInvokeTypeSize;
+ static constexpr size_t kFieldClinitCheckRequirement = kNumberOfInvokePackedBits;
static constexpr size_t kFieldClinitCheckRequirementSize =
MinimumBitsToStore(static_cast<size_t>(ClinitCheckRequirement::kLast));
static constexpr size_t kNumberOfInvokeStaticOrDirectPackedBits =
kFieldClinitCheckRequirement + kFieldClinitCheckRequirementSize;
static_assert(kNumberOfInvokeStaticOrDirectPackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
- using OptimizedInvokeTypeField =
- BitField<InvokeType, kFieldOptimizedInvokeType, kFieldOptimizedInvokeTypeSize>;
using ClinitCheckRequirementField = BitField<ClinitCheckRequirement,
kFieldClinitCheckRequirement,
kFieldClinitCheckRequirementSize>;
- // The target method may refer to different dex file or method index than the original
- // invoke. This happens for sharpened calls and for calls where a method was redeclared
- // in derived class to increase visibility.
+ // Cached values of the resolved method, to avoid needing the mutator lock.
MethodReference target_method_;
DispatchInfo dispatch_info_;
@@ -4141,8 +4157,16 @@ class HInvokeVirtual FINAL : public HInvoke {
Primitive::Type return_type,
uint32_t dex_pc,
uint32_t dex_method_index,
+ ArtMethod* resolved_method,
uint32_t vtable_index)
- : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index, kVirtual),
+ : HInvoke(arena,
+ number_of_arguments,
+ 0u,
+ return_type,
+ dex_pc,
+ dex_method_index,
+ resolved_method,
+ kVirtual),
vtable_index_(vtable_index) {}
bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
@@ -4155,6 +4179,7 @@ class HInvokeVirtual FINAL : public HInvoke {
DECLARE_INSTRUCTION(InvokeVirtual);
private:
+ // Cached value of the resolved method, to avoid needing the mutator lock.
const uint32_t vtable_index_;
DISALLOW_COPY_AND_ASSIGN(HInvokeVirtual);
@@ -4167,8 +4192,16 @@ class HInvokeInterface FINAL : public HInvoke {
Primitive::Type return_type,
uint32_t dex_pc,
uint32_t dex_method_index,
+ ArtMethod* resolved_method,
uint32_t imt_index)
- : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index, kInterface),
+ : HInvoke(arena,
+ number_of_arguments,
+ 0u,
+ return_type,
+ dex_pc,
+ dex_method_index,
+ resolved_method,
+ kInterface),
imt_index_(imt_index) {}
bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
@@ -4182,6 +4215,7 @@ class HInvokeInterface FINAL : public HInvoke {
DECLARE_INSTRUCTION(InvokeInterface);
private:
+ // Cached value of the resolved method, to avoid needing the mutator lock.
const uint32_t imt_index_;
DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
@@ -4363,7 +4397,7 @@ class HDiv FINAL : public HBinaryOperation {
HInstruction* left,
HInstruction* right,
uint32_t dex_pc)
- : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {}
+ : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
template <typename T>
T ComputeIntegral(T x, T y) const {
@@ -4398,11 +4432,6 @@ class HDiv FINAL : public HBinaryOperation {
ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
}
- static SideEffects SideEffectsForArchRuntimeCalls() {
- // The generated code can use a runtime call.
- return SideEffects::CanTriggerGC();
- }
-
DECLARE_INSTRUCTION(Div);
private:
@@ -4415,7 +4444,7 @@ class HRem FINAL : public HBinaryOperation {
HInstruction* left,
HInstruction* right,
uint32_t dex_pc)
- : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {}
+ : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
template <typename T>
T ComputeIntegral(T x, T y) const {
@@ -4450,10 +4479,6 @@ class HRem FINAL : public HBinaryOperation {
ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
}
- static SideEffects SideEffectsForArchRuntimeCalls() {
- return SideEffects::CanTriggerGC();
- }
-
DECLARE_INSTRUCTION(Rem);
private:
@@ -4906,9 +4931,7 @@ class HTypeConversion FINAL : public HExpression<1> {
public:
// Instantiate a type conversion of `input` to `result_type`.
HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc)
- : HExpression(result_type,
- SideEffectsForArchRuntimeCalls(input->GetType(), result_type),
- dex_pc) {
+ : HExpression(result_type, SideEffects::None(), dex_pc) {
SetRawInputAt(0, input);
// Invariant: We should never generate a conversion to a Boolean value.
DCHECK_NE(Primitive::kPrimBoolean, result_type);
@@ -4927,18 +4950,6 @@ class HTypeConversion FINAL : public HExpression<1> {
// containing the result. If the input cannot be converted, return nullptr.
HConstant* TryStaticEvaluation() const;
- static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type input_type,
- Primitive::Type result_type) {
- // Some architectures may not require the 'GC' side effects, but at this point
- // in the compilation process we do not know what architecture we will
- // generate code for, so we must be conservative.
- if ((Primitive::IsFloatingPointType(input_type) && Primitive::IsIntegralType(result_type))
- || (input_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(result_type))) {
- return SideEffects::CanTriggerGC();
- }
- return SideEffects::None();
- }
-
DECLARE_INSTRUCTION(TypeConversion);
private:
@@ -5020,9 +5031,7 @@ class HInstanceFieldGet FINAL : public HExpression<1> {
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache,
uint32_t dex_pc)
- : HExpression(field_type,
- SideEffects::FieldReadOfType(field_type, is_volatile),
- dex_pc),
+ : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc),
field_info_(field_offset,
field_type,
is_volatile,
@@ -5073,8 +5082,7 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> {
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache,
uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile),
- dex_pc),
+ : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc),
field_info_(field_offset,
field_type,
is_volatile,
@@ -5441,7 +5449,8 @@ class HLoadClass FINAL : public HInstruction {
bool is_referrers_class,
uint32_t dex_pc,
bool needs_access_check,
- bool is_in_dex_cache)
+ bool is_in_dex_cache,
+ bool is_in_boot_image)
: HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
special_input_(HUserRecord<HInstruction*>(current_method)),
type_index_(type_index),
@@ -5455,6 +5464,7 @@ class HLoadClass FINAL : public HInstruction {
is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod);
SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache);
+ SetPackedFlag<kFlagIsInBootImage>(is_in_boot_image);
SetPackedFlag<kFlagGenerateClInitCheck>(false);
}
@@ -5545,6 +5555,7 @@ class HLoadClass FINAL : public HInstruction {
bool IsReferrersClass() const { return GetLoadKind() == LoadKind::kReferrersClass; }
bool NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); }
bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); }
+ bool IsInBootImage() const { return GetPackedFlag<kFlagIsInBootImage>(); }
bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); }
void MarkInDexCache() {
@@ -5554,6 +5565,10 @@ class HLoadClass FINAL : public HInstruction {
SetSideEffects(SideEffects::None());
}
+ void MarkInBootImage() {
+ SetPackedFlag<kFlagIsInBootImage>(true);
+ }
+
void AddSpecialInput(HInstruction* special_input);
using HInstruction::GetInputRecords; // Keep the const version visible.
@@ -5571,9 +5586,10 @@ class HLoadClass FINAL : public HInstruction {
private:
static constexpr size_t kFlagNeedsAccessCheck = kNumberOfGenericPackedBits;
static constexpr size_t kFlagIsInDexCache = kFlagNeedsAccessCheck + 1;
+ static constexpr size_t kFlagIsInBootImage = kFlagIsInDexCache + 1;
// Whether this instruction must generate the initialization check.
// Used for code generation.
- static constexpr size_t kFlagGenerateClInitCheck = kFlagIsInDexCache + 1;
+ static constexpr size_t kFlagGenerateClInitCheck = kFlagIsInBootImage + 1;
static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1;
static constexpr size_t kFieldLoadKindSize =
MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
@@ -5658,10 +5674,9 @@ class HLoadString FINAL : public HInstruction {
// Used for strings outside the boot image referenced by JIT-compiled code.
kDexCacheAddress,
- // Load from resolved strings array in the dex cache using a PC-relative load.
- // Used for strings outside boot image when we know that we can access
- // the dex cache arrays using a PC-relative load.
- kDexCachePcRelative,
+ // Load from an entry in the .bss section using a PC-relative load.
+ // Used for strings outside boot image when .bss is accessible with a PC-relative load.
+ kBssEntry,
// Load from resolved strings array accessed through the class loaded from
// the compiled method's own ArtMethod*. This is the default access type when
@@ -5680,7 +5695,7 @@ class HLoadString FINAL : public HInstruction {
string_index_(string_index) {
SetPackedFlag<kFlagIsInDexCache>(false);
SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod);
- load_data_.ref.dex_file = &dex_file;
+ load_data_.dex_file_ = &dex_file;
}
void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
@@ -5693,20 +5708,11 @@ class HLoadString FINAL : public HInstruction {
const DexFile& dex_file,
uint32_t string_index) {
DCHECK(HasStringReference(load_kind));
- load_data_.ref.dex_file = &dex_file;
+ load_data_.dex_file_ = &dex_file;
string_index_ = string_index;
SetLoadKindInternal(load_kind);
}
- void SetLoadKindWithDexCacheReference(LoadKind load_kind,
- const DexFile& dex_file,
- uint32_t element_index) {
- DCHECK(HasDexCacheReference(load_kind));
- load_data_.ref.dex_file = &dex_file;
- load_data_.ref.dex_cache_element_index = element_index;
- SetLoadKindInternal(load_kind);
- }
-
LoadKind GetLoadKind() const {
return GetPackedField<LoadKindField>();
}
@@ -5718,8 +5724,6 @@ class HLoadString FINAL : public HInstruction {
return string_index_;
}
- uint32_t GetDexCacheElementOffset() const;
-
uint64_t GetAddress() const {
DCHECK(HasAddress(GetLoadKind()));
return load_data_.address;
@@ -5789,6 +5793,7 @@ class HLoadString FINAL : public HInstruction {
static bool HasStringReference(LoadKind load_kind) {
return load_kind == LoadKind::kBootImageLinkTimeAddress ||
load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+ load_kind == LoadKind::kBssEntry ||
load_kind == LoadKind::kDexCacheViaMethod;
}
@@ -5796,10 +5801,6 @@ class HLoadString FINAL : public HInstruction {
return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
}
- static bool HasDexCacheReference(LoadKind load_kind) {
- return load_kind == LoadKind::kDexCachePcRelative;
- }
-
void SetLoadKindInternal(LoadKind load_kind);
// The special input is the HCurrentMethod for kDexCacheViaMethod.
@@ -5812,10 +5813,7 @@ class HLoadString FINAL : public HInstruction {
uint32_t string_index_;
union {
- struct {
- const DexFile* dex_file; // For string reference and dex cache reference.
- uint32_t dex_cache_element_index; // Only for dex cache reference.
- } ref;
+ const DexFile* dex_file_; // For string reference.
uint64_t address; // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets.
} load_data_;
@@ -5825,15 +5823,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs);
// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
inline const DexFile& HLoadString::GetDexFile() const {
- DCHECK(HasStringReference(GetLoadKind()) || HasDexCacheReference(GetLoadKind()))
- << GetLoadKind();
- return *load_data_.ref.dex_file;
-}
-
-// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
-inline uint32_t HLoadString::GetDexCacheElementOffset() const {
- DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind();
- return load_data_.ref.dex_cache_element_index;
+ DCHECK(HasStringReference(GetLoadKind())) << GetLoadKind();
+ return *load_data_.dex_file_;
}
// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
@@ -5841,7 +5832,7 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
// The special input is used for PC-relative loads on some architectures,
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
- GetLoadKind() == LoadKind::kDexCachePcRelative ||
+ GetLoadKind() == LoadKind::kBssEntry ||
GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
// HLoadString::GetInputRecords() returns an empty array at this point,
@@ -5895,9 +5886,7 @@ class HStaticFieldGet FINAL : public HExpression<1> {
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache,
uint32_t dex_pc)
- : HExpression(field_type,
- SideEffects::FieldReadOfType(field_type, is_volatile),
- dex_pc),
+ : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc),
field_info_(field_offset,
field_type,
is_volatile,
@@ -5945,8 +5934,7 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> {
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache,
uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile),
- dex_pc),
+ : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc),
field_info_(field_offset,
field_type,
is_volatile,
@@ -6223,7 +6211,7 @@ class HInstanceOf FINAL : public HExpression<2> {
class HBoundType FINAL : public HExpression<1> {
public:
- HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc)
+ explicit HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc)
: HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc),
upper_bound_(ReferenceTypeInfo::CreateInvalid()) {
SetPackedFlag<kFlagUpperCanBeNull>(true);
@@ -6644,95 +6632,6 @@ class HGraphDelegateVisitor : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(HGraphDelegateVisitor);
};
-class HInsertionOrderIterator : public ValueObject {
- public:
- explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
-
- bool Done() const { return index_ == graph_.GetBlocks().size(); }
- HBasicBlock* Current() const { return graph_.GetBlocks()[index_]; }
- void Advance() { ++index_; }
-
- private:
- const HGraph& graph_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HInsertionOrderIterator);
-};
-
-class HReversePostOrderIterator : public ValueObject {
- public:
- explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {
- // Check that reverse post order of the graph has been built.
- DCHECK(!graph.GetReversePostOrder().empty());
- }
-
- bool Done() const { return index_ == graph_.GetReversePostOrder().size(); }
- HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_]; }
- void Advance() { ++index_; }
-
- private:
- const HGraph& graph_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HReversePostOrderIterator);
-};
-
-class HPostOrderIterator : public ValueObject {
- public:
- explicit HPostOrderIterator(const HGraph& graph)
- : graph_(graph), index_(graph_.GetReversePostOrder().size()) {
- // Check that reverse post order of the graph has been built.
- DCHECK(!graph.GetReversePostOrder().empty());
- }
-
- bool Done() const { return index_ == 0; }
- HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_ - 1u]; }
- void Advance() { --index_; }
-
- private:
- const HGraph& graph_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator);
-};
-
-class HLinearPostOrderIterator : public ValueObject {
- public:
- explicit HLinearPostOrderIterator(const HGraph& graph)
- : order_(graph.GetLinearOrder()), index_(graph.GetLinearOrder().size()) {}
-
- bool Done() const { return index_ == 0; }
-
- HBasicBlock* Current() const { return order_[index_ - 1u]; }
-
- void Advance() {
- --index_;
- DCHECK_GE(index_, 0U);
- }
-
- private:
- const ArenaVector<HBasicBlock*>& order_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator);
-};
-
-class HLinearOrderIterator : public ValueObject {
- public:
- explicit HLinearOrderIterator(const HGraph& graph)
- : order_(graph.GetLinearOrder()), index_(0) {}
-
- bool Done() const { return index_ == order_.size(); }
- HBasicBlock* Current() const { return order_[index_]; }
- void Advance() { ++index_; }
-
- private:
- const ArenaVector<HBasicBlock*>& order_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
-};
-
// Iterator over the blocks that are part of the loop. Includes blocks that are
// part of an inner loop. The blocks are iterated in order of their block id.
diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h
index de77245e17..36431c1fb9 100644
--- a/compiler/optimizing/nodes_mips.h
+++ b/compiler/optimizing/nodes_mips.h
@@ -66,6 +66,41 @@ class HMipsDexCacheArraysBase : public HExpression<0> {
DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase);
};
+// Mips version of HPackedSwitch that holds a pointer to the base method address.
+class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> {
+ public:
+ HMipsPackedSwitch(int32_t start_value,
+ int32_t num_entries,
+ HInstruction* input,
+ HMipsComputeBaseMethodAddress* method_base,
+ uint32_t dex_pc)
+ : HTemplateInstruction(SideEffects::None(), dex_pc),
+ start_value_(start_value),
+ num_entries_(num_entries) {
+ SetRawInputAt(0, input);
+ SetRawInputAt(1, method_base);
+ }
+
+ bool IsControlFlow() const OVERRIDE { return true; }
+
+ int32_t GetStartValue() const { return start_value_; }
+
+ int32_t GetNumEntries() const { return num_entries_; }
+
+ HBasicBlock* GetDefaultBlock() const {
+ // Last entry is the default block.
+ return GetBlock()->GetSuccessors()[num_entries_];
+ }
+
+ DECLARE_INSTRUCTION(MipsPackedSwitch);
+
+ private:
+ const int32_t start_value_;
+ const int32_t num_entries_;
+
+ DISALLOW_COPY_AND_ASSIGN(HMipsPackedSwitch);
+};
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
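
For illustration only (assumed, not part of this change): HMipsPackedSwitch keeps the usual
HPackedSwitch successor convention, where successor slots 0..num_entries-1 hold the case targets
and the default target sits last, which is what GetDefaultBlock() above relies on. A minimal
sketch of how a switch value would map to its target block under that assumption:

    // Sketch only; assumes the ART graph headers (HBasicBlock, ArenaVector) are
    // available, and the successor layout described above.
    HBasicBlock* TargetForValue(const HMipsPackedSwitch* switch_insn, int32_t value) {
      int64_t index = static_cast<int64_t>(value) - switch_insn->GetStartValue();
      if (index >= 0 && index < switch_insn->GetNumEntries()) {
        return switch_insn->GetBlock()->GetSuccessors()[static_cast<size_t>(index)];
      }
      return switch_insn->GetDefaultBlock();  // i.e. successors()[num_entries_].
    }
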
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index 8bd8667f84..814202e97b 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -17,6 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
#define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+// This `#include` should never be needed for compilation, as this file (`nodes_shared.h`) is included
+// in `nodes.h`. However, it helps editing tools (e.g. YouCompleteMe) by giving them better context
+// (defining `HInstruction` and co).
+#include "nodes.h"
+
namespace art {
class HMultiplyAccumulate FINAL : public HExpression<3> {
@@ -117,10 +122,15 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation {
// This instruction computes an intermediate address pointing in the 'middle' of an object. The
// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
// never used across anything that can trigger GC.
+// The result of this instruction is not a pointer in the sense of `Primitive::kPrimNot`, so we
+// represent it with the type `Primitive::kPrimInt`.
class HIntermediateAddress FINAL : public HExpression<2> {
public:
HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
- : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
+ : HExpression(Primitive::kPrimInt, SideEffects::DependsOnGC(), dex_pc) {
+ DCHECK_EQ(Primitive::ComponentSize(Primitive::kPrimInt),
+ Primitive::ComponentSize(Primitive::kPrimNot))
+ << "kPrimInt and kPrimNot have different sizes.";
SetRawInputAt(0, base_address);
SetRawInputAt(1, offset);
}
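
For illustration only (hypothetical names, not ART API): the "intermediate address" is simply the
object address plus the constant data offset, computed once so that each element access needs only
a scaled index, and typed kPrimInt so the GC never treats the live value as a reference. A
self-contained sketch of that arithmetic:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical illustration of the addressing split done by HIntermediateAddress.
    uintptr_t ElementAddress(uintptr_t object_base,  // address of the array object
                             size_t data_offset,     // offset of element 0 in the object
                             size_t index,
                             size_t element_size) {
      // HIntermediateAddress materializes this sum once...
      uintptr_t intermediate = object_base + data_offset;
      // ...so every access becomes a single base-plus-scaled-index load/store.
      return intermediate + index * element_size;
    }
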
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 2f59d4cd5b..0819fb01ac 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -37,7 +37,10 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> {
virtual ~HOptimization() {}
- // Return the name of the pass.
+ // Return the name of the pass. Pass names for a single HOptimization should be of the form
+ // <optimization_name> or <optimization_name>$<pass_name>, where <optimization_name> is the common prefix.
+ // Example: 'instruction_simplifier', 'instruction_simplifier$after_bce',
+ // 'instruction_simplifier$before_codegen'.
const char* GetPassName() const { return pass_name_; }
// Perform the analysis itself.
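
For illustration only: the '$' separator lets one HOptimization class be scheduled several times
under distinct pass names. A standalone sketch of the split; the real helper added later in this
change is ConvertPassNameToOptimizationName() in optimizing_compiler.cc:

    #include <string>

    // Mirrors ConvertPassNameToOptimizationName(); not the ART symbol itself.
    std::string OptimizationNameOf(const std::string& pass_name) {
      size_t pos = pass_name.find('$');
      return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
    }
    // OptimizationNameOf("instruction_simplifier$after_bce") -> "instruction_simplifier"
    // OptimizationNameOf("GVN")                              -> "GVN"
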
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index a6d234d739..013e110b87 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -19,6 +19,7 @@
#include "arch/instruction_set.h"
#include "cfi_test.h"
+#include "driver/compiler_options.h"
#include "gtest/gtest.h"
#include "optimizing/code_generator.h"
#include "optimizing/optimizing_unit_test.h"
@@ -51,7 +52,7 @@ class OptimizingCFITest : public CFITest {
void SetUpFrame(InstructionSet isa) {
// Setup simple context.
std::string error;
- isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+ isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error);
graph_ = CreateGraph(&allocator_);
// Generate simple frame with some spills.
code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_);
@@ -157,13 +158,28 @@ class OptimizingCFITest : public CFITest {
TestImpl(isa, #isa, expected_asm, expected_cfi); \
}
+// TODO(VIXL): Support this test for the VIXL backend.
+#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND)
TEST_ISA(kThumb2)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
TEST_ISA(kArm64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
TEST_ISA(kX86)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
TEST_ISA(kX86_64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
TEST_ISA(kMips)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
TEST_ISA(kMips64)
+#endif
+// TODO(VIXL): Support this test for the VIXL backend.
+#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND)
TEST_F(OptimizingCFITest, kThumb2Adjust) {
std::vector<uint8_t> expected_asm(
expected_asm_kThumb2_adjust,
@@ -184,7 +200,9 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) {
Finish();
Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi);
}
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
TEST_F(OptimizingCFITest, kMipsAdjust) {
// One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -212,7 +230,9 @@ TEST_F(OptimizingCFITest, kMipsAdjust) {
Finish();
Check(kMips, "kMips_adjust", expected_asm, expected_cfi);
}
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
TEST_F(OptimizingCFITest, kMips64Adjust) {
// One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -240,6 +260,7 @@ TEST_F(OptimizingCFITest, kMips64Adjust) {
Finish();
Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
}
+#endif
#endif // ART_TARGET_ANDROID
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 05eb06333e..f735dc8cb3 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -1,10 +1,10 @@
static constexpr uint8_t expected_asm_kThumb2[] = {
- 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0,
+ 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x0B, 0xB0,
0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD,
};
static constexpr uint8_t expected_cfi_kThumb2[] = {
0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
- 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42,
+ 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x0A, 0x42,
0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E,
0x40,
};
@@ -19,20 +19,19 @@ static constexpr uint8_t expected_cfi_kThumb2[] = {
// 0x00000006: .cfi_offset_extended: r81 at cfa-16
// 0x00000006: sub sp, sp, #44
// 0x00000008: .cfi_def_cfa_offset: 64
-// 0x00000008: str r0, [sp, #0]
-// 0x0000000a: .cfi_remember_state
-// 0x0000000a: add sp, sp, #44
-// 0x0000000c: .cfi_def_cfa_offset: 20
-// 0x0000000c: vpop.f32 {s16-s17}
-// 0x00000010: .cfi_def_cfa_offset: 12
-// 0x00000010: .cfi_restore_extended: r80
-// 0x00000010: .cfi_restore_extended: r81
-// 0x00000010: pop {r5, r6, pc}
-// 0x00000012: .cfi_restore_state
-// 0x00000012: .cfi_def_cfa_offset: 64
+// 0x00000008: .cfi_remember_state
+// 0x00000008: add sp, sp, #44
+// 0x0000000a: .cfi_def_cfa_offset: 20
+// 0x0000000a: vpop.f32 {s16-s17}
+// 0x0000000e: .cfi_def_cfa_offset: 12
+// 0x0000000e: .cfi_restore_extended: r80
+// 0x0000000e: .cfi_restore_extended: r81
+// 0x0000000e: pop {r5, r6, pc}
+// 0x00000010: .cfi_restore_state
+// 0x00000010: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kArm64[] = {
- 0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
+ 0xFF, 0x03, 0x01, 0xD1, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9,
0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
};
@@ -41,7 +40,7 @@ static constexpr uint8_t expected_cfi_kArm64[] = {
0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
};
-// 0x00000000: str x0, [sp, #-64]!
+// 0x00000000: sub sp, sp, #0x40 (64)
// 0x00000004: .cfi_def_cfa_offset: 64
// 0x00000004: str x20, [sp, #40]
// 0x00000008: .cfi_offset: r20 at cfa-24
@@ -67,12 +66,12 @@ static constexpr uint8_t expected_cfi_kArm64[] = {
// 0x00000024: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kX86[] = {
- 0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D,
+ 0x56, 0x55, 0x83, 0xEC, 0x34, 0x83, 0xC4, 0x34, 0x5D,
0x5E, 0xC3,
};
static constexpr uint8_t expected_cfi_kX86[] = {
0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E,
- 0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E,
+ 0x40, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E,
0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40,
};
// 0x00000000: push esi
@@ -83,29 +82,28 @@ static constexpr uint8_t expected_cfi_kX86[] = {
// 0x00000002: .cfi_offset: r5 at cfa-12
// 0x00000002: sub esp, 52
// 0x00000005: .cfi_def_cfa_offset: 64
-// 0x00000005: mov [esp], eax
-// 0x00000008: .cfi_remember_state
-// 0x00000008: add esp, 52
-// 0x0000000b: .cfi_def_cfa_offset: 12
-// 0x0000000b: pop ebp
-// 0x0000000c: .cfi_def_cfa_offset: 8
-// 0x0000000c: .cfi_restore: r5
-// 0x0000000c: pop esi
-// 0x0000000d: .cfi_def_cfa_offset: 4
-// 0x0000000d: .cfi_restore: r6
-// 0x0000000d: ret
-// 0x0000000e: .cfi_restore_state
-// 0x0000000e: .cfi_def_cfa_offset: 64
+// 0x00000005: .cfi_remember_state
+// 0x00000005: add esp, 52
+// 0x00000008: .cfi_def_cfa_offset: 12
+// 0x00000008: pop ebp
+// 0x0000000a: .cfi_def_cfa_offset: 8
+// 0x0000000a: .cfi_restore: r5
+// 0x0000000a: pop esi
+// 0x0000000b: .cfi_def_cfa_offset: 4
+// 0x0000000b: .cfi_restore: r6
+// 0x0000000b: ret
+// 0x0000000c: .cfi_restore_state
+// 0x0000000c: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kX86_64[] = {
0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24,
- 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x48, 0x89, 0x3C, 0x24,
+ 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18,
0xF2, 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C,
0x24, 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3,
};
static constexpr uint8_t expected_cfi_kX86_64[] = {
0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E,
- 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x44, 0x0A, 0x47, 0xDD, 0x47,
+ 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x0A, 0x47, 0xDD, 0x47,
0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6,
0x41, 0x0B, 0x0E, 0x40,
};
@@ -121,35 +119,34 @@ static constexpr uint8_t expected_cfi_kX86_64[] = {
// 0x0000000d: .cfi_offset: r30 at cfa-32
// 0x0000000d: movsd [rsp + 24], xmm12
// 0x00000014: .cfi_offset: r29 at cfa-40
-// 0x00000014: movq [rsp], rdi
-// 0x00000018: .cfi_remember_state
-// 0x00000018: movsd xmm12, [rsp + 24]
-// 0x0000001f: .cfi_restore: r29
-// 0x0000001f: movsd xmm13, [rsp + 32]
-// 0x00000026: .cfi_restore: r30
-// 0x00000026: addq rsp, 40
-// 0x0000002a: .cfi_def_cfa_offset: 24
-// 0x0000002a: pop rbx
-// 0x0000002b: .cfi_def_cfa_offset: 16
-// 0x0000002b: .cfi_restore: r3
-// 0x0000002b: pop rbp
-// 0x0000002c: .cfi_def_cfa_offset: 8
-// 0x0000002c: .cfi_restore: r6
-// 0x0000002c: ret
-// 0x0000002d: .cfi_restore_state
-// 0x0000002d: .cfi_def_cfa_offset: 64
+// 0x00000014: .cfi_remember_state
+// 0x00000014: movsd xmm12, [rsp + 24]
+// 0x0000001c: .cfi_restore: r29
+// 0x0000001c: movsd xmm13, [rsp + 32]
+// 0x00000022: .cfi_restore: r30
+// 0x00000022: addq rsp, 40
+// 0x00000026: .cfi_def_cfa_offset: 24
+// 0x00000026: pop rbx
+// 0x00000027: .cfi_def_cfa_offset: 16
+// 0x00000027: .cfi_restore: r3
+// 0x00000027: pop rbp
+// 0x00000028: .cfi_def_cfa_offset: 8
+// 0x00000028: .cfi_restore: r6
+// 0x00000028: ret
+// 0x00000029: .cfi_restore_state
+// 0x00000029: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips[] = {
0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
- 0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F,
+ 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F,
0x34, 0x00, 0xB0, 0x8F, 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7,
- 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0x09, 0x00, 0xE0, 0x03, 0x40, 0x00, 0xBD, 0x27,
};
static constexpr uint8_t expected_cfi_kMips[] = {
0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
- 0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48,
- 0x0B, 0x0E, 0x40,
+ 0x48, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B,
+ 0x0E, 0x40,
};
// 0x00000000: addiu r29, r29, -64
// 0x00000004: .cfi_def_cfa_offset: 64
@@ -161,34 +158,33 @@ static constexpr uint8_t expected_cfi_kMips[] = {
// 0x00000010: .cfi_offset: r16 at cfa-12
// 0x00000010: sdc1 f22, +40(r29)
// 0x00000014: sdc1 f20, +32(r29)
-// 0x00000018: sw r4, +0(r29)
-// 0x0000001c: .cfi_remember_state
-// 0x0000001c: lw r31, +60(r29)
-// 0x00000020: .cfi_restore: r31
-// 0x00000020: lw r17, +56(r29)
-// 0x00000024: .cfi_restore: r17
-// 0x00000024: lw r16, +52(r29)
-// 0x00000028: .cfi_restore: r16
-// 0x00000028: ldc1 f22, +40(r29)
-// 0x0000002c: ldc1 f20, +32(r29)
+// 0x00000018: .cfi_remember_state
+// 0x00000018: lw r31, +60(r29)
+// 0x0000001c: .cfi_restore: r31
+// 0x0000001c: lw r17, +56(r29)
+// 0x00000020: .cfi_restore: r17
+// 0x00000020: lw r16, +52(r29)
+// 0x00000024: .cfi_restore: r16
+// 0x00000024: ldc1 f22, +40(r29)
+// 0x00000028: ldc1 f20, +32(r29)
+// 0x0000002c: jr r31
// 0x00000030: addiu r29, r29, 64
// 0x00000034: .cfi_def_cfa_offset: 0
-// 0x00000034: jr r31
-// 0x00000038: nop
-// 0x0000003c: .cfi_restore_state
-// 0x0000003c: .cfi_def_cfa_offset: 64
+// 0x00000034: .cfi_restore_state
+// 0x00000034: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips64[] = {
0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
- 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x18, 0x00, 0xBD, 0x67,
+ 0xE8, 0xFF, 0xBD, 0x67, 0x18, 0x00, 0xBD, 0x67,
0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF,
0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67,
0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
};
+
static constexpr uint8_t expected_cfi_kMips64[] = {
0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
- 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x44, 0x0A, 0x44,
+ 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x0A, 0x44,
0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
};
@@ -206,29 +202,28 @@ static constexpr uint8_t expected_cfi_kMips64[] = {
// 0x00000018: .cfi_offset: r56 at cfa-40
// 0x00000018: daddiu r29, r29, -24
// 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: sd r4, +0(r29)
-// 0x00000020: .cfi_remember_state
-// 0x00000020: daddiu r29, r29, 24
-// 0x00000024: .cfi_def_cfa_offset: 40
-// 0x00000024: ldc1 f24, +0(r29)
-// 0x00000028: .cfi_restore: r56
-// 0x00000028: ldc1 f25, +8(r29)
-// 0x0000002c: .cfi_restore: r57
-// 0x0000002c: ld r16, +16(r29)
-// 0x00000030: .cfi_restore: r16
-// 0x00000030: ld r17, +24(r29)
-// 0x00000034: .cfi_restore: r17
-// 0x00000034: ld r31, +32(r29)
-// 0x00000038: .cfi_restore: r31
-// 0x00000038: daddiu r29, r29, 40
-// 0x0000003c: .cfi_def_cfa_offset: 0
-// 0x0000003c: jr r31
-// 0x00000040: nop
-// 0x00000044: .cfi_restore_state
-// 0x00000044: .cfi_def_cfa_offset: 64
+// 0x0000001c: .cfi_remember_state
+// 0x0000001c: daddiu r29, r29, 24
+// 0x00000020: .cfi_def_cfa_offset: 40
+// 0x00000020: ldc1 f24, +0(r29)
+// 0x00000024: .cfi_restore: r56
+// 0x00000024: ldc1 f25, +8(r29)
+// 0x00000028: .cfi_restore: r57
+// 0x00000028: ld r16, +16(r29)
+// 0x0000002c: .cfi_restore: r16
+// 0x0000002c: ld r17, +24(r29)
+// 0x00000030: .cfi_restore: r17
+// 0x00000030: ld r31, +32(r29)
+// 0x00000034: .cfi_restore: r31
+// 0x00000034: daddiu r29, r29, 40
+// 0x00000038: .cfi_def_cfa_offset: 0
+// 0x00000038: jr r31
+// 0x0000003c: nop
+// 0x00000040: .cfi_restore_state
+// 0x00000040: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
- 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x00, 0x28,
+ 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28,
0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
@@ -244,7 +239,7 @@ static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
};
static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
- 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A,
+ 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x86, 0x0A,
0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B,
0x0E, 0x40,
};
@@ -259,9 +254,9 @@ static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
// 0x00000006: .cfi_offset_extended: r81 at cfa-16
// 0x00000006: sub sp, sp, #44
// 0x00000008: .cfi_def_cfa_offset: 64
-// 0x00000008: str r0, [sp, #0]
-// 0x0000000a: cmp r0, #0
-// 0x0000000c: beq +128 (0x00000090)
+// 0x00000008: cmp r0, #0
+// 0x0000000a: beq +128 (0x00000090)
+// 0x0000000c: ldr r0, [r0, #0]
// 0x0000000e: ldr r0, [r0, #0]
// 0x00000010: ldr r0, [r0, #0]
// 0x00000012: ldr r0, [r0, #0]
@@ -326,36 +321,34 @@ static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
// 0x00000088: ldr r0, [r0, #0]
// 0x0000008a: ldr r0, [r0, #0]
// 0x0000008c: ldr r0, [r0, #0]
-// 0x0000008e: ldr r0, [r0, #0]
-// 0x00000090: .cfi_remember_state
-// 0x00000090: add sp, sp, #44
-// 0x00000092: .cfi_def_cfa_offset: 20
-// 0x00000092: vpop.f32 {s16-s17}
-// 0x00000096: .cfi_def_cfa_offset: 12
-// 0x00000096: .cfi_restore_extended: r80
-// 0x00000096: .cfi_restore_extended: r81
-// 0x00000096: pop {r5, r6, pc}
-// 0x00000098: .cfi_restore_state
-// 0x00000098: .cfi_def_cfa_offset: 64
+// 0x0000008e: .cfi_remember_state
+// 0x0000008e: add sp, sp, #44
+// 0x00000090: .cfi_def_cfa_offset: 20
+// 0x00000090: vpop.f32 {s16-s17}
+// 0x00000094: .cfi_def_cfa_offset: 12
+// 0x00000094: .cfi_restore_extended: r80
+// 0x00000094: .cfi_restore_extended: r81
+// 0x00000094: pop {r5, r6, pc}
+// 0x00000096: .cfi_restore_state
+// 0x00000096: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
- 0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
+ 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
};
static constexpr uint8_t expected_asm_kMips_adjust_tail[] = {
0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F,
- 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x40, 0x00, 0xBD, 0x27,
- 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x09, 0x00, 0xE0, 0x03,
+ 0x40, 0x00, 0xBD, 0x27,
};
static constexpr uint8_t expected_cfi_kMips_adjust[] = {
0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
- 0x54, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
- 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
- 0x40,
+ 0x50, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
+ 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40,
};
// 0x00000000: addiu r29, r29, -64
// 0x00000004: .cfi_def_cfa_offset: 64
@@ -367,42 +360,40 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = {
// 0x00000010: .cfi_offset: r16 at cfa-12
// 0x00000010: sdc1 f22, +40(r29)
// 0x00000014: sdc1 f20, +32(r29)
-// 0x00000018: sw r4, +0(r29)
-// 0x0000001c: bne r0, r4, 0x00000040 ; +36
-// 0x00000020: addiu r29, r29, -4
-// 0x00000024: .cfi_def_cfa_offset: 68
-// 0x00000024: sw r31, +0(r29)
-// 0x00000028: bltzal r0, 0x0000002c ; +4
-// 0x0000002c: lui r1, 0x20000
-// 0x00000030: ori r1, r1, 24
-// 0x00000034: addu r1, r1, r31
-// 0x00000038: lw r31, +0(r29)
-// 0x0000003c: jr r1
-// 0x00000040: addiu r29, r29, 4
-// 0x00000044: .cfi_def_cfa_offset: 64
-// 0x00000044: nop
+// 0x00000018: bne r0, r4, 0x00000040 ; +36
+// 0x0000001c: addiu r29, r29, -4
+// 0x00000020: .cfi_def_cfa_offset: 68
+// 0x00000020: sw r31, +0(r29)
+// 0x00000024: bltzal r0, 0x0000002c ; +4
+// 0x00000028: lui r1, 0x20000
+// 0x0000002c: ori r1, r1, 24
+// 0x00000030: addu r1, r1, r31
+// 0x00000034: lw r31, +0(r29)
+// 0x00000038: jr r1
+// 0x0000003c: addiu r29, r29, 4
+// 0x00000040: .cfi_def_cfa_offset: 64
+// 0x00000040: nop
// ...
-// 0x00020044: nop
-// 0x00020048: .cfi_remember_state
-// 0x00020048: lw r31, +60(r29)
-// 0x0002004c: .cfi_restore: r31
-// 0x0002004c: lw r17, +56(r29)
-// 0x00020050: .cfi_restore: r17
-// 0x00020050: lw r16, +52(r29)
-// 0x00020054: .cfi_restore: r16
-// 0x00020054: ldc1 f22, +40(r29)
-// 0x00020058: ldc1 f20, +32(r29)
+// 0x00020040: nop
+// 0x00020044: .cfi_remember_state
+// 0x00020044: lw r31, +60(r29)
+// 0x00020048: .cfi_restore: r31
+// 0x00020048: lw r17, +56(r29)
+// 0x0002004c: .cfi_restore: r17
+// 0x0002004c: lw r16, +52(r29)
+// 0x00020050: .cfi_restore: r16
+// 0x00020050: ldc1 f22, +40(r29)
+// 0x00020054: ldc1 f20, +32(r29)
+// 0x00020058: jr r31
// 0x0002005c: addiu r29, r29, 64
// 0x00020060: .cfi_def_cfa_offset: 0
-// 0x00020060: jr r31
-// 0x00020064: nop
-// 0x00020068: .cfi_restore_state
-// 0x00020068: .cfi_def_cfa_offset: 64
+// 0x00020060: .cfi_restore_state
+// 0x00020060: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
- 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60,
+ 0xE8, 0xFF, 0xBD, 0x67, 0x02, 0x00, 0xA6, 0x60,
0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
};
static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
@@ -412,7 +403,7 @@ static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
};
static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
- 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x14, 0x00,
+ 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x10, 0x00,
0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0,
0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
};
@@ -430,29 +421,28 @@ static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
// 0x00000018: .cfi_offset: r56 at cfa-40
// 0x00000018: daddiu r29, r29, -24
// 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: sd r4, +0(r29)
-// 0x00000020: bnec r5, r6, 0x0000002c ; +12
-// 0x00000024: auipc r1, 2
-// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080
-// 0x0000002c: nop
+// 0x0000001c: bnec r5, r6, 0x0000002c ; +12
+// 0x00000020: auipc r1, 2
+// 0x00000024: jic r1, 12 ; b 0x00020030 ; +131080
+// 0x00000028: nop
// ...
-// 0x0002002c: nop
-// 0x00020030: .cfi_remember_state
-// 0x00020030: daddiu r29, r29, 24
-// 0x00020034: .cfi_def_cfa_offset: 40
-// 0x00020034: ldc1 f24, +0(r29)
-// 0x00020038: .cfi_restore: r56
-// 0x00020038: ldc1 f25, +8(r29)
-// 0x0002003c: .cfi_restore: r57
-// 0x0002003c: ld r16, +16(r29)
-// 0x00020040: .cfi_restore: r16
-// 0x00020040: ld r17, +24(r29)
-// 0x00020044: .cfi_restore: r17
-// 0x00020044: ld r31, +32(r29)
-// 0x00020048: .cfi_restore: r31
-// 0x00020048: daddiu r29, r29, 40
-// 0x0002004c: .cfi_def_cfa_offset: 0
-// 0x0002004c: jr r31
-// 0x00020050: nop
-// 0x00020054: .cfi_restore_state
-// 0x00020054: .cfi_def_cfa_offset: 64
+// 0x00020028: nop
+// 0x0002002c: .cfi_remember_state
+// 0x0002002c: daddiu r29, r29, 24
+// 0x00020030: .cfi_def_cfa_offset: 40
+// 0x00020030: ldc1 f24, +0(r29)
+// 0x00020034: .cfi_restore: r56
+// 0x00020034: ldc1 f25, +8(r29)
+// 0x00020038: .cfi_restore: r57
+// 0x00020038: ld r16, +16(r29)
+// 0x0002003c: .cfi_restore: r16
+// 0x0002003c: ld r17, +24(r29)
+// 0x00020040: .cfi_restore: r17
+// 0x00020040: ld r31, +32(r29)
+// 0x00020044: .cfi_restore: r31
+// 0x00020044: daddiu r29, r29, 40
+// 0x00020048: .cfi_def_cfa_offset: 0
+// 0x00020048: jr r31
+// 0x0002004c: nop
+// 0x00020050: .cfi_restore_state
+// 0x00020050: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d5b0d77fe5..8c769270b1 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -18,6 +18,8 @@
#include <fstream>
#include <memory>
+#include <sstream>
+
#include <stdint.h>
#ifdef ART_ENABLE_CODEGEN_arm
@@ -46,6 +48,7 @@
#include "base/arena_containers.h"
#include "base/dumpable.h"
#include "base/macros.h"
+#include "base/mutex.h"
#include "base/timing_logger.h"
#include "bounds_check_elimination.h"
#include "builder.h"
@@ -56,7 +59,6 @@
#include "dead_code_elimination.h"
#include "debug/elf_debug_writer.h"
#include "debug/method_debug_info.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex/verification_results.h"
#include "dex/verified_method.h"
#include "driver/compiler_driver-inl.h"
@@ -77,6 +79,7 @@
#include "jni/quick/jni_compiler.h"
#include "licm.h"
#include "load_store_elimination.h"
+#include "loop_optimization.h"
#include "nodes.h"
#include "oat_quick_method_header.h"
#include "prepare_for_register_allocation.h"
@@ -95,6 +98,8 @@ namespace art {
static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB;
+static constexpr const char* kPassNameSeparator = "$";
+
/**
* Used by the code generator, to allocate the code in a vector.
*/
@@ -133,14 +138,18 @@ class PassObserver : public ValueObject {
PassObserver(HGraph* graph,
CodeGenerator* codegen,
std::ostream* visualizer_output,
- CompilerDriver* compiler_driver)
+ CompilerDriver* compiler_driver,
+ Mutex& dump_mutex)
: graph_(graph),
cached_method_name_(),
timing_logger_enabled_(compiler_driver->GetDumpPasses()),
timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
disasm_info_(graph->GetArena()),
+ visualizer_oss_(),
+ visualizer_output_(visualizer_output),
visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
- visualizer_(visualizer_output, graph, *codegen),
+ visualizer_(&visualizer_oss_, graph, *codegen),
+ visualizer_dump_mutex_(dump_mutex),
graph_in_bad_state_(false) {
if (timing_logger_enabled_ || visualizer_enabled_) {
if (!IsVerboseMethod(compiler_driver, GetMethodName())) {
@@ -158,6 +167,10 @@ class PassObserver : public ValueObject {
LOG(INFO) << "TIMINGS " << GetMethodName();
LOG(INFO) << Dumpable<TimingLogger>(timing_logger_);
}
+ if (visualizer_enabled_) {
+ MutexLock mu(Thread::Current(), visualizer_dump_mutex_);
+ *visualizer_output_ << visualizer_oss_.str();
+ }
}
void DumpDisassembly() const {
@@ -171,13 +184,14 @@ class PassObserver : public ValueObject {
const char* GetMethodName() {
// PrettyMethod() is expensive, so we delay calling it until we actually have to.
if (cached_method_name_.empty()) {
- cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile());
+ cached_method_name_ = graph_->GetDexFile().PrettyMethod(graph_->GetMethodIdx());
}
return cached_method_name_.c_str();
}
private:
void StartPass(const char* pass_name) {
+ VLOG(compiler) << "Starting pass: " << pass_name;
// Dump graph first, then start timer.
if (visualizer_enabled_) {
visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_);
@@ -234,8 +248,11 @@ class PassObserver : public ValueObject {
DisassemblyInformation disasm_info_;
+ std::ostringstream visualizer_oss_;
+ std::ostream* visualizer_output_;
bool visualizer_enabled_;
HGraphVisualizer visualizer_;
+ Mutex& visualizer_dump_mutex_;
// Flag to be set by the compiler if the pass failed and the graph is not
// expected to validate.
@@ -266,7 +283,7 @@ class PassScope : public ValueObject {
class OptimizingCompiler FINAL : public Compiler {
public:
explicit OptimizingCompiler(CompilerDriver* driver);
- ~OptimizingCompiler();
+ ~OptimizingCompiler() OVERRIDE;
bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE;
@@ -281,12 +298,17 @@ class OptimizingCompiler FINAL : public Compiler {
CompiledMethod* JniCompile(uint32_t access_flags,
uint32_t method_idx,
- const DexFile& dex_file) const OVERRIDE {
- return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
+ const DexFile& dex_file,
+ JniOptimizationFlags optimization_flags) const OVERRIDE {
+ return ArtQuickJniCompileMethod(GetCompilerDriver(),
+ access_flags,
+ method_idx,
+ dex_file,
+ optimization_flags);
}
uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
}
@@ -303,19 +325,19 @@ class OptimizingCompiler FINAL : public Compiler {
bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr)
OVERRIDE
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
- protected:
- virtual void RunOptimizations(HGraph* graph,
- CodeGenerator* codegen,
- CompilerDriver* driver,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer,
- StackHandleScopeCollection* handles) const;
+ private:
+ void RunOptimizations(HGraph* graph,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ PassObserver* pass_observer,
+ VariableSizedHandleScope* handles) const;
- virtual void RunOptimizations(HOptimization* optimizations[],
- size_t length,
- PassObserver* pass_observer) const;
+ void RunOptimizations(HOptimization* optimizations[],
+ size_t length,
+ PassObserver* pass_observer) const;
private:
// Create a 'CompiledMethod' for an optimized graph.
@@ -350,7 +372,7 @@ class OptimizingCompiler FINAL : public Compiler {
CompilerDriver* driver,
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer,
- StackHandleScopeCollection* handles) const;
+ VariableSizedHandleScope* handles) const;
void RunArchOptimizations(InstructionSet instruction_set,
HGraph* graph,
@@ -361,13 +383,16 @@ class OptimizingCompiler FINAL : public Compiler {
std::unique_ptr<std::ostream> visualizer_output_;
+ mutable Mutex dump_mutex_; // To synchronize visualizer writing.
+
DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler);
};
static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
- : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {}
+ : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
+ dump_mutex_("Visualizer dump lock") {}
void OptimizingCompiler::Init() {
// Enable C1visualizer output. Must be done in Init() because the compiler
@@ -375,9 +400,6 @@ void OptimizingCompiler::Init() {
CompilerDriver* driver = GetCompilerDriver();
const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName();
if (!cfg_file_name.empty()) {
- CHECK_EQ(driver->GetThreadCount(), 1U)
- << "Graph visualizer requires the compiler to run single-threaded. "
- << "Invoke the compiler with '-j1'.";
std::ios_base::openmode cfg_file_mode =
driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
@@ -420,6 +442,134 @@ static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
|| instruction_set == kX86_64;
}
+// Strip pass name suffix to get optimization name.
+static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
+ size_t pos = pass_name.find(kPassNameSeparator);
+ return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
+}
+
+static HOptimization* BuildOptimization(
+ const std::string& pass_name,
+ ArenaAllocator* arena,
+ HGraph* graph,
+ OptimizingCompilerStats* stats,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ VariableSizedHandleScope* handles,
+ SideEffectsAnalysis* most_recent_side_effects,
+ HInductionVarAnalysis* most_recent_induction) {
+ std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
+ if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
+ CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
+ return new (arena) BoundsCheckElimination(graph,
+ *most_recent_side_effects,
+ most_recent_induction);
+ } else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) {
+ CHECK(most_recent_side_effects != nullptr);
+ return new (arena) GVNOptimization(graph, *most_recent_side_effects, pass_name.c_str());
+ } else if (opt_name == HConstantFolding::kConstantFoldingPassName) {
+ return new (arena) HConstantFolding(graph, pass_name.c_str());
+ } else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) {
+ return new (arena) HDeadCodeElimination(graph, stats, pass_name.c_str());
+ } else if (opt_name == HInliner::kInlinerPassName) {
+ size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
+ return new (arena) HInliner(graph, // outer_graph
+ graph, // outermost_graph
+ codegen,
+ dex_compilation_unit, // outer_compilation_unit
+ dex_compilation_unit, // outermost_compilation_unit
+ driver,
+ handles,
+ stats,
+ number_of_dex_registers,
+ /* depth */ 0);
+ } else if (opt_name == HSharpening::kSharpeningPassName) {
+ return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
+ } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
+ return new (arena) HSelectGenerator(graph, stats);
+ } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+ return new (arena) HInductionVarAnalysis(graph);
+ } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
+ return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str());
+ } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
+ return new (arena) IntrinsicsRecognizer(graph, stats);
+ } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
+ CHECK(most_recent_side_effects != nullptr);
+ return new (arena) LICM(graph, *most_recent_side_effects, stats);
+ } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
+ CHECK(most_recent_side_effects != nullptr);
+ return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
+ } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+ return new (arena) SideEffectsAnalysis(graph);
+ } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
+ return new (arena) HLoopOptimization(graph, most_recent_induction);
+#ifdef ART_ENABLE_CODEGEN_arm
+ } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
+ return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
+ } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
+ return new (arena) arm::InstructionSimplifierArm(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+ } else if (opt_name == arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName) {
+ return new (arena) arm64::InstructionSimplifierArm64(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+ } else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) {
+ return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
+ } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) {
+ return new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+ } else if (opt_name == x86::PcRelativeFixups::kPcRelativeFixupsX86PassName) {
+ return new (arena) x86::PcRelativeFixups(graph, codegen, stats);
+ } else if (opt_name == x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName) {
+ return new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+#endif
+ }
+ return nullptr;
+}
+
+static ArenaVector<HOptimization*> BuildOptimizations(
+ const std::vector<std::string>& pass_names,
+ ArenaAllocator* arena,
+ HGraph* graph,
+ OptimizingCompilerStats* stats,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ VariableSizedHandleScope* handles) {
+ // A few HOptimization constructors require SideEffectsAnalysis or HInductionVarAnalysis
+ // instances. This method assumes that each of them uses the nearest such instance preceding it
+ // in the pass name list.
+ SideEffectsAnalysis* most_recent_side_effects = nullptr;
+ HInductionVarAnalysis* most_recent_induction = nullptr;
+ ArenaVector<HOptimization*> ret(arena->Adapter());
+ for (const std::string& pass_name : pass_names) {
+ HOptimization* opt = BuildOptimization(
+ pass_name,
+ arena,
+ graph,
+ stats,
+ codegen,
+ driver,
+ dex_compilation_unit,
+ handles,
+ most_recent_side_effects,
+ most_recent_induction);
+ CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
+ ret.push_back(opt);
+
+ std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
+ if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+ most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
+ } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
+ most_recent_induction = down_cast<HInductionVarAnalysis*>(opt);
+ }
+ }
+ return ret;
+}
+
void OptimizingCompiler::RunOptimizations(HOptimization* optimizations[],
size_t length,
PassObserver* pass_observer) const {
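
For illustration only (the pass-name strings below are assumptions, not taken from this change):
a pass list handed to BuildOptimizations() must name each analysis before the passes that consume
its result, per the "nearest preceding instance" rule above. A sketch of such a list:

    #include <string>
    #include <vector>

    // Hypothetical pass list; each analysis precedes its consumers.
    const std::vector<std::string> kExamplePasses = {
        "instruction_simplifier",
        "side_effects",             // SideEffectsAnalysis
        "GVN",                      // uses the side_effects result above
        "induction_var_analysis",
        "BCE",                      // uses side_effects and induction_var_analysis
        "instruction_simplifier$before_codegen",
    };
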
@@ -434,7 +584,7 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
CompilerDriver* driver,
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer,
- StackHandleScopeCollection* handles) const {
+ VariableSizedHandleScope* handles) const {
OptimizingCompilerStats* stats = compilation_stats_.get();
const CompilerOptions& compiler_options = driver->GetCompilerOptions();
bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
@@ -444,11 +594,11 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
}
size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
HInliner* inliner = new (graph->GetArena()) HInliner(
- graph,
- graph,
+ graph, // outer_graph
+ graph, // outermost_graph
codegen,
- dex_compilation_unit,
- dex_compilation_unit,
+ dex_compilation_unit, // outer_compilation_unit
+ dex_compilation_unit, // outermost_compilation_unit
driver,
handles,
stats,
@@ -463,17 +613,24 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
HGraph* graph,
CodeGenerator* codegen,
PassObserver* pass_observer) const {
+ UNUSED(codegen); // To avoid compilation error when compiling for svelte
OptimizingCompilerStats* stats = compilation_stats_.get();
ArenaAllocator* arena = graph->GetArena();
+#ifdef ART_USE_VIXL_ARM_BACKEND
+ UNUSED(arena);
+ UNUSED(pass_observer);
+ UNUSED(stats);
+#endif
switch (instruction_set) {
-#ifdef ART_ENABLE_CODEGEN_arm
+#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND)
case kThumb2:
case kArm: {
- arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+ arm::DexCacheArrayFixups* fixups =
+ new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
arm::InstructionSimplifierArm* simplifier =
new (arena) arm::InstructionSimplifierArm(graph, stats);
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
- GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
+ GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
HOptimization* arm_optimizations[] = {
simplifier,
side_effects,
@@ -489,7 +646,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
arm64::InstructionSimplifierArm64* simplifier =
new (arena) arm64::InstructionSimplifierArm64(graph, stats);
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
- GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
+ GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
HOptimization* arm64_optimizations[] = {
simplifier,
side_effects,
@@ -518,7 +675,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
x86::PcRelativeFixups* pc_relative_fixups =
new (arena) x86::PcRelativeFixups(graph, codegen, stats);
x86::X86MemoryOperandGeneration* memory_gen =
- new(arena) x86::X86MemoryOperandGeneration(graph, stats, codegen);
+ new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
HOptimization* x86_optimizations[] = {
pc_relative_fixups,
memory_gen
@@ -530,7 +687,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
#ifdef ART_ENABLE_CODEGEN_x86_64
case kX86_64: {
x86::X86MemoryOperandGeneration* memory_gen =
- new(arena) x86::X86MemoryOperandGeneration(graph, stats, codegen);
+ new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
HOptimization* x86_64_optimizations[] = {
memory_gen
};
@@ -546,7 +703,8 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects.
static void AllocateRegisters(HGraph* graph,
CodeGenerator* codegen,
- PassObserver* pass_observer) {
+ PassObserver* pass_observer,
+ RegisterAllocator::Strategy strategy) {
{
PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
pass_observer);
@@ -559,7 +717,7 @@ static void AllocateRegisters(HGraph* graph,
}
{
PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
- RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
+ RegisterAllocator::Create(graph->GetArena(), codegen, liveness, strategy)->AllocateRegisters();
}
}
@@ -568,30 +726,48 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
CompilerDriver* driver,
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer,
- StackHandleScopeCollection* handles) const {
+ VariableSizedHandleScope* handles) const {
OptimizingCompilerStats* stats = compilation_stats_.get();
ArenaAllocator* arena = graph->GetArena();
+ if (driver->GetCompilerOptions().GetPassesToRun() != nullptr) {
+ ArenaVector<HOptimization*> optimizations = BuildOptimizations(
+ *driver->GetCompilerOptions().GetPassesToRun(),
+ arena,
+ graph,
+ stats,
+ codegen,
+ driver,
+ dex_compilation_unit,
+ handles);
+ RunOptimizations(&optimizations[0], optimizations.size(), pass_observer);
+ return;
+ }
+
HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
- graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
+ graph, stats, "dead_code_elimination$initial");
HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
- graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
+ graph, stats, "dead_code_elimination$final");
HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
- HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
- HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
+ HConstantFolding* fold2 = new (arena) HConstantFolding(
+ graph, "constant_folding$after_inlining");
+ HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
LICM* licm = new (arena) LICM(graph, *side_effects, stats);
LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
+ HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction);
HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier_after_bce");
+ graph, stats, "instruction_simplifier$after_inlining");
InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier_before_codegen");
- IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats);
+ graph, stats, "instruction_simplifier$after_bce");
+ InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
+ graph, stats, "instruction_simplifier$before_codegen");
+ IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
HOptimization* optimizations1[] = {
intrinsics,
@@ -609,24 +785,25 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
// redundant suspend checks to recognize empty blocks.
select_generator,
fold2, // TODO: if we don't inline we can also skip fold2.
+ simplify2,
side_effects,
gvn,
licm,
induction,
bce,
+ loop,
fold3, // evaluates code generated by dynamic bce
- simplify2,
+ simplify3,
lse,
dce2,
// The codegen has a few assumptions that only the instruction simplifier
// can satisfy. For example, the code generator does not expect to see a
// HTypeConversion from a type to the same type.
- simplify3,
+ simplify4,
};
RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer);
- AllocateRegisters(graph, codegen, pass_observer);
}
static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
@@ -688,9 +865,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
// Always use the Thumb-2 assembler: some runtime functionality
// (like implicit stack overflow checks) assume Thumb-2.
- if (instruction_set == kArm) {
- instruction_set = kThumb2;
- }
+ DCHECK_NE(instruction_set, kArm);
// Do not attempt to compile on architectures we do not support.
if (!IsInstructionSetSupported(instruction_set)) {
@@ -719,9 +894,10 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
return nullptr;
}
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
DexCompilationUnit dex_compilation_unit(
class_loader,
- Runtime::Current()->GetClassLinker(),
+ class_linker,
dex_file,
code_item,
class_def_idx,
@@ -750,7 +926,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
ScopedObjectAccess soa(Thread::Current());
StackHandleScope<1> hs(soa.Self());
Handle<mirror::ClassLoader> loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader*>(class_loader)));
+ soa.Decode<mirror::ClassLoader>(class_loader)));
method = compiler_driver->ResolveMethod(
soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
}
@@ -760,7 +936,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
if (method != nullptr) {
graph->SetArtMethod(method);
ScopedObjectAccess soa(Thread::Current());
- interpreter_metadata = method->GetQuickenedInfo();
+ interpreter_metadata = method->GetQuickenedInfo(class_linker->GetImagePointerSize());
uint16_t type_index = method->GetDeclaringClass()->GetDexTypeIndex();
// Update the dex cache if the type is not in it yet. Note that under AOT,
@@ -789,13 +965,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
PassObserver pass_observer(graph,
codegen.get(),
visualizer_output_.get(),
- compiler_driver);
+ compiler_driver,
+ dump_mutex_);
VLOG(compiler) << "Building " << pass_observer.GetMethodName();
{
ScopedObjectAccess soa(Thread::Current());
- StackHandleScopeCollection handles(soa.Self());
+ VariableSizedHandleScope handles(soa.Self());
// Do not hold `mutator_lock_` between optimizations.
ScopedThreadSuspension sts(soa.Self(), kNative);
@@ -841,6 +1018,10 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
&pass_observer,
&handles);
+ RegisterAllocator::Strategy regalloc_strategy =
+ compiler_options.GetRegisterAllocationStrategy();
+ AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
+
codegen->Compile(code_allocator);
pass_observer.DumpDisassembly();
}
@@ -886,7 +1067,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
if (kArenaAllocatorCountAllocations) {
if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) {
MemStats mem_stats(arena.GetMemStats());
- LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
+ LOG(INFO) << dex_file.PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats);
}
}
}
@@ -908,7 +1089,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
// instruction set is supported -- and has support for read
// barriers, if they are enabled). This makes sure we're not
// regressing.
- std::string method_name = PrettyMethod(method_idx, dex_file);
+ std::string method_name = dex_file.PrettyMethod(method_idx);
bool shouldCompile = method_name.find("$opt$") != std::string::npos;
DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name;
}
@@ -973,7 +1154,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
if (kArenaAllocatorCountAllocations) {
if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) {
MemStats mem_stats(arena.GetMemStats());
- LOG(INFO) << PrettyMethod(method_idx, *dex_file) << " " << Dumpable<MemStats>(mem_stats);
+ LOG(INFO) << dex_file->PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats);
}
}
}
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index dd5cb1c9bb..58d90176cd 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -22,7 +22,7 @@
#include "common_compiler_test.h"
#include "dex_file.h"
#include "dex_instruction.h"
-#include "handle_scope-inl.h"
+#include "handle_scope.h"
#include "scoped_thread_state_change.h"
#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
@@ -90,7 +90,7 @@ inline HGraph* CreateCFG(ArenaAllocator* allocator,
{
ScopedObjectAccess soa(Thread::Current());
- StackHandleScopeCollection handles(soa.Self());
+ VariableSizedHandleScope handles(soa.Self());
HGraphBuilder builder(graph, *item, &handles, return_type);
bool graph_built = (builder.BuildGraph() == kAnalysisSuccess);
return graph_built ? graph : nullptr;
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index c6acc45581..82feb95a2f 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -83,6 +83,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBssEntry:
// Add a base register for PC-relative literals on R2.
InitializePCRelativeBasePointer();
load_string->AddSpecialInput(base_);
@@ -92,6 +93,25 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
}
+ void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+ if (switch_insn->GetNumEntries() <=
+ InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) {
+ return;
+ }
+ // We need to replace the HPackedSwitch with a HMipsPackedSwitch in order to
+ // address the constant area.
+ InitializePCRelativeBasePointer();
+ HGraph* graph = GetGraph();
+ HBasicBlock* block = switch_insn->GetBlock();
+ HMipsPackedSwitch* mips_switch = new (graph->GetArena()) HMipsPackedSwitch(
+ switch_insn->GetStartValue(),
+ switch_insn->GetNumEntries(),
+ switch_insn->InputAt(0),
+ base_,
+ switch_insn->GetDexPc());
+ block->ReplaceAndRemoveInstructionWith(switch_insn, mips_switch);
+ }
+
void HandleInvoke(HInvoke* invoke) {
// If this is an invoke-static/-direct with PC-relative dex cache array
// addressing, we need the PC-relative address base.
@@ -115,7 +135,8 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
return;
}
- if (has_extra_input && !WillHaveCallFreeIntrinsicsCodeGen(invoke)) {
+ if (has_extra_input &&
+ !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) {
InitializePCRelativeBasePointer();
// Add the extra parameter base_.
invoke_static_or_direct->AddSpecialInput(base_);
@@ -123,22 +144,6 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
}
- bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) {
- if (invoke->GetIntrinsic() != Intrinsics::kNone) {
- // This invoke may have intrinsic code generation defined. However, we must
- // now also determine if this code generation is truly there and call-free
- // (not unimplemented, no bail on instruction features, or call on slow path).
- // This is done by actually calling the locations builder on the instruction
- // and clearing out the locations once result is known. We assume this
- // call only has creating locations as side effects!
- IntrinsicLocationsBuilderMIPS builder(codegen_);
- bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
- invoke->SetLocations(nullptr);
- return success;
- }
- return false;
- }
-
CodeGeneratorMIPS* codegen_;
// The generated HMipsComputeBaseMethodAddress in the entry block needed as an
diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h
index 1e8b071bb3..5a7397bf9d 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.h
+++ b/compiler/optimizing/pc_relative_fixups_mips.h
@@ -32,6 +32,8 @@ class PcRelativeFixups : public HOptimization {
: HOptimization(graph, "pc_relative_fixups_mips", stats),
codegen_(codegen) {}
+ static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips";
+
void Run() OVERRIDE;
private:
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 921f3dfff6..b1fdb1792d 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -92,7 +92,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
void VisitLoadString(HLoadString* load_string) OVERRIDE {
HLoadString::LoadKind load_kind = load_string->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
+ load_kind == HLoadString::LoadKind::kBssEntry) {
InitializePCRelativeBasePointer();
load_string->AddSpecialInput(base_);
}
@@ -203,7 +203,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
bool base_added = false;
if (invoke_static_or_direct != nullptr &&
invoke_static_or_direct->HasPcRelativeDexCache() &&
- !WillHaveCallFreeIntrinsicsCodeGen(invoke)) {
+ !IsCallFreeIntrinsic<IntrinsicLocationsBuilderX86>(invoke, codegen_)) {
InitializePCRelativeBasePointer();
// Add the extra parameter base_.
invoke_static_or_direct->AddSpecialInput(base_);
@@ -227,6 +227,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMinDoubleDouble:
case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathRoundFloat:
if (!base_added) {
DCHECK(invoke_static_or_direct != nullptr);
DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
@@ -239,22 +240,6 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
}
- bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) {
- if (invoke->GetIntrinsic() != Intrinsics::kNone) {
- // This invoke may have intrinsic code generation defined. However, we must
- // now also determine if this code generation is truly there and call-free
- // (not unimplemented, no bail on instruction features, or call on slow path).
- // This is done by actually calling the locations builder on the instruction
- // and clearing out the locations once result is known. We assume this
- // call only has creating locations as side effects!
- IntrinsicLocationsBuilderX86 builder(codegen_);
- bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
- invoke->SetLocations(nullptr);
- return success;
- }
- return false;
- }
-
CodeGeneratorX86* codegen_;
// The generated HX86ComputeBaseMethodAddress in the entry block needed as an
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index 03de2fcece..72fa71ea94 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -29,9 +29,11 @@ namespace x86 {
class PcRelativeFixups : public HOptimization {
public:
PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
- : HOptimization(graph, "pc_relative_fixups_x86", stats),
+ : HOptimization(graph, kPcRelativeFixupsX86PassName, stats),
codegen_(codegen) {}
+ static constexpr const char* kPcRelativeFixupsX86PassName = "pc_relative_fixups_x86";
+
void Run() OVERRIDE;
private:
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 8fb539661f..0db60882db 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -20,8 +20,7 @@ namespace art {
void PrepareForRegisterAllocation::Run() {
// Order does not matter.
- for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
// No need to visit the phis.
for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
inst_it.Advance()) {
@@ -44,7 +43,7 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
// Add a fake environment for String.charAt() inline info as we want
// the exception to appear as being thrown from there.
const DexFile& dex_file = check->GetEnvironment()->GetDexFile();
- DCHECK_STREQ(PrettyMethod(check->GetStringCharAtMethodIndex(), dex_file).c_str(),
+ DCHECK_STREQ(dex_file.PrettyMethod(check->GetStringCharAtMethodIndex()).c_str(),
"char java.lang.String.charAt(int)");
ArenaAllocator* arena = GetGraph()->GetArena();
HEnvironment* environment = new (arena) HEnvironment(arena,
@@ -129,6 +128,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
} else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) {
// Pass the initialization duty to the `HLoadClass` instruction,
// and remove the instruction from the graph.
+ DCHECK(load_class->HasEnvironment());
load_class->SetMustGenerateClinitCheck(true);
check->GetBlock()->RemoveInstruction(check);
}
@@ -136,7 +136,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) {
HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass();
- bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse();
+ const bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse();
// Change the entrypoint to kQuickAllocObject if either:
// - the class is finalizable (only kQuickAllocObject handles finalizable classes),
// - the class needs access checks (we do not know if it's finalizable),
@@ -144,19 +144,25 @@ void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) {
if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) {
instruction->SetEntrypoint(kQuickAllocObject);
instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0);
- // The allocation entry point that deals with access checks does not work with inlined
- // methods, so we need to check whether this allocation comes from an inlined method.
- // We also need to make the same check as for moving clinit check, whether the HLoadClass
- // has the clinit check responsibility or not (HLoadClass can throw anyway).
- if (has_only_one_use &&
- !instruction->GetEnvironment()->IsFromInlinedInvoke() &&
- CanMoveClinitCheck(load_class, instruction)) {
- // We can remove the load class from the graph. If it needed access checks, we delegate
- // the access check to the allocation.
- if (load_class->NeedsAccessCheck()) {
- instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck);
+ if (has_only_one_use) {
+ // We've just removed the only use of the HLoadClass. Since we don't run DCE after this pass,
+ // do it manually if possible.
+ if (!load_class->CanThrow()) {
+        // If the load class cannot throw, it has no side effects and can be removed if there is
+ // only one use.
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ } else if (!instruction->GetEnvironment()->IsFromInlinedInvoke() &&
+ CanMoveClinitCheck(load_class, instruction)) {
+ // The allocation entry point that deals with access checks does not work with inlined
+ // methods, so we need to check whether this allocation comes from an inlined method.
+ // We also need to make the same check as for moving clinit check, whether the HLoadClass
+ // has the clinit check responsibility or not (HLoadClass can throw anyway).
+ // If it needed access checks, we delegate the access check to the allocation.
+ if (load_class->NeedsAccessCheck()) {
+ instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck);
+ }
+ load_class->GetBlock()->RemoveInstruction(load_class);
}
- load_class->GetBlock()->RemoveInstruction(load_class);
}
}
}
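The loop rewrite above, from an explicit HReversePostOrderIterator to a range-based loop over
GetReversePostOrder(), is applied uniformly in this change. A minimal sketch of the two idioms,
with Visit() standing in for whatever per-block work a pass does:

    // Old style: explicit iterator object.
    for (HReversePostOrderIterator it(*graph); !it.Done(); it.Advance()) {
      Visit(it.Current());
    }
    // New style: range-based loop over the stored block ordering.
    for (HBasicBlock* block : graph->GetReversePostOrder()) {
      Visit(block);
    }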
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index e96ab1918c..d588deaace 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -20,14 +20,14 @@
#include "class_linker-inl.h"
#include "mirror/class-inl.h"
#include "mirror/dex_cache.h"
-#include "scoped_thread_state_change.h"
+#include "scoped_thread_state_change-inl.h"
namespace art {
static inline mirror::DexCache* FindDexCacheWithHint(Thread* self,
const DexFile& dex_file,
Handle<mirror::DexCache> hint_dex_cache)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
if (LIKELY(hint_dex_cache->GetDexFile() == &dex_file)) {
return hint_dex_cache.Get();
} else {
@@ -35,7 +35,7 @@ static inline mirror::DexCache* FindDexCacheWithHint(Thread* self,
}
}
-static inline ReferenceTypeInfo::TypeHandle GetRootHandle(StackHandleScopeCollection* handles,
+static inline ReferenceTypeInfo::TypeHandle GetRootHandle(VariableSizedHandleScope* handles,
ClassLinker::ClassRoot class_root,
ReferenceTypeInfo::TypeHandle* cache) {
if (!ReferenceTypeInfo::IsValidHandle(*cache)) {
@@ -84,8 +84,8 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
void VisitNewArray(HNewArray* instr) OVERRIDE;
void VisitParameterValue(HParameterValue* instr) OVERRIDE;
void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info);
- void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ void SetClassAsTypeInfo(HInstruction* instr, ObjPtr<mirror::Class> klass, bool is_exact)
+ REQUIRES_SHARED(Locks::mutator_lock_);
void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE;
void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE;
void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE;
@@ -109,7 +109,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
Handle<mirror::DexCache> hint_dex_cache,
- StackHandleScopeCollection* handles,
+ VariableSizedHandleScope* handles,
bool is_first_run,
const char* name)
: HOptimization(graph, name),
@@ -123,8 +123,7 @@ void ReferenceTypePropagation::ValidateTypes() {
// TODO: move this to the graph checker.
if (kIsDebugBuild) {
ScopedObjectAccess soa(Thread::Current());
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) {
HInstruction* instr = iti.Current();
if (instr->GetType() == Primitive::kPrimNot) {
@@ -158,8 +157,8 @@ void ReferenceTypePropagation::Run() {
// To properly propagate type info we need to visit in the dominator-based order.
// Reverse post order guarantees a node's dominators are visited first.
// We take advantage of this order in `VisitBasicBlock`.
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ VisitBasicBlock(block);
}
ProcessWorklist();
@@ -194,7 +193,7 @@ static bool ShouldCreateBoundType(HInstruction* position,
ReferenceTypeInfo upper_bound,
HInstruction* dominator_instr,
HBasicBlock* dominator_block)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
  // If the position where we should insert the bound type is not already
  // a bound type then we need to create one.
if (position == nullptr || !position->IsBoundType()) {
@@ -427,7 +426,7 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) {
}
void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* instr,
- mirror::Class* klass,
+ ObjPtr<mirror::Class> klass,
bool is_exact) {
if (instr->IsInvokeStaticOrDirect() && instr->AsInvokeStaticOrDirect()->IsStringInit()) {
// Calls to String.<init> are replaced with a StringFactory.
@@ -448,13 +447,13 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst
mirror::Class* declaring_class = method->GetDeclaringClass();
DCHECK(declaring_class != nullptr);
DCHECK(declaring_class->IsStringClass())
- << "Expected String class: " << PrettyDescriptor(declaring_class);
+ << "Expected String class: " << declaring_class->PrettyDescriptor();
DCHECK(method->IsConstructor())
- << "Expected String.<init>: " << PrettyMethod(method);
+ << "Expected String.<init>: " << method->PrettyMethod();
}
instr->SetReferenceTypeInfo(
ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
- } else if (IsAdmissible(klass)) {
+ } else if (IsAdmissible(klass.Ptr())) {
ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass);
is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes();
instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
@@ -487,7 +486,7 @@ static mirror::Class* GetClassFromDexCache(Thread* self,
const DexFile& dex_file,
uint16_t type_idx,
Handle<mirror::DexCache> hint_dex_cache)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
mirror::DexCache* dex_cache = FindDexCacheWithHint(self, dex_file, hint_dex_cache);
// Get type from dex cache assuming it was populated by the verifier.
return dex_cache->GetResolvedType(type_idx);
@@ -512,12 +511,13 @@ void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstructio
}
ScopedObjectAccess soa(Thread::Current());
- mirror::Class* klass = nullptr;
+ ObjPtr<mirror::Class> klass;
// The field index is unknown only during tests.
if (info.GetFieldIndex() != kUnknownFieldIndex) {
ClassLinker* cl = Runtime::Current()->GetClassLinker();
- ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
+ ArtField* field = cl->GetResolvedField(info.GetFieldIndex(),
+ MakeObjPtr(info.GetDexCache().Get()));
// TODO: There are certain cases where we can't resolve the field.
// b/21914925 is open to keep track of a repro case for this issue.
if (field != nullptr) {
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index edd83bf5de..4663471729 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -21,6 +21,7 @@
#include "driver/dex_compilation_unit.h"
#include "handle_scope-inl.h"
#include "nodes.h"
+#include "obj_ptr.h"
#include "optimization.h"
#include "optimizing_compiler_stats.h"
@@ -33,7 +34,7 @@ class ReferenceTypePropagation : public HOptimization {
public:
ReferenceTypePropagation(HGraph* graph,
Handle<mirror::DexCache> hint_dex_cache,
- StackHandleScopeCollection* handles,
+ VariableSizedHandleScope* handles,
bool is_first_run,
const char* name = kReferenceTypePropagationPassName);
@@ -44,7 +45,7 @@ class ReferenceTypePropagation : public HOptimization {
// Returns true if klass is admissible to the propagation: non-null and resolved.
// For an array type, we also check if the component type is admissible.
- static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+ static bool IsAdmissible(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) {
return klass != nullptr &&
klass->IsResolved() &&
(!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
@@ -55,10 +56,15 @@ class ReferenceTypePropagation : public HOptimization {
private:
class HandleCache {
public:
- explicit HandleCache(StackHandleScopeCollection* handles) : handles_(handles) { }
+ explicit HandleCache(VariableSizedHandleScope* handles) : handles_(handles) { }
template <typename T>
- MutableHandle<T> NewHandle(T* object) SHARED_REQUIRES(Locks::mutator_lock_) {
+ MutableHandle<T> NewHandle(T* object) REQUIRES_SHARED(Locks::mutator_lock_) {
+ return handles_->NewHandle(object);
+ }
+
+ template <typename T>
+ MutableHandle<T> NewHandle(ObjPtr<T> object) REQUIRES_SHARED(Locks::mutator_lock_) {
return handles_->NewHandle(object);
}
@@ -68,7 +74,7 @@ class ReferenceTypePropagation : public HOptimization {
ReferenceTypeInfo::TypeHandle GetThrowableClassHandle();
private:
- StackHandleScopeCollection* handles_;
+ VariableSizedHandleScope* handles_;
ReferenceTypeInfo::TypeHandle object_class_handle_;
ReferenceTypeInfo::TypeHandle class_class_handle_;
@@ -80,8 +86,8 @@ class ReferenceTypePropagation : public HOptimization {
void VisitPhi(HPhi* phi);
void VisitBasicBlock(HBasicBlock* block);
- void UpdateBoundType(HBoundType* bound_type) SHARED_REQUIRES(Locks::mutator_lock_);
- void UpdatePhi(HPhi* phi) SHARED_REQUIRES(Locks::mutator_lock_);
+ void UpdateBoundType(HBoundType* bound_type) REQUIRES_SHARED(Locks::mutator_lock_);
+ void UpdatePhi(HPhi* phi) REQUIRES_SHARED(Locks::mutator_lock_);
void BoundTypeForIfNotNull(HBasicBlock* block);
void BoundTypeForIfInstanceOf(HBasicBlock* block);
void ProcessWorklist();
@@ -92,10 +98,10 @@ class ReferenceTypePropagation : public HOptimization {
bool UpdateReferenceTypeInfo(HInstruction* instr);
static void UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
- SHARED_REQUIRES(Locks::mutator_lock_);
+ REQUIRES_SHARED(Locks::mutator_lock_);
void ValidateTypes();
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index 7649b5093c..b061c871b0 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -35,7 +35,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest {
~ReferenceTypePropagationTest() { }
- void SetupPropagation(StackHandleScopeCollection* handles) {
+ void SetupPropagation(VariableSizedHandleScope* handles) {
graph_->InitializeInexactObjectRTI(handles);
propagation_ = new (&allocator_) ReferenceTypePropagation(graph_,
Handle<mirror::DexCache>(),
@@ -46,7 +46,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest {
// Relay method to merge type in reference type propagation.
ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a,
- const ReferenceTypeInfo& b) SHARED_REQUIRES(Locks::mutator_lock_) {
+ const ReferenceTypeInfo& b) REQUIRES_SHARED(Locks::mutator_lock_) {
return propagation_->MergeTypes(a, b);
}
@@ -56,12 +56,12 @@ class ReferenceTypePropagationTest : public CommonCompilerTest {
}
// Helper method to construct the Object type.
- ReferenceTypeInfo ObjectType(bool is_exact = true) SHARED_REQUIRES(Locks::mutator_lock_) {
+ ReferenceTypeInfo ObjectType(bool is_exact = true) REQUIRES_SHARED(Locks::mutator_lock_) {
return ReferenceTypeInfo::Create(propagation_->handle_cache_.GetObjectClassHandle(), is_exact);
}
// Helper method to construct the String type.
- ReferenceTypeInfo StringType(bool is_exact = true) SHARED_REQUIRES(Locks::mutator_lock_) {
+ ReferenceTypeInfo StringType(bool is_exact = true) REQUIRES_SHARED(Locks::mutator_lock_) {
return ReferenceTypeInfo::Create(propagation_->handle_cache_.GetStringClassHandle(), is_exact);
}
@@ -79,7 +79,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest {
TEST_F(ReferenceTypePropagationTest, ProperSetup) {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScopeCollection handles(soa.Self());
+ VariableSizedHandleScope handles(soa.Self());
SetupPropagation(&handles);
EXPECT_TRUE(propagation_ != nullptr);
@@ -88,7 +88,7 @@ TEST_F(ReferenceTypePropagationTest, ProperSetup) {
TEST_F(ReferenceTypePropagationTest, MergeInvalidTypes) {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScopeCollection handles(soa.Self());
+ VariableSizedHandleScope handles(soa.Self());
SetupPropagation(&handles);
// Two invalid types.
@@ -120,7 +120,7 @@ TEST_F(ReferenceTypePropagationTest, MergeInvalidTypes) {
TEST_F(ReferenceTypePropagationTest, MergeValidTypes) {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScopeCollection handles(soa.Self());
+ VariableSizedHandleScope handles(soa.Self());
SetupPropagation(&handles);
// Same types.
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 34502869e4..caf66474eb 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -17,6 +17,7 @@
#include "register_allocation_resolver.h"
#include "code_generator.h"
+#include "linear_order.h"
#include "ssa_liveness_analysis.h"
namespace art {
@@ -28,8 +29,7 @@ RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator
codegen_(codegen),
liveness_(liveness) {}
-void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
- size_t max_safepoint_live_fp_regs,
+void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoints,
size_t reserved_out_slots,
size_t int_spill_slots,
size_t long_spill_slots,
@@ -43,10 +43,13 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
+ double_spill_slots
+ catch_phi_spill_slots;
+ // Update safepoints and calculate the size of the spills.
+ UpdateSafepointLiveRegisters();
+ size_t maximum_safepoint_spill_size = CalculateMaximumSafepointSpillSize(safepoints);
+
// Computes frame size and spill mask.
codegen_->InitializeCodeGeneration(spill_slots,
- max_safepoint_live_core_regs,
- max_safepoint_live_fp_regs,
+ maximum_safepoint_spill_size,
reserved_out_slots, // Includes slot(s) for the art method.
codegen_->GetGraph()->GetLinearOrder());
@@ -135,13 +138,11 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
// Connect siblings and resolve inputs.
for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- ConnectSiblings(instruction->GetLiveInterval(),
- max_safepoint_live_core_regs + max_safepoint_live_fp_regs);
+ ConnectSiblings(instruction->GetLiveInterval());
}
// Resolve non-linear control flow across branches. Order does not matter.
- for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : codegen_->GetGraph()->GetLinearOrder()) {
if (block->IsCatchBlock() ||
(block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
// Instructions live at the top of catch blocks or irreducible loop header
@@ -171,15 +172,14 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
}
// Resolve phi inputs. Order does not matter.
- for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
- if (current->IsCatchBlock()) {
+ for (HBasicBlock* block : codegen_->GetGraph()->GetLinearOrder()) {
+ if (block->IsCatchBlock()) {
// Catch phi values are set at runtime by the exception delivery mechanism.
} else {
- for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
HInstruction* phi = inst_it.Current();
- for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
- HBasicBlock* predecessor = current->GetPredecessors()[i];
+ for (size_t i = 0, e = block->GetPredecessors().size(); i < e; ++i) {
+ HBasicBlock* predecessor = block->GetPredecessors()[i];
DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
HInstruction* input = phi->InputAt(i);
Location source = input->GetLiveInterval()->GetLocationAt(
@@ -222,8 +222,73 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
}
}
-void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval,
- size_t max_safepoint_live_regs) {
+void RegisterAllocationResolver::UpdateSafepointLiveRegisters() {
+ for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ for (LiveInterval* current = instruction->GetLiveInterval();
+ current != nullptr;
+ current = current->GetNextSibling()) {
+ if (!current->HasRegister()) {
+ continue;
+ }
+ Location source = current->ToLocation();
+ for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
+ safepoint_position != nullptr;
+ safepoint_position = safepoint_position->GetNext()) {
+ DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
+ LocationSummary* locations = safepoint_position->GetLocations();
+ switch (source.GetKind()) {
+ case Location::kRegister:
+ case Location::kFpuRegister: {
+ locations->AddLiveRegister(source);
+ break;
+ }
+ case Location::kRegisterPair:
+ case Location::kFpuRegisterPair: {
+ locations->AddLiveRegister(source.ToLow());
+ locations->AddLiveRegister(source.ToHigh());
+ break;
+ }
+ case Location::kStackSlot: // Fall-through
+ case Location::kDoubleStackSlot: // Fall-through
+ case Location::kConstant: {
+ // Nothing to do.
+ break;
+ }
+ default: {
+ LOG(FATAL) << "Unexpected location for object";
+ }
+ }
+ }
+ }
+ }
+}
+
+size_t RegisterAllocationResolver::CalculateMaximumSafepointSpillSize(
+ ArrayRef<HInstruction* const> safepoints) {
+ size_t core_register_spill_size = codegen_->GetWordSize();
+ size_t fp_register_spill_size = codegen_->GetFloatingPointSpillSlotSize();
+ size_t maximum_safepoint_spill_size = 0u;
+ for (HInstruction* instruction : safepoints) {
+ LocationSummary* locations = instruction->GetLocations();
+ if (locations->OnlyCallsOnSlowPath()) {
+ size_t core_spills =
+ codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true);
+ size_t fp_spills =
+ codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false);
+ size_t spill_size =
+ core_register_spill_size * core_spills + fp_register_spill_size * fp_spills;
+ maximum_safepoint_spill_size = std::max(maximum_safepoint_spill_size, spill_size);
+ } else if (locations->CallsOnMainAndSlowPath()) {
+ // Nothing to spill on the slow path if the main path already clobbers caller-saves.
+ DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true));
+ DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false));
+ }
+ }
+ return maximum_safepoint_spill_size;
+}
+
+void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
LiveInterval* current = interval;
if (current->HasSpillSlot()
&& current->HasRegister()
@@ -306,48 +371,16 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval,
safepoint_position = safepoint_position->GetNext()) {
DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
- LocationSummary* locations = safepoint_position->GetLocations();
- if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+ if (current->GetType() == Primitive::kPrimNot) {
DCHECK(interval->GetDefinedBy()->IsActualObject())
<< interval->GetDefinedBy()->DebugName()
<< "@" << safepoint_position->GetInstruction()->DebugName();
- locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
- }
-
- switch (source.GetKind()) {
- case Location::kRegister: {
- locations->AddLiveRegister(source);
- if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
- DCHECK_LE(locations->GetNumberOfLiveRegisters(),
- max_safepoint_live_regs);
- }
- if (current->GetType() == Primitive::kPrimNot) {
- DCHECK(interval->GetDefinedBy()->IsActualObject())
- << interval->GetDefinedBy()->DebugName()
- << "@" << safepoint_position->GetInstruction()->DebugName();
- locations->SetRegisterBit(source.reg());
- }
- break;
- }
- case Location::kFpuRegister: {
- locations->AddLiveRegister(source);
- break;
- }
-
- case Location::kRegisterPair:
- case Location::kFpuRegisterPair: {
- locations->AddLiveRegister(source.ToLow());
- locations->AddLiveRegister(source.ToHigh());
- break;
- }
- case Location::kStackSlot: // Fall-through
- case Location::kDoubleStackSlot: // Fall-through
- case Location::kConstant: {
- // Nothing to do.
- break;
+ LocationSummary* locations = safepoint_position->GetLocations();
+ if (current->GetParent()->HasSpillSlot()) {
+ locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
}
- default: {
- LOG(FATAL) << "Unexpected location for object";
+ if (source.GetKind() == Location::kRegister) {
+ locations->SetRegisterBit(source.reg());
}
}
}
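CalculateMaximumSafepointSpillSize() above takes, over all safepoints that only call on the slow
path, the maximum of core_spills * word_size + fp_spills * fp_slot_size. A self-contained sketch
of that arithmetic with hypothetical slot sizes and spill counts:

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Each pair holds (core spills, fp spills) for one slow-path-only safepoint.
    size_t MaxSafepointSpillSize(size_t word_size, size_t fp_slot_size,
                                 const std::vector<std::pair<size_t, size_t>>& safepoints) {
      size_t max_size = 0u;
      for (const auto& sp : safepoints) {
        max_size = std::max(max_size, sp.first * word_size + sp.second * fp_slot_size);
      }
      return max_size;
    }

    // With 8-byte core and fp spill slots: MaxSafepointSpillSize(8, 8, {{3, 2}, {1, 0}}) == 40.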
diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h
index 6ceb9bc955..d48b1a0bb9 100644
--- a/compiler/optimizing/register_allocation_resolver.h
+++ b/compiler/optimizing/register_allocation_resolver.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
#include "base/arena_containers.h"
+#include "base/array_ref.h"
#include "base/value_object.h"
#include "primitive.h"
@@ -43,8 +44,7 @@ class RegisterAllocationResolver : ValueObject {
CodeGenerator* codegen,
const SsaLivenessAnalysis& liveness);
- void Resolve(size_t max_safepoint_live_core_regs,
- size_t max_safepoint_live_fp_regs,
+ void Resolve(ArrayRef<HInstruction* const> safepoints,
size_t reserved_out_slots, // Includes slot(s) for the art method.
size_t int_spill_slots,
size_t long_spill_slots,
@@ -54,10 +54,14 @@ class RegisterAllocationResolver : ValueObject {
const ArenaVector<LiveInterval*>& temp_intervals);
private:
+ // Update live registers of safepoint location summary.
+ void UpdateSafepointLiveRegisters();
+
+ // Calculate the maximum size of the spill area for safepoints.
+ size_t CalculateMaximumSafepointSpillSize(ArrayRef<HInstruction* const> safepoints);
+
// Connect adjacent siblings within blocks, and resolve inputs along the way.
- // Uses max_safepoint_live_regs to check that we did not underestimate the
- // number of live registers at safepoints.
- void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs);
+ void ConnectSiblings(LiveInterval* interval);
// Connect siblings between block entries and exits.
void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 2367ce1aeb..5b768d5d67 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -21,6 +21,7 @@
#include "base/bit_vector-inl.h"
#include "code_generator.h"
+#include "register_allocator_graph_color.h"
#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
@@ -41,6 +42,8 @@ RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator,
switch (strategy) {
case kRegisterAllocatorLinearScan:
return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis);
+ case kRegisterAllocatorGraphColor:
+ return new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis);
default:
LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
UNREACHABLE();
@@ -163,6 +166,19 @@ bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& inte
} else {
codegen.DumpFloatingPointRegister(message, current->GetRegister());
}
+ for (LiveInterval* interval : intervals) {
+ if (interval->HasRegister()
+ && interval->GetRegister() == current->GetRegister()
+ && interval->CoversSlow(j)) {
+ message << std::endl;
+ if (interval->GetDefinedBy() != nullptr) {
+ message << interval->GetDefinedBy()->GetKind() << " ";
+ } else {
+ message << "physical ";
+ }
+ interval->Dump(message);
+ }
+ }
LOG(FATAL) << message.str();
} else {
return false;
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 729eede66e..7e1fff8e2b 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -40,7 +40,8 @@ class SsaLivenessAnalysis;
class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> {
public:
enum Strategy {
- kRegisterAllocatorLinearScan
+ kRegisterAllocatorLinearScan,
+ kRegisterAllocatorGraphColor
};
static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan;
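With the new enumerator, the graph coloring allocator can be requested through the factory shown
in register_allocator.cc above. A minimal sketch of such a call site; graph, codegen and liveness
are assumed to already exist:

    RegisterAllocator* register_allocator = RegisterAllocator::Create(
        graph->GetArena(), codegen, liveness, RegisterAllocator::kRegisterAllocatorGraphColor);
    register_allocator->AllocateRegisters();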
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
new file mode 100644
index 0000000000..aa0d3710fa
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -0,0 +1,2042 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator_graph_color.h"
+
+#include "code_generator.h"
+#include "linear_order.h"
+#include "register_allocation_resolver.h"
+#include "ssa_liveness_analysis.h"
+#include "thread-inl.h"
+
+namespace art {
+
+// Highest number of registers that we support for any platform. This can be used for std::bitset,
+// for example, which needs to know its size at compile time.
+static constexpr size_t kMaxNumRegs = 32;
+
+// The maximum number of graph coloring attempts before triggering a DCHECK.
+// This is meant to catch changes to the graph coloring algorithm that undermine its forward
+// progress guarantees. Forward progress for the algorithm means splitting live intervals on
+// every graph coloring attempt so that eventually the interference graph will be sparse enough
+// to color. The main threat to forward progress is trying to split short intervals which cannot be
+// split further; this could cause infinite looping because the interference graph would never
+// change. This is avoided by prioritizing short intervals before long ones, so that long
+// intervals are split when coloring fails.
+static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
+
+// We always want to avoid spilling inside loops.
+static constexpr size_t kLoopSpillWeightMultiplier = 10;
+
+// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
+static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
+
+// We avoid moves in blocks that dominate the exit block, since these blocks will
+// be executed on every path through the method.
+static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
+
+enum class CoalesceKind {
+ kAdjacentSibling, // Prevents moves at interval split points.
+ kFixedOutputSibling, // Prevents moves from a fixed output location.
+ kFixedInput, // Prevents moves into a fixed input location.
+ kNonlinearControlFlow, // Prevents moves between blocks.
+ kPhi, // Prevents phi resolution moves.
+ kFirstInput, // Prevents a single input move.
+ kAnyInput, // May lead to better instruction selection / smaller encodings.
+};
+
+std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
+ return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
+}
+
+static size_t LoopDepthAt(HBasicBlock* block) {
+ HLoopInformation* loop_info = block->GetLoopInformation();
+ size_t depth = 0;
+ while (loop_info != nullptr) {
+ ++depth;
+ loop_info = loop_info->GetPreHeader()->GetLoopInformation();
+ }
+ return depth;
+}
+
+// Return the runtime cost of inserting a move instruction at the specified location.
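+// For example, with the multipliers above, a move in a single-jump block that dominates the exit
+// block at loop depth 2 would cost 1 * 2 * 2 * 10 * 10 = 400.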
+static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
+ HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
+ DCHECK(block != nullptr);
+ size_t cost = 1;
+ if (block->IsSingleJump()) {
+ cost *= kSingleJumpBlockWeightMultiplier;
+ }
+ if (block->Dominates(block->GetGraph()->GetExitBlock())) {
+ cost *= kDominatesExitBlockWeightMultiplier;
+ }
+ for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
+ cost *= kLoopSpillWeightMultiplier;
+ }
+ return cost;
+}
+
+// In general, we estimate coalesce priority by whether it will definitely avoid a move,
+// and by how likely it is to create an interference graph that's harder to color.
+static size_t ComputeCoalescePriority(CoalesceKind kind,
+ size_t position,
+ const SsaLivenessAnalysis& liveness) {
+ if (kind == CoalesceKind::kAnyInput) {
+ // This type of coalescing can affect instruction selection, but not moves, so we
+ // give it the lowest priority.
+ return 0;
+ } else {
+ return CostForMoveAt(position, liveness);
+ }
+}
+
+enum class CoalesceStage {
+ kWorklist, // Currently in the iterative coalescing worklist.
+ kActive, // Not in a worklist, but could be considered again during iterative coalescing.
+ kInactive, // No longer considered until last-chance coalescing.
+ kDefunct, // Either the two nodes interfere, or have already been coalesced.
+};
+
+std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
+ return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
+}
+
+// Represents a coalesce opportunity between two nodes.
+struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
+ CoalesceOpportunity(InterferenceNode* a,
+ InterferenceNode* b,
+ CoalesceKind kind,
+ size_t position,
+ const SsaLivenessAnalysis& liveness)
+ : node_a(a),
+ node_b(b),
+ stage(CoalesceStage::kWorklist),
+ priority(ComputeCoalescePriority(kind, position, liveness)) {}
+
+ // Compare two coalesce opportunities based on their priority.
+ // Return true if lhs has a lower priority than that of rhs.
+ static bool CmpPriority(const CoalesceOpportunity* lhs,
+ const CoalesceOpportunity* rhs) {
+ return lhs->priority < rhs->priority;
+ }
+
+ InterferenceNode* const node_a;
+ InterferenceNode* const node_b;
+
+ // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
+ // and whether it should still be considered.
+ CoalesceStage stage;
+
+ // The priority of this coalesce opportunity, based on heuristics.
+ const size_t priority;
+};
+
+enum class NodeStage {
+ kInitial, // Uninitialized.
+ kPrecolored, // Marks fixed nodes.
+ kSafepoint, // Marks safepoint nodes.
+ kPrunable, // Marks uncolored nodes in the interference graph.
+ kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers.
+ kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers.
+ kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers.
+ kPruned // Marks nodes already pruned from the interference graph.
+};
+
+std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
+ return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
+}
+
+// Returns the estimated cost of spilling a particular live interval.
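+// For example, an interval of length 40 whose two register uses each carry a move cost of 10
+// gets weight (10 + 10) / 40 = 0.5. Fixed-register intervals and length-1 intervals get the
+// minimum and maximum float values, since the former cannot be spilled and the latter cannot
+// be split any further.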
+static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
+ if (interval->HasRegister()) {
+ // Intervals with a fixed register cannot be spilled.
+ return std::numeric_limits<float>::min();
+ }
+
+ size_t length = interval->GetLength();
+ if (length == 1) {
+ // Tiny intervals should have maximum priority, since they cannot be split any further.
+ return std::numeric_limits<float>::max();
+ }
+
+ size_t use_weight = 0;
+ if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
+ // Cost for spilling at a register definition point.
+ use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
+ }
+
+ UsePosition* use = interval->GetFirstUse();
+ while (use != nullptr && use->GetPosition() <= interval->GetStart()) {
+ // Skip uses before the start of this live interval.
+ use = use->GetNext();
+ }
+
+ while (use != nullptr && use->GetPosition() <= interval->GetEnd()) {
+ if (use->GetUser() != nullptr && use->RequiresRegister()) {
+ // Cost for spilling at a register use point.
+ use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness);
+ }
+ use = use->GetNext();
+ }
+
+ // We divide by the length of the interval because we want to prioritize
+ // short intervals; we do not benefit much if we split them further.
+ return static_cast<float>(use_weight) / static_cast<float>(length);
+}
+
+// Interference nodes make up the interference graph, which is the primary data structure in
+// graph coloring register allocation. Each node represents a single live interval, and contains
+// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
+// pre-colored nodes never contain outgoing edges (only incoming ones).
+//
+// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
+// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
+//
+// Note that a pair interval is represented by a single node in the interference graph, which
+// essentially requires two colors. One consequence of this is that the degree of a node is not
+// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
+// number of colors with which a node could interfere. We model this by giving edges different
+// weights (1 or 2) to control how much it increases the degree of adjacent nodes.
+// For example, the edge between two single nodes will have weight 1. On the other hand,
+// the edge between a single node and a pair node will have weight 2. This is because the pair
+// node could block up to two colors for the single node, and because the single node could
+// block an entire two-register aligned slot for the pair node.
+// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
+// and thus whether it is safe to prune it from the interference graph early on.
+class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
+ public:
+ InterferenceNode(ArenaAllocator* allocator,
+ LiveInterval* interval,
+ const SsaLivenessAnalysis& liveness)
+ : stage(NodeStage::kInitial),
+ interval_(interval),
+ adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
+ alias_(this),
+ spill_weight_(ComputeSpillWeight(interval, liveness)),
+ requires_color_(interval->RequiresRegister()),
+ needs_spill_slot_(false) {
+ DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
+ }
+
+ void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) {
+ DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
+ DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
+ DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
+ DCHECK_NE(stage, NodeStage::kPruned);
+ DCHECK_NE(other->stage, NodeStage::kPruned);
+ if (guaranteed_not_interfering_yet) {
+ DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other)
+ == adjacent_nodes_.end());
+ adjacent_nodes_.push_back(other);
+ out_degree_ += EdgeWeightWith(other);
+ } else {
+ auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+ if (it == adjacent_nodes_.end()) {
+ adjacent_nodes_.push_back(other);
+ out_degree_ += EdgeWeightWith(other);
+ }
+ }
+ }
+
+ void RemoveInterference(InterferenceNode* other) {
+ DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
+ DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
+ auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+ if (it != adjacent_nodes_.end()) {
+ adjacent_nodes_.erase(it);
+ out_degree_ -= EdgeWeightWith(other);
+ }
+ }
+
+ bool ContainsInterference(InterferenceNode* other) const {
+ DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
+ DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
+ auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other);
+ return it != adjacent_nodes_.end();
+ }
+
+ LiveInterval* GetInterval() const {
+ return interval_;
+ }
+
+ const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const {
+ return adjacent_nodes_;
+ }
+
+ size_t GetOutDegree() const {
+ // Pre-colored nodes have infinite degree.
+ DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max());
+ return out_degree_;
+ }
+
+ void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) {
+ coalesce_opportunities_.push_back(opportunity);
+ }
+
+ void ClearCoalesceOpportunities() {
+ coalesce_opportunities_.clear();
+ }
+
+ bool IsMoveRelated() const {
+ for (CoalesceOpportunity* opportunity : coalesce_opportunities_) {
+ if (opportunity->stage == CoalesceStage::kWorklist ||
+ opportunity->stage == CoalesceStage::kActive) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Return whether this node already has a color.
+ // Used to find fixed nodes in the interference graph before coloring.
+ bool IsPrecolored() const {
+ return interval_->HasRegister();
+ }
+
+ bool IsPair() const {
+ return interval_->HasHighInterval();
+ }
+
+ void SetAlias(InterferenceNode* rep) {
+ DCHECK_NE(rep->stage, NodeStage::kPruned);
+ DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
+ alias_ = rep;
+ }
+
+ InterferenceNode* GetAlias() {
+ if (alias_ != this) {
+ // Recurse in order to flatten tree of alias pointers.
+ alias_ = alias_->GetAlias();
+ }
+ return alias_;
+ }
+
+ const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const {
+ return coalesce_opportunities_;
+ }
+
+ float GetSpillWeight() const {
+ return spill_weight_;
+ }
+
+ bool RequiresColor() const {
+ return requires_color_;
+ }
+
+ // We give extra weight to edges adjacent to pair nodes. See the general comment on the
+ // interference graph above.
+ size_t EdgeWeightWith(const InterferenceNode* other) const {
+ return (IsPair() || other->IsPair()) ? 2 : 1;
+ }
+
+ bool NeedsSpillSlot() const {
+ return needs_spill_slot_;
+ }
+
+ void SetNeedsSpillSlot() {
+ needs_spill_slot_ = true;
+ }
+
+ // The current stage of this node, indicating which worklist it belongs to.
+ NodeStage stage;
+
+ private:
+ // The live interval that this node represents.
+ LiveInterval* const interval_;
+
+ // All nodes interfering with this one.
+ // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
+ // set sizes that we encounter. Using a vector leads to much better performance.
+ ArenaVector<InterferenceNode*> adjacent_nodes_;
+
+ // Interference nodes that this node should be coalesced with to reduce moves.
+ ArenaVector<CoalesceOpportunity*> coalesce_opportunities_;
+
+ // The maximum number of colors with which this node could interfere. This could be more than
+ // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
+ // We use "out" degree because incoming edges come from nodes already pruned from the graph,
+ // and do not affect the coloring of this node.
+ // Pre-colored nodes are treated as having infinite degree.
+ size_t out_degree_;
+
+ // The node representing this node in the interference graph.
+ // Initially set to `this`, and only changed if this node is coalesced into another.
+ InterferenceNode* alias_;
+
+ // The cost of splitting and spilling this interval to the stack.
+ // Nodes with a higher spill weight should be prioritized when assigning registers.
+ // This is essentially based on use density and location; short intervals with many uses inside
+ // deeply nested loops have a high spill weight.
+ const float spill_weight_;
+
+ const bool requires_color_;
+
+ bool needs_spill_slot_;
+
+ DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
+};
+
+// The order in which we color nodes is important. To guarantee forward progress,
+// we prioritize intervals that require registers, and after that we prioritize
+// short intervals. That way, if we fail to color a node, it either won't require a
+// register, or it will be a long interval that can be split in order to make the
+// interference graph sparser.
+// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
+// (This metric is secondary to the forward progress requirements above.)
+// TODO: May also want to consider:
+// - Constants (since they can be rematerialized)
+// - Allocated spill slots
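+// For example, a node that requires a color always sorts ahead of one that does not; between two
+// nodes that both require a color, the one with the higher spill weight comes first.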
+static bool HasGreaterNodePriority(const InterferenceNode* lhs,
+ const InterferenceNode* rhs) {
+ // (1) Prioritize the node that requires a color.
+ if (lhs->RequiresColor() != rhs->RequiresColor()) {
+ return lhs->RequiresColor();
+ }
+
+ // (2) Prioritize the interval that has a higher spill weight.
+ return lhs->GetSpillWeight() > rhs->GetSpillWeight();
+}
+
+// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
+// and provides methods for each phase of the attempt.
+class ColoringIteration {
+ public:
+ ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
+ ArenaAllocator* allocator,
+ bool processing_core_regs,
+ size_t num_regs)
+ : register_allocator_(register_allocator),
+ allocator_(allocator),
+ processing_core_regs_(processing_core_regs),
+ num_regs_(num_regs),
+ interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
+ coalesce_worklist_(CoalesceOpportunity::CmpPriority,
+ allocator->Adapter(kArenaAllocRegisterAllocator)) {}
+
+ // Use the intervals collected from instructions to construct an
+ // interference graph mapping intervals to adjacency lists.
+ // Also, collect synthesized safepoint nodes, used to keep
+ // track of live intervals across safepoints.
+ // TODO: Should build safepoints elsewhere.
+ void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
+ const ArenaVector<InterferenceNode*>& physical_nodes);
+
+ // Add coalesce opportunities to interference nodes.
+ void FindCoalesceOpportunities();
+
+ // Prune nodes from the interference graph to be colored later. Build
+ // a stack (pruned_nodes) containing these intervals in an order determined
+ // by various heuristics.
+ void PruneInterferenceGraph();
+
+  // Process pruned_nodes_ to color the interference graph, spilling when
+ // necessary. Returns true if successful. Else, some intervals have been
+ // split, and the interference graph should be rebuilt for another attempt.
+ bool ColorInterferenceGraph();
+
+ // Return prunable nodes.
+ // The register allocator will need to access prunable nodes after coloring
+ // in order to tell the code generator which registers have been assigned.
+ const ArenaVector<InterferenceNode*>& GetPrunableNodes() const {
+ return prunable_nodes_;
+ }
+
+ private:
+ // Create a coalesce opportunity between two nodes.
+ void CreateCoalesceOpportunity(InterferenceNode* a,
+ InterferenceNode* b,
+ CoalesceKind kind,
+ size_t position);
+
+ // Add an edge in the interference graph, if valid.
+ // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
+ // when possible.
+ void AddPotentialInterference(InterferenceNode* from,
+ InterferenceNode* to,
+ bool guaranteed_not_interfering_yet,
+ bool both_directions = true);
+
+ // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
+ // may be pruned from the interference graph.
+ void FreezeMoves(InterferenceNode* node);
+
+ // Prune a node from the interference graph, updating worklists if necessary.
+ void PruneNode(InterferenceNode* node);
+
+ // Add coalesce opportunities associated with this node to the coalesce worklist.
+ void EnableCoalesceOpportunities(InterferenceNode* node);
+
+  // If needed, moves `node` from the freeze worklist to the simplify worklist.
+ void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
+
+ // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
+ bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+ // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
+ bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+ void Coalesce(CoalesceOpportunity* opportunity);
+
+ // Merge `from` into `into` in the interference graph.
+ void Combine(InterferenceNode* from, InterferenceNode* into);
+
+ // A reference to the register allocator instance,
+ // needed to split intervals and assign spill slots.
+ RegisterAllocatorGraphColor* register_allocator_;
+
+ // An arena allocator used for a single graph coloring attempt.
+ ArenaAllocator* allocator_;
+
+ const bool processing_core_regs_;
+
+ const size_t num_regs_;
+
+ // A map from live intervals to interference nodes.
+ ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
+
+ // Uncolored nodes that should be pruned from the interference graph.
+ ArenaVector<InterferenceNode*> prunable_nodes_;
+
+  // A stack of nodes pruned from the interference graph, waiting to be colored.
+ ArenaStdStack<InterferenceNode*> pruned_nodes_;
+
+  // A queue containing low degree, non-move-related nodes that can be pruned immediately.
+ ArenaDeque<InterferenceNode*> simplify_worklist_;
+
+ // A queue containing low degree, move-related nodes.
+ ArenaDeque<InterferenceNode*> freeze_worklist_;
+
+ // A queue containing high degree nodes.
+ // If we have to prune from the spill worklist, we cannot guarantee
+ // the pruned node a color, so we order the worklist by priority.
+ ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
+
+ // A queue containing coalesce opportunities.
+ // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
+ // inside of loops) are more important than others.
+ ArenaPriorityQueue<CoalesceOpportunity*,
+ decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
+
+ DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
+};
+
+static bool IsCoreInterval(LiveInterval* interval) {
+ return !Primitive::IsFloatingPointType(interval->GetType());
+}
+
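+// With kVRegSize == 4, the ArtMethod pointer reserves 8 / 4 == 2 vreg slots on 64-bit targets
+// and 4 / 4 == 1 slot on 32-bit targets.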
+static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
+ return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
+}
+
+RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness,
+ bool iterative_move_coalescing)
+ : RegisterAllocator(allocator, codegen, liveness),
+ iterative_move_coalescing_(iterative_move_coalescing),
+ core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ num_int_spill_slots_(0),
+ num_double_spill_slots_(0),
+ num_float_spill_slots_(0),
+ num_long_spill_slots_(0),
+ catch_phi_spill_slot_counter_(0),
+ reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
+ reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
+ // Before we ask for blocked registers, set them up in the code generator.
+ codegen->SetupBlockedRegisters();
+
+ // Initialize physical core register live intervals and blocked registers.
+ // This includes globally blocked registers, such as the stack pointer.
+ physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
+ for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+ LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt);
+ physical_core_nodes_[i] =
+ new (allocator_) InterferenceNode(allocator_, interval, liveness);
+ physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
+ core_intervals_.push_back(interval);
+ if (codegen_->IsBlockedCoreRegister(i)) {
+ interval->AddRange(0, liveness.GetMaxLifetimePosition());
+ }
+ }
+ // Initialize physical floating point register live intervals and blocked registers.
+ physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
+ for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+ LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat);
+ physical_fp_nodes_[i] =
+ new (allocator_) InterferenceNode(allocator_, interval, liveness);
+ physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
+ fp_intervals_.push_back(interval);
+ if (codegen_->IsBlockedFloatingPointRegister(i)) {
+ interval->AddRange(0, liveness.GetMaxLifetimePosition());
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::AllocateRegisters() {
+ // (1) Collect and prepare live intervals.
+ ProcessInstructions();
+
+ for (bool processing_core_regs : {true, false}) {
+ ArenaVector<LiveInterval*>& intervals = processing_core_regs
+ ? core_intervals_
+ : fp_intervals_;
+ size_t num_registers = processing_core_regs
+ ? codegen_->GetNumberOfCoreRegisters()
+ : codegen_->GetNumberOfFloatingPointRegisters();
+
+ size_t attempt = 0;
+ while (true) {
+ ++attempt;
+ DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
+ << "Exceeded debug max graph coloring register allocation attempts. "
+ << "This could indicate that the register allocator is not making forward progress, "
+ << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
+ << "should be prioritized over long ones, because they cannot be split further.)";
+
+ // Many data structures are cleared between graph coloring attempts, so we reduce
+ // total memory usage by using a new arena allocator for each attempt.
+ ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool());
+ ColoringIteration iteration(this,
+ &coloring_attempt_allocator,
+ processing_core_regs,
+ num_registers);
+
+ // (2) Build the interference graph. Also gather safepoints.
+ ArenaVector<InterferenceNode*> safepoints(
+ coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
+ ? physical_core_nodes_
+ : physical_fp_nodes_;
+ iteration.BuildInterferenceGraph(intervals, physical_nodes);
+
+ // (3) Add coalesce opportunities.
+ // If we have tried coloring the graph a suspiciously high number of times, give
+ // up on move coalescing, just in case the coalescing heuristics are not conservative.
+ // (This situation will be caught if DCHECKs are turned on.)
+ if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
+ iteration.FindCoalesceOpportunities();
+ }
+
+ // (4) Prune all uncolored nodes from interference graph.
+ iteration.PruneInterferenceGraph();
+
+ // (5) Color pruned nodes based on interferences.
+ bool successful = iteration.ColorInterferenceGraph();
+
+ // We manually clear coalesce opportunities for physical nodes,
+ // since they persist across coloring attempts.
+ for (InterferenceNode* node : physical_core_nodes_) {
+ node->ClearCoalesceOpportunities();
+ }
+ for (InterferenceNode* node : physical_fp_nodes_) {
+ node->ClearCoalesceOpportunities();
+ }
+
+ if (successful) {
+ // Assign spill slots.
+ AllocateSpillSlots(iteration.GetPrunableNodes());
+
+ // Tell the code generator which registers were allocated.
+ // We only look at prunable_nodes because we already told the code generator about
+ // fixed intervals while processing instructions. We also ignore the fixed intervals
+ // placed at the top of catch blocks.
+ for (InterferenceNode* node : iteration.GetPrunableNodes()) {
+ LiveInterval* interval = node->GetInterval();
+ if (interval->HasRegister()) {
+ Location low_reg = processing_core_regs
+ ? Location::RegisterLocation(interval->GetRegister())
+ : Location::FpuRegisterLocation(interval->GetRegister());
+ codegen_->AddAllocatedRegister(low_reg);
+ if (interval->HasHighInterval()) {
+ LiveInterval* high = interval->GetHighInterval();
+ DCHECK(high->HasRegister());
+ Location high_reg = processing_core_regs
+ ? Location::RegisterLocation(high->GetRegister())
+ : Location::FpuRegisterLocation(high->GetRegister());
+ codegen_->AddAllocatedRegister(high_reg);
+ }
+ } else {
+ DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
+ }
+ }
+
+ break;
+ }
+ } // while unsuccessful
+  } // for processing_core_regs
+
+ // (6) Resolve locations and deconstruct SSA form.
+ RegisterAllocationResolver(allocator_, codegen_, liveness_)
+ .Resolve(ArrayRef<HInstruction* const>(safepoints_),
+ reserved_art_method_slots_ + reserved_out_slots_,
+ num_int_spill_slots_,
+ num_long_spill_slots_,
+ num_float_spill_slots_,
+ num_double_spill_slots_,
+ catch_phi_spill_slot_counter_,
+ temp_intervals_);
+
+ if (kIsDebugBuild) {
+ Validate(/*log_fatal_on_failure*/ true);
+ }
+}
+
+bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
+ for (bool processing_core_regs : {true, false}) {
+ ArenaVector<LiveInterval*> intervals(
+ allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+ for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ LiveInterval* interval = instruction->GetLiveInterval();
+ if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
+ intervals.push_back(instruction->GetLiveInterval());
+ }
+ }
+
+ ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
+ ? physical_core_nodes_
+ : physical_fp_nodes_;
+ for (InterferenceNode* fixed : physical_nodes) {
+ LiveInterval* interval = fixed->GetInterval();
+ if (interval->GetFirstRange() != nullptr) {
+ // Ideally we would check fixed ranges as well, but currently there are times when
+ // two fixed intervals for the same register will overlap. For example, a fixed input
+        // and a fixed output may sometimes share the same register, in which case there will
+        // be two fixed intervals for the same register.
+ }
+ }
+
+ for (LiveInterval* temp : temp_intervals_) {
+ if (IsCoreInterval(temp) == processing_core_regs) {
+ intervals.push_back(temp);
+ }
+ }
+
+ size_t spill_slots = num_int_spill_slots_
+ + num_long_spill_slots_
+ + num_float_spill_slots_
+ + num_double_spill_slots_
+ + catch_phi_spill_slot_counter_;
+ bool ok = ValidateIntervals(intervals,
+ spill_slots,
+ reserved_art_method_slots_ + reserved_out_slots_,
+ *codegen_,
+ allocator_,
+ processing_core_regs,
+ log_fatal_on_failure);
+ if (!ok) {
+ return false;
+ }
+ } // for processing_core_regs
+
+ return true;
+}
+
+void RegisterAllocatorGraphColor::ProcessInstructions() {
+ for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
+ // Note that we currently depend on this ordering, since some helper
+ // code is designed for linear scan register allocation.
+ for (HBackwardInstructionIterator instr_it(block->GetInstructions());
+ !instr_it.Done();
+ instr_it.Advance()) {
+ ProcessInstruction(instr_it.Current());
+ }
+
+ for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ ProcessInstruction(phi_it.Current());
+ }
+
+ if (block->IsCatchBlock()
+ || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
+ // By blocking all registers at the top of each catch block or irreducible loop, we force
+ // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in registers.
+ size_t position = block->GetLifetimeStart();
+ BlockRegisters(position, position + 1);
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ if (locations == nullptr) {
+ return;
+ }
+ if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) {
+ // We do this here because we do not want the suspend check to artificially
+ // create live registers.
+ DCHECK(instruction->IsSuspendCheckEntry());
+ DCHECK_EQ(locations->GetTempCount(), 0u);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ CheckForTempLiveIntervals(instruction);
+ CheckForSafepoint(instruction);
+ if (instruction->GetLocations()->WillCall()) {
+ // If a call will happen, create fixed intervals for caller-save registers.
+ // TODO: Note that it may be beneficial to later split intervals at this point,
+ // so that we allow last-minute moves from a caller-save register
+ // to a callee-save register.
+ BlockRegisters(instruction->GetLifetimePosition(),
+ instruction->GetLifetimePosition() + 1,
+ /*caller_save_only*/ true);
+ }
+ CheckForFixedInputs(instruction);
+
+ LiveInterval* interval = instruction->GetLiveInterval();
+ if (interval == nullptr) {
+ // Instructions lacking a valid output location do not have a live interval.
+ DCHECK(!locations->Out().IsValid());
+ return;
+ }
+
+ // Low intervals act as representatives for their corresponding high interval.
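+  // (A high interval models the second register of a register pair, e.g. the upper half of a
+  // long or double on targets where such values need two registers.)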
+ DCHECK(!interval->IsHighInterval());
+ if (codegen_->NeedsTwoRegisters(interval->GetType())) {
+ interval->AddHighInterval();
+ }
+ AddSafepointsFor(instruction);
+ CheckForFixedOutput(instruction);
+ AllocateSpillSlotForCatchPhi(instruction);
+
+ ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
+ ? core_intervals_
+ : fp_intervals_;
+ if (interval->HasSpillSlot() || instruction->IsConstant()) {
+ // Note that if an interval already has a spill slot, then its value currently resides
+    // on the stack (e.g., parameters). Thus we do not have to allocate a register until its first
+ // register use. This is also true for constants, which can be materialized at any point.
+ size_t first_register_use = interval->FirstRegisterUse();
+ if (first_register_use != kNoLifetime) {
+ LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
+ intervals.push_back(split);
+ } else {
+ // We won't allocate a register for this value.
+ }
+ } else {
+ intervals.push_back(interval);
+ }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
+ // We simply block physical registers where necessary.
+ // TODO: Ideally we would coalesce the physical register with the register
+ // allocated to the input value, but this can be tricky if, e.g., there
+ // could be multiple physical register uses of the same value at the
+ // same instruction. Furthermore, there's currently no distinction between
+ // fixed inputs to a call (which will be clobbered) and other fixed inputs (which
+ // may not be clobbered).
+ LocationSummary* locations = instruction->GetLocations();
+ size_t position = instruction->GetLifetimePosition();
+ for (size_t i = 0; i < locations->GetInputCount(); ++i) {
+ Location input = locations->InAt(i);
+ if (input.IsRegister() || input.IsFpuRegister()) {
+ BlockRegister(input, position, position + 1);
+ codegen_->AddAllocatedRegister(input);
+ } else if (input.IsPair()) {
+ BlockRegister(input.ToLow(), position, position + 1);
+ BlockRegister(input.ToHigh(), position, position + 1);
+ codegen_->AddAllocatedRegister(input.ToLow());
+ codegen_->AddAllocatedRegister(input.ToHigh());
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
+ // If an instruction has a fixed output location, we give the live interval a register and then
+ // proactively split it just after the definition point to avoid creating too many interferences
+ // with a fixed node.
+ LiveInterval* interval = instruction->GetLiveInterval();
+ Location out = interval->GetDefinedBy()->GetLocations()->Out();
+ size_t position = instruction->GetLifetimePosition();
+ DCHECK_GE(interval->GetEnd() - position, 2u);
+
+ if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+ out = instruction->GetLocations()->InAt(0);
+ }
+
+ if (out.IsRegister() || out.IsFpuRegister()) {
+ interval->SetRegister(out.reg());
+ codegen_->AddAllocatedRegister(out);
+ Split(interval, position + 1);
+ } else if (out.IsPair()) {
+ interval->SetRegister(out.low());
+ interval->GetHighInterval()->SetRegister(out.high());
+ codegen_->AddAllocatedRegister(out.ToLow());
+ codegen_->AddAllocatedRegister(out.ToHigh());
+ Split(interval, position + 1);
+ } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
+ interval->SetSpillSlot(out.GetStackIndex());
+ } else {
+ DCHECK(out.IsUnallocated() || out.IsConstant());
+ }
+}
+
+void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
+ LiveInterval* interval = instruction->GetLiveInterval();
+ for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+ HInstruction* safepoint = safepoints_[safepoint_index - 1u];
+ size_t safepoint_position = safepoint->GetLifetimePosition();
+
+    // Check that safepoints_ is sorted by decreasing lifetime position.
+ DCHECK(safepoint_index == safepoints_.size() ||
+ safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
+
+ if (safepoint_position == interval->GetStart()) {
+ // The safepoint is for this instruction, so the location of the instruction
+ // does not need to be saved.
+ DCHECK_EQ(safepoint_index, safepoints_.size());
+ DCHECK_EQ(safepoint, instruction);
+ continue;
+ } else if (interval->IsDeadAt(safepoint_position)) {
+ break;
+ } else if (!interval->Covers(safepoint_position)) {
+ // Hole in the interval.
+ continue;
+ }
+ interval->AddSafepoint(safepoint);
+ }
+}
+
+void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ size_t position = instruction->GetLifetimePosition();
+ for (size_t i = 0; i < locations->GetTempCount(); ++i) {
+ Location temp = locations->GetTemp(i);
+ if (temp.IsRegister() || temp.IsFpuRegister()) {
+ BlockRegister(temp, position, position + 1);
+ codegen_->AddAllocatedRegister(temp);
+ } else {
+ DCHECK(temp.IsUnallocated());
+ switch (temp.GetPolicy()) {
+ case Location::kRequiresRegister: {
+ LiveInterval* interval =
+ LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+ interval->AddTempUse(instruction, i);
+ core_intervals_.push_back(interval);
+ temp_intervals_.push_back(interval);
+ break;
+ }
+
+ case Location::kRequiresFpuRegister: {
+ LiveInterval* interval =
+ LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+ interval->AddTempUse(instruction, i);
+ fp_intervals_.push_back(interval);
+ temp_intervals_.push_back(interval);
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ interval->AddHighInterval(/*is_temp*/ true);
+ temp_intervals_.push_back(interval->GetHighInterval());
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected policy for temporary location "
+ << temp.GetPolicy();
+ }
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+
+ if (locations->NeedsSafepoint()) {
+ safepoints_.push_back(instruction);
+ }
+}
+
+LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
+ if (interval->GetStart() < position && position < interval->GetEnd()) {
+ return Split(interval, position);
+ } else {
+ return interval;
+ }
+}
+
+void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
+ DCHECK(!interval->IsHighInterval());
+
+ // Split just after a register definition.
+ if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
+ interval = TrySplit(interval, interval->GetStart() + 1);
+ }
+
+ UsePosition* use = interval->GetFirstUse();
+ while (use != nullptr && use->GetPosition() < interval->GetStart()) {
+ use = use->GetNext();
+ }
+
+ // Split around register uses.
+ size_t end = interval->GetEnd();
+ while (use != nullptr && use->GetPosition() <= end) {
+ if (use->RequiresRegister()) {
+ size_t position = use->GetPosition();
+ interval = TrySplit(interval, position - 1);
+ if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
+ // If we are at the very end of a basic block, we cannot split right
+ // at the use. Split just after instead.
+ interval = TrySplit(interval, position + 1);
+ } else {
+ interval = TrySplit(interval, position);
+ }
+ }
+ use = use->GetNext();
+ }
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
+ if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+ HPhi* phi = instruction->AsPhi();
+ LiveInterval* interval = phi->GetLiveInterval();
+
+ HInstruction* previous_phi = phi->GetPrevious();
+ DCHECK(previous_phi == nullptr ||
+ previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+ << "Phis expected to be sorted by vreg number, "
+ << "so that equivalent phis are adjacent.";
+
+ if (phi->IsVRegEquivalentOf(previous_phi)) {
+ // Assign the same spill slot.
+ DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+ interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+ } else {
+ interval->SetSpillSlot(catch_phi_spill_slot_counter_);
+ catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ }
+ }
+}
+
+void RegisterAllocatorGraphColor::BlockRegister(Location location,
+ size_t start,
+ size_t end) {
+ DCHECK(location.IsRegister() || location.IsFpuRegister());
+ int reg = location.reg();
+ LiveInterval* interval = location.IsRegister()
+ ? physical_core_nodes_[reg]->GetInterval()
+ : physical_fp_nodes_[reg]->GetInterval();
+ DCHECK(interval->GetRegister() == reg);
+ bool blocked_by_codegen = location.IsRegister()
+ ? codegen_->IsBlockedCoreRegister(reg)
+ : codegen_->IsBlockedFloatingPointRegister(reg);
+ if (blocked_by_codegen) {
+ // We've already blocked this register for the entire method. (And adding a
+ // range inside another range violates the preconditions of AddRange).
+ } else {
+ interval->AddRange(start, end);
+ }
+}
+
+void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+ for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+ if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+ BlockRegister(Location::RegisterLocation(i), start, end);
+ }
+ }
+ for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+ if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+ BlockRegister(Location::FpuRegisterLocation(i), start, end);
+ }
+ }
+}
+
+void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
+ InterferenceNode* to,
+ bool guaranteed_not_interfering_yet,
+ bool both_directions) {
+ if (from->IsPrecolored()) {
+ // We save space by ignoring outgoing edges from fixed nodes.
+ } else if (to->IsPrecolored()) {
+ // It is important that only a single node represents a given fixed register in the
+ // interference graph. We retrieve that node here.
+ const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint()
+ ? register_allocator_->physical_fp_nodes_
+ : register_allocator_->physical_core_nodes_;
+ InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
+ from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false);
+ DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
+ DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
+
+ // If a node interferes with a fixed pair node, the weight of the edge may
+ // be inaccurate after using the alias of the pair node, because the alias of the pair node
+    // is a single node.
+ // We could make special pair fixed nodes, but that ends up being too conservative because
+ // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
+ // three rather than two.
+ // Instead, we explicitly add an interference with the high node of the fixed pair node.
+    // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
+ // can be unaligned on x86 complicates things.
+ if (to->IsPair()) {
+ InterferenceNode* high_node =
+ physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
+ DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
+ high_node->GetInterval()->GetRegister());
+ from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false);
+ }
+ } else {
+ // Standard interference between two uncolored nodes.
+ from->AddInterference(to, guaranteed_not_interfering_yet);
+ }
+
+ if (both_directions) {
+ AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
+ }
+}
+
+// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
+// is allowed to have the same register as the input interval.
+// TODO: Ideally we should just produce correct intervals in liveness analysis.
+// We would need to refactor the current live interval layout to do so, which is
+// no small task.
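+// (Instructions opt in to this register sharing by declaring their output location with
+// Location::kNoOutputOverlap in their LocationSummary.)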
+static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
+ LiveInterval* output_interval = out_node->GetInterval();
+ HInstruction* defined_by = output_interval->GetDefinedBy();
+ if (defined_by == nullptr) {
+ // This must not be a definition point.
+ return false;
+ }
+
+ LocationSummary* locations = defined_by->GetLocations();
+ if (locations->OutputCanOverlapWithInputs()) {
+ // This instruction does not allow the output to reuse a register from an input.
+ return false;
+ }
+
+ LiveInterval* input_interval = in_node->GetInterval();
+ LiveInterval* next_sibling = input_interval->GetNextSibling();
+ size_t def_position = defined_by->GetLifetimePosition();
+ size_t use_position = def_position + 1;
+ if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
+ // The next sibling starts at the use position, so reusing the input register in the output
+ // would clobber the input before it's moved into the sibling interval location.
+ return false;
+ }
+
+ if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
+ // The input interval is live after the use position.
+ return false;
+ }
+
+ HInputsRef inputs = defined_by->GetInputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
+ DCHECK(input_interval->SameRegisterKind(*output_interval));
+ return true;
+ }
+ }
+
+ // The input interval was not an input for this instruction.
+ return false;
+}
+
+void ColoringIteration::BuildInterferenceGraph(
+ const ArenaVector<LiveInterval*>& intervals,
+ const ArenaVector<InterferenceNode*>& physical_nodes) {
+ DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
+ // Build the interference graph efficiently by ordering range endpoints
+ // by position and doing a linear sweep to find interferences. (That is, we
+ // jump from endpoint to endpoint, maintaining a set of intervals live at each
+ // point. If two nodes are ever in the live set at the same time, then they
+ // interfere with each other.)
+ //
+ // We order by both position and (secondarily) by whether the endpoint
+ // begins or ends a range; we want to process range endings before range
+ // beginnings at the same position because they should not conflict.
+ //
+ // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
+ // Tuple contents: (position, is_range_beginning, node).
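+  // For example, a sibling with ranges [2,6) and [10,14) contributes the tuples (2, true, node),
+  // (6, false, node), (10, true, node) and (14, false, node); because false < true, a range
+  // ending at a position sorts before a range beginning at that same position.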
+ ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
+ allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // We reserve plenty of space to avoid excessive copying.
+  range_endpoints.reserve(4 * intervals.size());
+
+ for (LiveInterval* parent : intervals) {
+ for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
+ LiveRange* range = sibling->GetFirstRange();
+ if (range != nullptr) {
+ InterferenceNode* node = new (allocator_) InterferenceNode(
+ allocator_, sibling, register_allocator_->liveness_);
+ interval_node_map_.Insert(std::make_pair(sibling, node));
+
+ if (sibling->HasRegister()) {
+ // Fixed nodes should alias the canonical node for the corresponding register.
+ node->stage = NodeStage::kPrecolored;
+ InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
+ node->SetAlias(physical_node);
+ DCHECK_EQ(node->GetInterval()->GetRegister(),
+ physical_node->GetInterval()->GetRegister());
+ } else {
+ node->stage = NodeStage::kPrunable;
+ prunable_nodes_.push_back(node);
+ }
+
+ while (range != nullptr) {
+ range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
+ range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
+ range = range->GetNext();
+ }
+ }
+ }
+ }
+
+ // Sort the endpoints.
+ // We explicitly ignore the third entry of each tuple (the node pointer) in order
+ // to maintain determinism.
+ std::sort(range_endpoints.begin(), range_endpoints.end(),
+ [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
+ const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
+ return std::tie(std::get<0>(lhs), std::get<1>(lhs))
+ < std::tie(std::get<0>(rhs), std::get<1>(rhs));
+ });
+
+ // Nodes live at the current position in the linear sweep.
+ ArenaVector<InterferenceNode*> live(
+ allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
+ // live set. When we encounter the end of a range, we remove the corresponding node
+ // from the live set. Nodes interfere if they are in the live set at the same time.
+ for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
+ bool is_range_beginning;
+ InterferenceNode* node;
+ size_t position;
+ // Extract information from the tuple, including the node this tuple represents.
+ std::tie(position, is_range_beginning, node) = *it;
+
+ if (is_range_beginning) {
+ bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
+ for (InterferenceNode* conflicting : live) {
+ DCHECK_NE(node, conflicting);
+ if (CheckInputOutputCanOverlap(conflicting, node)) {
+ // We do not add an interference, because the instruction represented by `node` allows
+ // its output to share a register with an input, represented here by `conflicting`.
+ } else {
+ AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
+ }
+ }
+ DCHECK(std::find(live.begin(), live.end(), node) == live.end());
+ live.push_back(node);
+ } else {
+ // End of range.
+ auto live_it = std::find(live.begin(), live.end(), node);
+ DCHECK(live_it != live.end());
+ live.erase(live_it);
+ }
+ }
+ DCHECK(live.empty());
+}
+
+void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
+ InterferenceNode* b,
+ CoalesceKind kind,
+ size_t position) {
+ DCHECK_EQ(a->IsPair(), b->IsPair())
+ << "Nodes of different memory widths should never be coalesced";
+ CoalesceOpportunity* opportunity =
+ new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
+ a->AddCoalesceOpportunity(opportunity);
+ b->AddCoalesceOpportunity(opportunity);
+ coalesce_worklist_.push(opportunity);
+}
+
+// When looking for coalesce opportunities, we use the interval_node_map_ to find the node
+// corresponding to an interval. Note that not all intervals are in this map, notably the parents
+// of constants and stack arguments. (However, these intervals should not be involved in coalesce
+// opportunities anyway, because they're not going to be in registers.)
+void ColoringIteration::FindCoalesceOpportunities() {
+ DCHECK(coalesce_worklist_.empty());
+
+ for (InterferenceNode* node : prunable_nodes_) {
+ LiveInterval* interval = node->GetInterval();
+
+ // Coalesce siblings.
+ LiveInterval* next_sibling = interval->GetNextSibling();
+ if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
+ auto it = interval_node_map_.Find(next_sibling);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* sibling_node = it->second;
+ CreateCoalesceOpportunity(node,
+ sibling_node,
+ CoalesceKind::kAdjacentSibling,
+ interval->GetEnd());
+ }
+ }
+
+ // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
+ LiveInterval* parent = interval->GetParent();
+ if (parent->HasRegister()
+ && parent->GetNextSibling() == interval
+ && parent->GetEnd() == interval->GetStart()) {
+ auto it = interval_node_map_.Find(parent);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* parent_node = it->second;
+ CreateCoalesceOpportunity(node,
+ parent_node,
+ CoalesceKind::kFixedOutputSibling,
+ parent->GetEnd());
+ }
+ }
+
+ // Try to prevent moves across blocks.
+    // Note that this does not lead to many successful coalesce attempts, so it could be removed
+    // if found to add to compile time.
+ const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
+ if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
+ // If the start of this interval is at a block boundary, we look at the
+ // location of the interval in blocks preceding the block this interval
+ // starts at. This can avoid a move between the two blocks.
+ HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
+ for (HBasicBlock* predecessor : block->GetPredecessors()) {
+ size_t position = predecessor->GetLifetimeEnd() - 1;
+ LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
+ if (existing != nullptr) {
+ auto it = interval_node_map_.Find(existing);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* existing_node = it->second;
+ CreateCoalesceOpportunity(node,
+ existing_node,
+ CoalesceKind::kNonlinearControlFlow,
+ position);
+ }
+ }
+ }
+ }
+
+ // Coalesce phi inputs with the corresponding output.
+ HInstruction* defined_by = interval->GetDefinedBy();
+ if (defined_by != nullptr && defined_by->IsPhi()) {
+ const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors();
+ HInputsRef inputs = defined_by->GetInputs();
+
+ for (size_t i = 0, e = inputs.size(); i < e; ++i) {
+ // We want the sibling at the end of the appropriate predecessor block.
+ size_t position = predecessors[i]->GetLifetimeEnd() - 1;
+ LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
+
+ auto it = interval_node_map_.Find(input_interval);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* input_node = it->second;
+ CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
+ }
+ }
+ }
+
+ // Coalesce output with first input when policy is kSameAsFirstInput.
+ if (defined_by != nullptr) {
+ Location out = defined_by->GetLocations()->Out();
+ if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+ LiveInterval* input_interval
+ = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
+ // TODO: Could we consider lifetime holes here?
+ if (input_interval->GetEnd() == interval->GetStart()) {
+ auto it = interval_node_map_.Find(input_interval);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* input_node = it->second;
+ CreateCoalesceOpportunity(node,
+ input_node,
+ CoalesceKind::kFirstInput,
+ interval->GetStart());
+ }
+ }
+ }
+ }
+
+    // An interval that starts at an instruction (that is, one that is not split) may
+    // reuse the registers used by the inputs of that instruction, based on its
+    // location summary.
+ if (defined_by != nullptr) {
+ DCHECK(!interval->IsSplit());
+ LocationSummary* locations = defined_by->GetLocations();
+ if (!locations->OutputCanOverlapWithInputs()) {
+ HInputsRef inputs = defined_by->GetInputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ size_t def_point = defined_by->GetLifetimePosition();
+ // TODO: Getting the sibling at the def_point might not be quite what we want
+ // for fixed inputs, since the use will be *at* the def_point rather than after.
+ LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
+ if (input_interval != nullptr &&
+ input_interval->HasHighInterval() == interval->HasHighInterval()) {
+ auto it = interval_node_map_.Find(input_interval);
+ if (it != interval_node_map_.end()) {
+ InterferenceNode* input_node = it->second;
+ CreateCoalesceOpportunity(node,
+ input_node,
+ CoalesceKind::kAnyInput,
+ interval->GetStart());
+ }
+ }
+ }
+ }
+ }
+
+ // Try to prevent moves into fixed input locations.
+ UsePosition* use = interval->GetFirstUse();
+ for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) {
+      // Skip past uses at or before the start of this interval.
+ }
+ for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) {
+ HInstruction* user = use->GetUser();
+ if (user == nullptr) {
+ // User may be null for certain intervals, such as temp intervals.
+ continue;
+ }
+ LocationSummary* locations = user->GetLocations();
+ Location input = locations->InAt(use->GetInputIndex());
+ if (input.IsRegister() || input.IsFpuRegister()) {
+ // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
+ // is currently not supported.
+ InterferenceNode* fixed_node = input.IsRegister()
+ ? register_allocator_->physical_core_nodes_[input.reg()]
+ : register_allocator_->physical_fp_nodes_[input.reg()];
+ CreateCoalesceOpportunity(node,
+ fixed_node,
+ CoalesceKind::kFixedInput,
+ user->GetLifetimePosition());
+ }
+ }
+ } // for node in prunable_nodes
+}
+
+static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
+ return node->GetOutDegree() < num_regs;
+}
+
+static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
+ return !IsLowDegreeNode(node, num_regs);
+}
+
+void ColoringIteration::PruneInterferenceGraph() {
+ DCHECK(pruned_nodes_.empty()
+ && simplify_worklist_.empty()
+ && freeze_worklist_.empty()
+ && spill_worklist_.empty());
+ // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
+ // and all others as high degree nodes. The distinction is important: low degree nodes are
+ // guaranteed a color, while high degree nodes are not.
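+  // (For example, with 4 registers a node with only 3 neighbors can always be colored: even if
+  // all of its neighbors end up in distinct registers, a fourth register remains free for it.)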
+
+ // Build worklists. Note that the coalesce worklist has already been
+ // filled by FindCoalesceOpportunities().
+ for (InterferenceNode* node : prunable_nodes_) {
+ DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
+ if (IsLowDegreeNode(node, num_regs_)) {
+ if (node->GetCoalesceOpportunities().empty()) {
+ // Simplify Worklist.
+ node->stage = NodeStage::kSimplifyWorklist;
+ simplify_worklist_.push_back(node);
+ } else {
+ // Freeze Worklist.
+ node->stage = NodeStage::kFreezeWorklist;
+ freeze_worklist_.push_back(node);
+ }
+ } else {
+ // Spill worklist.
+ node->stage = NodeStage::kSpillWorklist;
+ spill_worklist_.push(node);
+ }
+ }
+
+ // Prune graph.
+ // Note that we do not remove a node from its current worklist if it moves to another, so it may
+  // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
+ while (true) {
+ if (!simplify_worklist_.empty()) {
+ // Prune low-degree nodes.
+ // TODO: pop_back() should work as well, but it didn't; we get a
+ // failed check while pruning. We should look into this.
+ InterferenceNode* node = simplify_worklist_.front();
+ simplify_worklist_.pop_front();
+ DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
+ DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
+ DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
+ PruneNode(node);
+ } else if (!coalesce_worklist_.empty()) {
+ // Coalesce.
+ CoalesceOpportunity* opportunity = coalesce_worklist_.top();
+ coalesce_worklist_.pop();
+ if (opportunity->stage == CoalesceStage::kWorklist) {
+ Coalesce(opportunity);
+ }
+ } else if (!freeze_worklist_.empty()) {
+ // Freeze moves and prune a low-degree move-related node.
+ InterferenceNode* node = freeze_worklist_.front();
+ freeze_worklist_.pop_front();
+ if (node->stage == NodeStage::kFreezeWorklist) {
+ DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
+ DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
+ FreezeMoves(node);
+ PruneNode(node);
+ }
+ } else if (!spill_worklist_.empty()) {
+ // We spill the lowest-priority node, because pruning a node earlier
+ // gives it a higher chance of being spilled.
+ InterferenceNode* node = spill_worklist_.top();
+ spill_worklist_.pop();
+ if (node->stage == NodeStage::kSpillWorklist) {
+ DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
+ FreezeMoves(node);
+ PruneNode(node);
+ }
+ } else {
+ // Pruning complete.
+ break;
+ }
+ }
+ DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
+}
+
+void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
+ for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+ if (opportunity->stage == CoalesceStage::kActive) {
+ opportunity->stage = CoalesceStage::kWorklist;
+ coalesce_worklist_.push(opportunity);
+ }
+ }
+}
+
+void ColoringIteration::PruneNode(InterferenceNode* node) {
+ DCHECK_NE(node->stage, NodeStage::kPruned);
+ DCHECK(!node->IsPrecolored());
+ node->stage = NodeStage::kPruned;
+ pruned_nodes_.push(node);
+
+ for (InterferenceNode* adj : node->GetAdjacentNodes()) {
+ DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
+
+ if (adj->IsPrecolored()) {
+ // No effect on pre-colored nodes; they're never pruned.
+ } else {
+ // Remove the interference.
+ bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
+ DCHECK(adj->ContainsInterference(node))
+ << "Missing reflexive interference from non-fixed node";
+ adj->RemoveInterference(node);
+
+ // Handle transitions from high degree to low degree.
+ if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
+ EnableCoalesceOpportunities(adj);
+ for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
+ EnableCoalesceOpportunities(adj_adj);
+ }
+
+ DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
+ if (adj->IsMoveRelated()) {
+ adj->stage = NodeStage::kFreezeWorklist;
+ freeze_worklist_.push_back(adj);
+ } else {
+ adj->stage = NodeStage::kSimplifyWorklist;
+ simplify_worklist_.push_back(adj);
+ }
+ }
+ }
+ }
+}
+
+void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
+ if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
+ DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
+ node->stage = NodeStage::kSimplifyWorklist;
+ simplify_worklist_.push_back(node);
+ }
+}
+
+void ColoringIteration::FreezeMoves(InterferenceNode* node) {
+ for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+ if (opportunity->stage == CoalesceStage::kDefunct) {
+ // Constrained moves should remain constrained, since they will not be considered
+ // during last-chance coalescing.
+ } else {
+ opportunity->stage = CoalesceStage::kInactive;
+ }
+ InterferenceNode* other = opportunity->node_a->GetAlias() == node
+ ? opportunity->node_b->GetAlias()
+ : opportunity->node_a->GetAlias();
+ if (other != node && other->stage == NodeStage::kFreezeWorklist) {
+ DCHECK(IsLowDegreeNode(node, num_regs_));
+ CheckTransitionFromFreezeWorklist(other);
+ }
+ }
+}
+
+bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
+ InterferenceNode* into) {
+ if (!into->IsPrecolored()) {
+ // The uncolored heuristic will cover this case.
+ return false;
+ }
+ if (from->IsPair() || into->IsPair()) {
+ // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
+ // are currently represented as two single fixed nodes in the graph, and `into` is
+ // only one of them. (We may lose the implicit connections to the second one in a merge.)
+ return false;
+ }
+
+ // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
+ // Reasons an adjacent node `adj` can be "ok":
+ // (1) If `adj` is low degree, interference with `into` will not affect its existing
+ // colorable guarantee. (Notice that coalescing cannot increase its degree.)
+ // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
+ // (3) If there's already an interference with `into`, coalescing will not add interferences.
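+  // (This is essentially the "George" coalescing test from iterated register coalescing,
+  // applied here to merges into pre-colored nodes.)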
+ for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+ if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
+ // Ok.
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
+ InterferenceNode* into) {
+ if (into->IsPrecolored()) {
+ // The pre-colored heuristic will handle this case.
+ return false;
+ }
+
+  // Arbitrary cap to improve compile time. Tests show that this has a negligible effect
+ // on generated code.
+ if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
+ return false;
+ }
+
+ // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
+ // of high degree. (Low degree neighbors can be ignored, because they will eventually be
+ // pruned from the interference graph in the simplify stage.)
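+  // (This is essentially the conservative "Briggs" test. For example, with 4 registers, merging
+  // two nodes whose combined neighborhood contains only 3 high degree nodes cannot make the
+  // merged node uncolorable, since its low degree neighbors will be simplified away first.)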
+ size_t high_degree_interferences = 0;
+ for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+ if (IsHighDegreeNode(adj, num_regs_)) {
+ high_degree_interferences += from->EdgeWeightWith(adj);
+ }
+ }
+ for (InterferenceNode* adj : into->GetAdjacentNodes()) {
+ if (IsHighDegreeNode(adj, num_regs_)) {
+ if (from->ContainsInterference(adj)) {
+ // We've already counted this adjacent node.
+ // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
+ // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
+ // but remains conservative.)
+ if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
+ high_degree_interferences -= from->EdgeWeightWith(adj);
+ }
+ } else {
+ high_degree_interferences += into->EdgeWeightWith(adj);
+ }
+ }
+ }
+
+ return high_degree_interferences < num_regs_;
+}
+
+void ColoringIteration::Combine(InterferenceNode* from,
+ InterferenceNode* into) {
+ from->SetAlias(into);
+
+ // Add interferences.
+ for (InterferenceNode* adj : from->GetAdjacentNodes()) {
+ bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
+ AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
+ if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
+ // This is a (temporary) transition to a high degree node. Its degree will decrease again
+ // when we prune `from`, but it's best to be consistent about the current worklist.
+ adj->stage = NodeStage::kSpillWorklist;
+ spill_worklist_.push(adj);
+ }
+ }
+
+ // Add coalesce opportunities.
+ for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
+ if (opportunity->stage != CoalesceStage::kDefunct) {
+ into->AddCoalesceOpportunity(opportunity);
+ }
+ }
+ EnableCoalesceOpportunities(from);
+
+ // Prune and update worklists.
+ PruneNode(from);
+ if (IsLowDegreeNode(into, num_regs_)) {
+ // Coalesce(...) takes care of checking for a transition to the simplify worklist.
+ DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
+ } else if (into->stage == NodeStage::kFreezeWorklist) {
+ // This is a transition to a high degree node.
+ into->stage = NodeStage::kSpillWorklist;
+ spill_worklist_.push(into);
+ } else {
+ DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
+ }
+}
+
+void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
+ InterferenceNode* from = opportunity->node_a->GetAlias();
+ InterferenceNode* into = opportunity->node_b->GetAlias();
+ DCHECK_NE(from->stage, NodeStage::kPruned);
+ DCHECK_NE(into->stage, NodeStage::kPruned);
+
+ if (from->IsPrecolored()) {
+ // If we have one pre-colored node, make sure it's the `into` node.
+ std::swap(from, into);
+ }
+
+ if (from == into) {
+ // These nodes have already been coalesced.
+ opportunity->stage = CoalesceStage::kDefunct;
+ CheckTransitionFromFreezeWorklist(from);
+ } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
+ // These nodes interfere.
+ opportunity->stage = CoalesceStage::kDefunct;
+ CheckTransitionFromFreezeWorklist(from);
+ CheckTransitionFromFreezeWorklist(into);
+ } else if (PrecoloredHeuristic(from, into)
+ || UncoloredHeuristic(from, into)) {
+ // We can coalesce these nodes.
+ opportunity->stage = CoalesceStage::kDefunct;
+ Combine(from, into);
+ CheckTransitionFromFreezeWorklist(into);
+ } else {
+ // We cannot coalesce, but we may be able to later.
+ opportunity->stage = CoalesceStage::kActive;
+ }
+}
+
+// Build a mask with a bit set for each register assigned to some
+// interval in `intervals`.
+template <typename Container>
+static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
+ std::bitset<kMaxNumRegs> conflict_mask;
+ for (InterferenceNode* adjacent : intervals) {
+ LiveInterval* conflicting = adjacent->GetInterval();
+ if (conflicting->HasRegister()) {
+ conflict_mask.set(conflicting->GetRegister());
+ if (conflicting->HasHighInterval()) {
+ DCHECK(conflicting->GetHighInterval()->HasRegister());
+ conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
+ }
+ } else {
+ DCHECK(!conflicting->HasHighInterval()
+ || !conflicting->GetHighInterval()->HasRegister());
+ }
+ }
+ return conflict_mask;
+}
+
+bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
+ return processing_core_regs
+ ? !codegen_->IsCoreCalleeSaveRegister(reg)
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+static bool RegisterIsAligned(size_t reg) {
+ return reg % 2 == 0;
+}
+
+static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
+ // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
+ // Note that CTZ is undefined if all bits are 0, so we special-case it.
+ return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
+}
+
+bool ColoringIteration::ColorInterferenceGraph() {
+ DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
+ ArenaVector<LiveInterval*> colored_intervals(
+ allocator_->Adapter(kArenaAllocRegisterAllocator));
+ bool successful = true;
+
+ while (!pruned_nodes_.empty()) {
+ InterferenceNode* node = pruned_nodes_.top();
+ pruned_nodes_.pop();
+ LiveInterval* interval = node->GetInterval();
+ size_t reg = 0;
+
+ InterferenceNode* alias = node->GetAlias();
+ if (alias != node) {
+ // This node was coalesced with another.
+ LiveInterval* alias_interval = alias->GetInterval();
+ if (alias_interval->HasRegister()) {
+ reg = alias_interval->GetRegister();
+ DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
+ << "This node conflicts with the register it was coalesced with";
+ } else {
+ DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
+ << "Move coalescing was not conservative, causing a node to be coalesced "
+ << "with another node that could not be colored";
+ if (interval->RequiresRegister()) {
+ successful = false;
+ }
+ }
+ } else {
+ // Search for free register(s).
+ std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
+ if (interval->HasHighInterval()) {
+ // Note that the graph coloring allocator assumes that pair intervals are aligned here,
+ // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
+ // change the alignment requirements here, we will have to update the algorithm (e.g.,
+ // be more conservative about the weight of edges adjacent to pair nodes.)
+ while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
+ reg += 2;
+ }
+
+ // Try to use a caller-save register first.
+ for (size_t i = 0; i < num_regs_ - 1; i += 2) {
+ bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_);
+ bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
+ if (!conflict_mask[i] && !conflict_mask[i + 1]) {
+ if (low_caller_save && high_caller_save) {
+ reg = i;
+ break;
+ } else if (low_caller_save || high_caller_save) {
+ reg = i;
+ // Keep looking to try to get both parts in caller-save registers.
+ }
+ }
+ }
+ } else {
+ // Not a pair interval.
+ reg = FindFirstZeroInConflictMask(conflict_mask);
+
+ // Try to use caller-save registers first.
+ for (size_t i = 0; i < num_regs_; ++i) {
+ if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
+ reg = i;
+ break;
+ }
+ }
+ }
+
+ // Last-chance coalescing.
+ for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
+ if (opportunity->stage == CoalesceStage::kDefunct) {
+ continue;
+ }
+ LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
+ ? opportunity->node_b->GetAlias()->GetInterval()
+ : opportunity->node_a->GetAlias()->GetInterval();
+ if (other_interval->HasRegister()) {
+ size_t coalesce_register = other_interval->GetRegister();
+ if (interval->HasHighInterval()) {
+ if (!conflict_mask[coalesce_register] &&
+ !conflict_mask[coalesce_register + 1] &&
+ RegisterIsAligned(coalesce_register)) {
+ reg = coalesce_register;
+ break;
+ }
+ } else if (!conflict_mask[coalesce_register]) {
+ reg = coalesce_register;
+ break;
+ }
+ }
+ }
+ }
+
+ if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
+ // Assign register.
+ DCHECK(!interval->HasRegister());
+ interval->SetRegister(reg);
+ colored_intervals.push_back(interval);
+ if (interval->HasHighInterval()) {
+ DCHECK(!interval->GetHighInterval()->HasRegister());
+ interval->GetHighInterval()->SetRegister(reg + 1);
+ colored_intervals.push_back(interval->GetHighInterval());
+ }
+ } else if (interval->RequiresRegister()) {
+ // The interference graph is too dense to color. Make it sparser by
+ // splitting this live interval.
+ successful = false;
+ register_allocator_->SplitAtRegisterUses(interval);
+ // We continue coloring, because there may be additional intervals that cannot
+ // be colored, and that we should split.
+ } else {
+ // Spill.
+ node->SetNeedsSpillSlot();
+ }
+ }
+
+ // If unsuccessful, reset all register assignments.
+ if (!successful) {
+ for (LiveInterval* interval : colored_intervals) {
+ interval->ClearRegister();
+ }
+ }
+
+ return successful;
+}
+
+void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) {
+ // The register allocation resolver will organize the stack based on value type,
+ // so we assign stack slots for each value type separately.
+ ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // The set of parent intervals already handled.
+ ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // Find nodes that need spill slots.
+ for (InterferenceNode* node : nodes) {
+ if (!node->NeedsSpillSlot()) {
+ continue;
+ }
+
+ LiveInterval* parent = node->GetInterval()->GetParent();
+ if (seen.find(parent) != seen.end()) {
+ // We've already handled this interval.
+ // This can happen if multiple siblings of the same interval request a stack slot.
+ continue;
+ }
+ seen.insert(parent);
+
+ HInstruction* defined_by = parent->GetDefinedBy();
+ if (parent->HasSpillSlot()) {
+ // We already have a spill slot for this value that we can reuse.
+ } else if (defined_by->IsParameterValue()) {
+ // Parameters already have a stack slot.
+ parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+ } else if (defined_by->IsCurrentMethod()) {
+ // The current method is always at stack slot 0.
+ parent->SetSpillSlot(0);
+ } else if (defined_by->IsConstant()) {
+ // Constants don't need a spill slot.
+ } else {
+ // We need to find a spill slot for this interval. Place it in the correct
+ // worklist to be processed later.
+ switch (node->GetInterval()->GetType()) {
+ case Primitive::kPrimDouble:
+ double_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimLong:
+ long_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimFloat:
+ float_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ int_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
+ UNREACHABLE();
+ }
+ }
+ }
+
+ // Color spill slots for each value type.
+ ColorSpillSlots(&double_intervals, &num_double_spill_slots_);
+ ColorSpillSlots(&long_intervals, &num_long_spill_slots_);
+ ColorSpillSlots(&float_intervals, &num_float_spill_slots_);
+ ColorSpillSlots(&int_intervals, &num_int_spill_slots_);
+}
+
+void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
+ size_t* num_stack_slots_used) {
+ // We cannot use the original interference graph here because spill slots are assigned to
+ // all of the siblings of an interval, whereas an interference node represents only a single
+ // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
+ // by position, and assigning the lowest spill slot available when we encounter an interval
+ // beginning. We ignore lifetime holes for simplicity.
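+  // For example, if one interval spans positions [0,20) and another spans [10,30), the first
+  // receives slot 0 and the second slot 1; once the first ends at 20, slot 0 is freed and can be
+  // reused by any interval beginning later.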
+ ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
+ allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
+ LiveInterval* parent_interval = *it;
+ DCHECK(parent_interval->IsParent());
+ DCHECK(!parent_interval->HasSpillSlot());
+ size_t start = parent_interval->GetStart();
+ size_t end = parent_interval->GetLastSibling()->GetEnd();
+ DCHECK_LT(start, end);
+ interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
+ interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
+ }
+
+ // Sort by position.
+ // We explicitly ignore the third entry of each tuple (the interval pointer) in order
+ // to maintain determinism.
+ std::sort(interval_endpoints.begin(), interval_endpoints.end(),
+ [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
+ const std::tuple<size_t, bool, LiveInterval*>& rhs) {
+ return std::tie(std::get<0>(lhs), std::get<1>(lhs))
+ < std::tie(std::get<0>(rhs), std::get<1>(rhs));
+ });
+
+ ArenaBitVector taken(allocator_, 0, true);
+ for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
+ // Extract information from the current tuple.
+ LiveInterval* parent_interval;
+ bool is_interval_beginning;
+ size_t position;
+ std::tie(position, is_interval_beginning, parent_interval) = *it;
+
+ bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+
+ if (is_interval_beginning) {
+ DCHECK(!parent_interval->HasSpillSlot());
+ DCHECK_EQ(position, parent_interval->GetStart());
+
+ // Find a free stack slot.
+ size_t slot = 0;
+ for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
+ // Skip taken slots.
+ }
+ parent_interval->SetSpillSlot(slot);
+
+      *num_stack_slots_used = std::max(*num_stack_slots_used,
+                                       needs_two_slots ? slot + 2 : slot + 1);
+ if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+ // The parallel move resolver requires that there be an even number of spill slots
+ // allocated for pair value types.
+ ++(*num_stack_slots_used);
+ }
+
+ taken.SetBit(slot);
+ if (needs_two_slots) {
+ taken.SetBit(slot + 1);
+ }
+ } else {
+ DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
+ DCHECK(parent_interval->HasSpillSlot());
+
+ // Free up the stack slot used by this interval.
+ size_t slot = parent_interval->GetSpillSlot();
+ DCHECK(taken.IsBitSet(slot));
+ DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
+ taken.ClearBit(slot);
+ if (needs_two_slots) {
+ taken.ClearBit(slot + 1);
+ }
+ }
+ }
+ DCHECK_EQ(taken.NumSetBits(), 0u);
+}
+
+} // namespace art
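For intuition, the endpoint-sweep scheme that ColorSpillSlots implements above can be shown in isolation. The following is a minimal sketch, not part of the patch, using a hypothetical Interval struct and plain STL containers in place of the arena types, and handling single-slot values only (so the two-slot pairs and the even-slot padding above are omitted):

#include <algorithm>
#include <cstddef>
#include <tuple>
#include <vector>

struct Interval {
  size_t start;
  size_t end;   // Exclusive; start < end.
  size_t slot;  // Filled in by AssignSpillSlots.
};

// Sweep sorted (position, is_start) endpoints, giving each interval the lowest
// stack slot that is free for its entire lifetime. Returns the slot count needed.
size_t AssignSpillSlots(const std::vector<Interval*>& intervals) {
  std::vector<std::tuple<size_t, bool, Interval*>> endpoints;
  for (Interval* interval : intervals) {
    endpoints.push_back(std::make_tuple(interval->start, true, interval));
    endpoints.push_back(std::make_tuple(interval->end, false, interval));
  }
  // Sort by position; at equal positions an end (false) sorts before a start (true),
  // so a slot freed at position p can be reused by an interval starting at p.
  // As in the patch, the interval pointer is ignored to keep the order deterministic.
  std::sort(endpoints.begin(), endpoints.end(),
            [](const std::tuple<size_t, bool, Interval*>& lhs,
               const std::tuple<size_t, bool, Interval*>& rhs) {
              return std::tie(std::get<0>(lhs), std::get<1>(lhs)) <
                     std::tie(std::get<0>(rhs), std::get<1>(rhs));
            });

  std::vector<bool> taken;
  size_t num_slots = 0;
  for (const std::tuple<size_t, bool, Interval*>& endpoint : endpoints) {
    bool is_start = std::get<1>(endpoint);
    Interval* interval = std::get<2>(endpoint);
    if (is_start) {
      size_t slot = 0;
      while (slot < taken.size() && taken[slot]) {
        ++slot;  // Skip taken slots.
      }
      if (slot == taken.size()) {
        taken.push_back(false);
      }
      taken[slot] = true;
      interval->slot = slot;
      num_slots = std::max(num_slots, slot + 1);
    } else {
      taken[interval->slot] = false;  // The interval is over; free its slot.
    }
  }
  return num_slots;
}

The production code above additionally handles values that need two slots and rounds the count up to an even number for the parallel move resolver; the sketch keeps only the sweep itself.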
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
new file mode 100644
index 0000000000..548687f784
--- /dev/null
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "base/macros.h"
+#include "primitive.h"
+#include "register_allocator.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class Location;
+class SsaLivenessAnalysis;
+class InterferenceNode;
+struct CoalesceOpportunity;
+enum class CoalesceKind;
+
+/**
+ * A graph coloring register allocator.
+ *
+ * The algorithm proceeds as follows:
+ * (1) Build an interference graph, where nodes represent live intervals, and edges represent
+ * interferences between two intervals. Coloring this graph with k colors is isomorphic to
+ * finding a valid register assignment with k registers.
+ * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are
+ * guaranteed a color. (No matter how we color their adjacent nodes, we can give them a
+ * different color.) As we prune nodes from the graph, more nodes may drop below degree k,
+ * enabling further pruning. The key is to maintain the pruning order in a stack, so that we
+ * can color the nodes in the reverse order.
+ * When there are no more nodes with degree less than k, we start pruning alternate nodes based
+ * on heuristics. Since these nodes are not guaranteed a color, we are careful to
+ * prioritize nodes that require a register. We also prioritize short intervals, because
+ * short intervals cannot be split very much if coloring fails (see below). "Prioritizing"
+ * a node amounts to pruning it later, since it will have fewer interferences if we prune other
+ * nodes first.
+ * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign
+ * a node a color, we do one of two things:
+ * - If the node requires a register, we consider the current coloring attempt a failure.
+ * However, we split the node's live interval in order to make the interference graph
+ * sparser, so that future coloring attempts may succeed.
+ * - If the node does not require a register, we simply assign it a location on the stack.
+ *
+ * If iterative move coalescing is enabled, the algorithm also attempts to conservatively
+ * combine nodes in the graph that would prefer to have the same color. (For example, the output
+ * of a phi instruction would prefer to have the same register as at least one of its inputs.)
+ * There are several additional steps involved with this:
+ * - We look for coalesce opportunities by examining each live interval, a step similar to that
+ * used by linear scan when looking for register hints.
+ * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist
+ * of low degree nodes that have associated coalesce opportunities. Only when we run out of
+ * coalesce opportunities do we start pruning coalesce-associated nodes.
+ * - When pruning a node, if any nodes transition from high degree to low degree, we add
+ * associated coalesce opportunities to the worklist, since these opportunities may now succeed.
+ * - Whether two nodes can be combined is decided by two different heuristics: one used when
+ * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node.
+ * It is vital that we only combine two nodes if the node that remains is guaranteed to receive
+ *   a color. This is because additional spilling is more costly than failing to coalesce.
+ * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around
+ * to be used as last-chance register hints when coloring. If nothing else, we try to use
+ * caller-save registers before callee-save registers.
+ *
+ * A good reference for graph coloring register allocation is
+ * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition).
+ */
+class RegisterAllocatorGraphColor : public RegisterAllocator {
+ public:
+ RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis,
+ bool iterative_move_coalescing = true);
+ ~RegisterAllocatorGraphColor() OVERRIDE {}
+
+ void AllocateRegisters() OVERRIDE;
+
+ bool Validate(bool log_fatal_on_failure);
+
+ private:
+ // Collect all intervals and prepare for register allocation.
+ void ProcessInstructions();
+ void ProcessInstruction(HInstruction* instruction);
+
+ // If any inputs require specific registers, block those registers
+ // at the position of this instruction.
+ void CheckForFixedInputs(HInstruction* instruction);
+
+ // If the output of an instruction requires a specific register, split
+ // the interval and assign the register to the first part.
+ void CheckForFixedOutput(HInstruction* instruction);
+
+ // Add all applicable safepoints to a live interval.
+ // Currently depends on instruction processing order.
+ void AddSafepointsFor(HInstruction* instruction);
+
+ // Collect all live intervals associated with the temporary locations
+ // needed by an instruction.
+ void CheckForTempLiveIntervals(HInstruction* instruction);
+
+  // If a safepoint is needed, add a synthesized interval to later record
+ // the number of live registers at this point.
+ void CheckForSafepoint(HInstruction* instruction);
+
+ // Split an interval, but only if `position` is inside of `interval`.
+ // Return either the new interval, or the original interval if not split.
+ static LiveInterval* TrySplit(LiveInterval* interval, size_t position);
+
+ // To ensure every graph can be colored, split live intervals
+ // at their register defs and uses. This creates short intervals with low
+ // degree in the interference graph, which are prioritized during graph
+ // coloring.
+ void SplitAtRegisterUses(LiveInterval* interval);
+
+ // If the given instruction is a catch phi, give it a spill slot.
+ void AllocateSpillSlotForCatchPhi(HInstruction* instruction);
+
+ // Ensure that the given register cannot be allocated for a given range.
+ void BlockRegister(Location location, size_t start, size_t end);
+ void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+
+ bool IsCallerSave(size_t reg, bool processing_core_regs);
+
+ // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not
+ // assigned the same stack slot.
+  void ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
+ size_t* num_stack_slots_used);
+
+ // Provide stack slots to nodes that need them.
+ void AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes);
+
+ // Whether iterative move coalescing should be performed. Iterative move coalescing
+ // improves code quality, but increases compile time.
+ const bool iterative_move_coalescing_;
+
+ // Live intervals, split by kind (core and floating point).
+ // These should not contain high intervals, as those are represented by
+ // the corresponding low interval throughout register allocation.
+ ArenaVector<LiveInterval*> core_intervals_;
+ ArenaVector<LiveInterval*> fp_intervals_;
+
+ // Intervals for temporaries, saved for special handling in the resolution phase.
+ ArenaVector<LiveInterval*> temp_intervals_;
+
+ // Safepoints, saved for special handling while processing instructions.
+ ArenaVector<HInstruction*> safepoints_;
+
+ // Interference nodes representing specific registers. These are "pre-colored" nodes
+ // in the interference graph.
+ ArenaVector<InterferenceNode*> physical_core_nodes_;
+ ArenaVector<InterferenceNode*> physical_fp_nodes_;
+
+ // Allocated stack slot counters.
+ size_t num_int_spill_slots_;
+ size_t num_double_spill_slots_;
+ size_t num_float_spill_slots_;
+ size_t num_long_spill_slots_;
+ size_t catch_phi_spill_slot_counter_;
+
+ // Number of stack slots needed for the pointer to the current method.
+ // This is 1 for 32-bit architectures, and 2 for 64-bit architectures.
+ const size_t reserved_art_method_slots_;
+
+ // Number of stack slots needed for outgoing arguments.
+ const size_t reserved_out_slots_;
+
+ friend class ColoringIteration;
+
+ DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
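For intuition only, the simplify/select structure described in the class comment above can be sketched over a toy adjacency-list graph. This is a minimal illustration with hypothetical names, no interval splitting, no spilling, and no move coalescing; it is not part of the patch:

#include <cstddef>
#include <vector>

// Toy interference graph: node i interferes with every node in adjacency[i].
// Returns true and fills `color` (values in [0, k)) if a k-coloring is found
// with the simplify/select scheme; returns false if some node got stuck
// (the real allocator would then split intervals or spill and retry).
bool ColorGraph(const std::vector<std::vector<size_t>>& adjacency,
                size_t k,
                std::vector<size_t>* color) {
  size_t n = adjacency.size();
  std::vector<size_t> degree(n);
  for (size_t i = 0; i < n; ++i) degree[i] = adjacency[i].size();

  // Simplify: repeatedly prune a node of degree < k; pruning lowers the degree
  // of its neighbors, which may enable further pruning. If no such node exists,
  // prune an arbitrary remaining node optimistically.
  std::vector<bool> pruned(n, false);
  std::vector<size_t> stack;
  while (stack.size() < n) {
    size_t candidate = n;
    for (size_t i = 0; i < n; ++i) {
      if (!pruned[i] && degree[i] < k) { candidate = i; break; }
    }
    if (candidate == n) {
      for (size_t i = 0; i < n; ++i) {
        if (!pruned[i]) { candidate = i; break; }  // Optimistic prune.
      }
    }
    pruned[candidate] = true;
    stack.push_back(candidate);
    for (size_t neighbor : adjacency[candidate]) {
      if (!pruned[neighbor]) --degree[neighbor];
    }
  }

  // Select: pop nodes in reverse pruning order and give each the lowest color
  // not used by an already-colored neighbor.
  color->assign(n, k);  // k means "uncolored".
  while (!stack.empty()) {
    size_t node = stack.back();
    stack.pop_back();
    std::vector<bool> used(k, false);
    for (size_t neighbor : adjacency[node]) {
      if ((*color)[neighbor] < k) used[(*color)[neighbor]] = true;
    }
    size_t c = 0;
    while (c < k && used[c]) ++c;
    if (c == k) return false;  // This coloring attempt failed.
    (*color)[node] = c;
  }
  return true;
}

The allocator in this patch differs in that it prunes heuristically (prioritizing nodes that require a register and short intervals), records why a node got stuck so its interval can be split before retrying, and uses coalesce opportunities as register hints; the sketch only shows the prune-then-color skeleton.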
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 768ed2d26a..1a391ce9bb 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -22,6 +22,7 @@
#include "base/bit_vector-inl.h"
#include "base/enums.h"
#include "code_generator.h"
+#include "linear_order.h"
#include "register_allocation_resolver.h"
#include "ssa_liveness_analysis.h"
@@ -63,9 +64,7 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocat
registers_array_(nullptr),
blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
- reserved_out_slots_(0),
- maximum_number_of_live_core_registers_(0),
- maximum_number_of_live_fp_registers_(0) {
+ reserved_out_slots_(0) {
temp_intervals_.reserve(4);
int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
@@ -92,8 +91,7 @@ static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval
void RegisterAllocatorLinearScan::AllocateRegisters() {
AllocateRegistersInternal();
RegisterAllocationResolver(allocator_, codegen_, liveness_)
- .Resolve(maximum_number_of_live_core_registers_,
- maximum_number_of_live_fp_registers_,
+ .Resolve(ArrayRef<HInstruction* const>(safepoints_),
reserved_out_slots_,
int_spill_slots_.size(),
long_spill_slots_.size(),
@@ -111,8 +109,7 @@ void RegisterAllocatorLinearScan::AllocateRegisters() {
// Since only parallel moves have been inserted during the register allocation,
// these checks are mostly for making sure these moves have been added correctly.
size_t current_liveness = 0;
- for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : codegen_->GetGraph()->GetLinearOrder()) {
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
HInstruction* instruction = inst_it.Current();
DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
@@ -166,8 +163,7 @@ void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool
void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
// Iterate post-order, to ensure the list is sorted, and the last added interval
// is the one with the lowest start position.
- for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
back_it.Advance()) {
ProcessInstruction(back_it.Current());
@@ -283,20 +279,6 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction)
return;
}
safepoints_.push_back(instruction);
- if (locations->OnlyCallsOnSlowPath()) {
- // We add a synthesized range at this position to record the live registers
- // at this position. Ideally, we could just update the safepoints when locations
- // are updated, but we currently need to know the full stack size before updating
- // locations (because of parameters and the fact that we don't have a frame pointer).
- // And knowing the full stack size requires to know the maximum number of live
- // registers at calls in slow paths.
- // By adding the following interval in the algorithm, we can compute this
- // maximum before updating locations.
- LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
- interval->AddRange(position, position + 1);
- AddSorted(&unhandled_core_intervals_, interval);
- AddSorted(&unhandled_fp_intervals_, interval);
- }
}
if (locations->WillCall()) {
@@ -569,20 +551,6 @@ void RegisterAllocatorLinearScan::LinearScan() {
});
inactive_.erase(inactive_kept_end, inactive_to_handle_end);
- if (current->IsSlowPathSafepoint()) {
- // Synthesized interval to record the maximum number of live registers
- // at safepoints. No need to allocate a register for it.
- if (processing_core_registers_) {
- maximum_number_of_live_core_registers_ =
- std::max(maximum_number_of_live_core_registers_, active_.size());
- } else {
- maximum_number_of_live_fp_registers_ =
- std::max(maximum_number_of_live_fp_registers_, active_.size());
- }
- DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
- continue;
- }
-
if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
DCHECK(!current->HasRegister());
// Allocating the low part was unsuccessful. The split interval for the high part
@@ -685,7 +653,7 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
// the next intersection with `current`.
for (LiveInterval* inactive : inactive_) {
// Temp/Slow-path-safepoint interval has no holes.
- DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+ DCHECK(!inactive->IsTemp());
if (!current->IsSplit() && !inactive->IsFixed()) {
// Neither current nor inactive are fixed.
// Thanks to SSA, a non-split interval starting in a hole of an
@@ -933,7 +901,7 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
// start of current.
for (LiveInterval* inactive : inactive_) {
// Temp/Slow-path-safepoint interval has no holes.
- DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+ DCHECK(!inactive->IsTemp());
if (!current->IsSplit() && !inactive->IsFixed()) {
// Neither current nor inactive are fixed.
// Thanks to SSA, a non-split interval starting in a hole of an
@@ -1085,12 +1053,6 @@ void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, L
if (current->StartsAfter(interval) && !current->IsHighInterval()) {
insert_at = i;
break;
- } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
- // Ensure the slow path interval is the last to be processed at its location: we want the
- // interval to know all live registers at this location.
- DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
- insert_at = i;
- break;
}
}
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
index b6e4f92e42..b3834f45e4 100644
--- a/compiler/optimizing/register_allocator_linear_scan.h
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -43,6 +43,7 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
RegisterAllocatorLinearScan(ArenaAllocator* allocator,
CodeGenerator* codegen,
const SsaLivenessAnalysis& analysis);
+ ~RegisterAllocatorLinearScan() OVERRIDE {}
void AllocateRegisters() OVERRIDE;
@@ -170,12 +171,6 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
// Slots reserved for out arguments.
size_t reserved_out_slots_;
- // The maximum live core registers at safepoints.
- size_t maximum_number_of_live_core_registers_;
-
- // The maximum live FP registers at safepoints.
- size_t maximum_number_of_live_fp_registers_;
-
ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index cbb7b2f1c5..55ea99e592 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -31,12 +31,29 @@
namespace art {
+using Strategy = RegisterAllocator::Strategy;
+
// Note: the register allocator tests rely on the fact that constants have live
// intervals and registers get allocated to them.
-class RegisterAllocatorTest : public CommonCompilerTest {};
+class RegisterAllocatorTest : public CommonCompilerTest {
+ protected:
+  // These functions need to access private members of LocationSummary, so we declare them
+  // as members of RegisterAllocatorTest, which is a friend class of LocationSummary.
+ static void SameAsFirstInputHint(Strategy strategy);
+ static void ExpectedInRegisterHint(Strategy strategy);
+};
+
+// This macro should include all register allocation strategies that should be tested.
+#define TEST_ALL_STRATEGIES(test_name)\
+TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\
+ test_name(Strategy::kRegisterAllocatorLinearScan);\
+}\
+TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
+ test_name(Strategy::kRegisterAllocatorGraphColor);\
+}
-static bool Check(const uint16_t* data) {
+static bool Check(const uint16_t* data, Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = CreateCFG(&allocator, data);
@@ -45,7 +62,8 @@ static bool Check(const uint16_t* data) {
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
return register_allocator->Validate(false);
}
@@ -143,7 +161,7 @@ TEST_F(RegisterAllocatorTest, ValidateIntervals) {
}
}
-TEST_F(RegisterAllocatorTest, CFG1) {
+static void CFG1(Strategy strategy) {
/*
* Test the following snippet:
* return 0;
@@ -160,10 +178,12 @@ TEST_F(RegisterAllocatorTest, CFG1) {
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN);
- ASSERT_TRUE(Check(data));
+ ASSERT_TRUE(Check(data, strategy));
}
-TEST_F(RegisterAllocatorTest, Loop1) {
+TEST_ALL_STRATEGIES(CFG1);
+
+static void Loop1(Strategy strategy) {
/*
* Test the following snippet:
* int a = 0;
@@ -199,10 +219,12 @@ TEST_F(RegisterAllocatorTest, Loop1) {
Instruction::CONST_4 | 5 << 12 | 1 << 8,
Instruction::RETURN | 1 << 8);
- ASSERT_TRUE(Check(data));
+ ASSERT_TRUE(Check(data, strategy));
}
-TEST_F(RegisterAllocatorTest, Loop2) {
+TEST_ALL_STRATEGIES(Loop1);
+
+static void Loop2(Strategy strategy) {
/*
* Test the following snippet:
* int a = 0;
@@ -248,10 +270,12 @@ TEST_F(RegisterAllocatorTest, Loop2) {
Instruction::ADD_INT, 1 << 8 | 0,
Instruction::RETURN | 1 << 8);
- ASSERT_TRUE(Check(data));
+ ASSERT_TRUE(Check(data, strategy));
}
-TEST_F(RegisterAllocatorTest, Loop3) {
+TEST_ALL_STRATEGIES(Loop2);
+
+static void Loop3(Strategy strategy) {
/*
* Test the following snippet:
* int a = 0
@@ -296,7 +320,8 @@ TEST_F(RegisterAllocatorTest, Loop3) {
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
ASSERT_TRUE(register_allocator->Validate(false));
@@ -314,6 +339,8 @@ TEST_F(RegisterAllocatorTest, Loop3) {
ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
}
+TEST_ALL_STRATEGIES(Loop3);
+
TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
@@ -354,7 +381,7 @@ TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
}
-TEST_F(RegisterAllocatorTest, DeadPhi) {
+static void DeadPhi(Strategy strategy) {
/* Test for a dead loop phi taking as back-edge input a phi that also has
* this loop phi as input. Walking backwards in SsaDeadPhiElimination
* does not solve the problem because the loop phi will be visited last.
@@ -385,15 +412,19 @@ TEST_F(RegisterAllocatorTest, DeadPhi) {
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
ASSERT_TRUE(register_allocator->Validate(false));
}
+TEST_ALL_STRATEGIES(DeadPhi);
+
/**
* Test that the TryAllocateFreeReg method works in the presence of inactive intervals
* that share the same register. It should split the interval it is currently
* allocating for at the minimum lifetime position between the two inactive intervals.
+ * This test only applies to the linear scan allocator.
*/
TEST_F(RegisterAllocatorTest, FreeUntil) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
@@ -507,15 +538,15 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
graph->GetDexFile(),
dex_cache,
0);
-*input2 = new (allocator) HInstanceFieldGet(parameter,
- Primitive::kPrimInt,
- MemberOffset(42),
- false,
- kUnknownFieldIndex,
- kUnknownClassDefIndex,
- graph->GetDexFile(),
- dex_cache,
- 0);
+ *input2 = new (allocator) HInstanceFieldGet(parameter,
+ Primitive::kPrimInt,
+ MemberOffset(42),
+ false,
+ kUnknownFieldIndex,
+ kUnknownClassDefIndex,
+ graph->GetDexFile(),
+ dex_cache,
+ 0);
then->AddInstruction(*input1);
else_->AddInstruction(*input2);
join->AddInstruction(new (allocator) HExit());
@@ -527,7 +558,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
return graph;
}
-TEST_F(RegisterAllocatorTest, PhiHint) {
+static void PhiHint(Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HPhi *phi;
@@ -543,7 +574,7 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
// Check that the register allocator is deterministic.
RegisterAllocator* register_allocator =
- RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
@@ -563,7 +594,7 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
// the same register.
phi->GetLocations()->UpdateOut(Location::RegisterLocation(2));
RegisterAllocator* register_allocator =
- RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
@@ -583,7 +614,7 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
// the same register.
input1->GetLocations()->UpdateOut(Location::RegisterLocation(2));
RegisterAllocator* register_allocator =
- RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
@@ -603,7 +634,7 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
// the same register.
input2->GetLocations()->UpdateOut(Location::RegisterLocation(2));
RegisterAllocator* register_allocator =
- RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
@@ -612,6 +643,12 @@ TEST_F(RegisterAllocatorTest, PhiHint) {
}
}
+// TODO: Enable this test for graph coloring register allocation when iterative move
+// coalescing is merged.
+TEST_F(RegisterAllocatorTest, PhiHint_LinearScan) {
+ PhiHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
HInstruction** field,
HInstruction** ret) {
@@ -650,7 +687,7 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
return graph;
}
-TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
+void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HInstruction *field, *ret;
@@ -664,7 +701,7 @@ TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
liveness.Analyze();
RegisterAllocator* register_allocator =
- RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
// Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
@@ -684,13 +721,19 @@ TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
RegisterAllocator* register_allocator =
- RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
}
}
+// TODO: Enable this test for graph coloring register allocation when iterative move
+// coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint_LinearScan) {
+ ExpectedInRegisterHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
HInstruction** first_sub,
HInstruction** second_sub) {
@@ -720,7 +763,7 @@ static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
return graph;
}
-TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
+void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HInstruction *first_sub, *second_sub;
@@ -734,7 +777,7 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
liveness.Analyze();
RegisterAllocator* register_allocator =
- RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
// Sanity check that in normal conditions, the registers are the same.
@@ -757,7 +800,7 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
RegisterAllocator* register_allocator =
- RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
@@ -765,6 +808,12 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
}
}
+// TODO: Enable this test for graph coloring register allocation when iterative move
+// coalescing is merged.
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint_LinearScan) {
+ SameAsFirstInputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
static HGraph* BuildDiv(ArenaAllocator* allocator,
HInstruction** div) {
HGraph* graph = CreateGraph(allocator);
@@ -791,7 +840,7 @@ static HGraph* BuildDiv(ArenaAllocator* allocator,
return graph;
}
-TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
+static void ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HInstruction *div;
@@ -805,7 +854,7 @@ TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
liveness.Analyze();
RegisterAllocator* register_allocator =
- RegisterAllocator::Create(&allocator, &codegen, liveness);
+ RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
register_allocator->AllocateRegisters();
// div on x86 requires its first input in eax and the output be the same as the first input.
@@ -813,9 +862,16 @@ TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
}
}
+// TODO: Enable this test for graph coloring register allocation when iterative move
+// coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint_LinearScan) {
+ ExpectedExactInRegisterAndSameOutputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
// Test a bug in the register allocator, where allocating a blocked
// register would lead to spilling an inactive interval at the wrong
// position.
+// This test only applies to the linear scan allocator.
TEST_F(RegisterAllocatorTest, SpillInactive) {
ArenaPool pool;
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index e409035d9d..46d0d0eb65 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -76,8 +76,7 @@ void HSelectGenerator::Run() {
// Iterate in post order in the unlikely case that removing one occurrence of
// the selection pattern empties a branch block of another occurrence.
// Otherwise the order does not matter.
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
if (!block->EndsWithIf()) continue;
// Find elements of the diamond pattern.
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index b73f73893c..fd1db592bb 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -20,6 +20,7 @@
#include "base/enums.h"
#include "class_linker.h"
#include "code_generator.h"
+#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
#include "utils/dex_cache_arrays_layout-inl.h"
#include "driver/compiler_driver.h"
@@ -30,7 +31,7 @@
#include "mirror/string.h"
#include "nodes.h"
#include "runtime.h"
-#include "scoped_thread_state_change.h"
+#include "scoped_thread_state_change-inl.h"
namespace art {
@@ -60,44 +61,28 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
return;
}
- // TODO: Avoid CompilerDriver.
- InvokeType original_invoke_type = invoke->GetOriginalInvokeType();
- InvokeType optimized_invoke_type = original_invoke_type;
- MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex());
- int vtable_idx;
- uintptr_t direct_code, direct_method;
- bool success = compiler_driver_->ComputeInvokeInfo(
- &compilation_unit_,
- invoke->GetDexPc(),
- false /* update_stats: already updated in builder */,
- true /* enable_devirtualization */,
- &optimized_invoke_type,
- &target_method,
- &vtable_idx,
- &direct_code,
- &direct_method);
- if (!success) {
- // TODO: try using kDexCachePcRelative. It's always a valid method load
- // kind as long as it's supported by the codegen
- return;
- }
- invoke->SetOptimizedInvokeType(optimized_invoke_type);
- invoke->SetTargetMethod(target_method);
+ HGraph* outer_graph = codegen_->GetGraph();
+ ArtMethod* compiling_method = graph_->GetArtMethod();
HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
uint64_t method_load_data = 0u;
uint64_t direct_code_ptr = 0u;
- HGraph* outer_graph = codegen_->GetGraph();
- if (target_method.dex_file == &outer_graph->GetDexFile() &&
- target_method.dex_method_index == outer_graph->GetMethodIdx()) {
+ if (invoke->GetResolvedMethod() == outer_graph->GetArtMethod()) {
+ DCHECK(outer_graph->GetArtMethod() != nullptr);
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
} else {
- bool use_pc_relative_instructions =
- ((direct_method == 0u || direct_code == static_cast<uintptr_t>(-1))) &&
- ContainsElement(compiler_driver_->GetDexFilesForOatFile(), target_method.dex_file);
+ uintptr_t direct_code, direct_method;
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ compiler_driver_->GetCodeAndMethodForDirectCall(
+ (compiling_method == nullptr) ? nullptr : compiling_method->GetDeclaringClass(),
+ invoke->GetResolvedMethod(),
+ &direct_code,
+ &direct_method);
+ }
if (direct_method != 0u) { // Should we use a direct pointer to the method?
// Note: For JIT, kDirectAddressWithFixup doesn't make sense at all and while
// kDirectAddress would be fine for image methods, we don't support it at the moment.
@@ -109,13 +94,12 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup;
}
} else { // Use dex cache.
- DCHECK_EQ(target_method.dex_file, &graph_->GetDexFile());
- if (use_pc_relative_instructions) { // Can we use PC-relative access to the dex cache arrays?
- DCHECK(!Runtime::Current()->UseJitCompilation());
+ if (!Runtime::Current()->UseJitCompilation()) {
+ // Use PC-relative access to the dex cache arrays.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
&graph_->GetDexFile());
- method_load_data = layout.MethodOffset(target_method.dex_method_index);
+ method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex());
} else { // We must go through the ArtMethod's pointer to resolved methods.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
}
@@ -124,10 +108,11 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
// Note: For JIT, kCallPCRelative and kCallDirectWithFixup don't make sense at all and
// while kCallDirect would be fine for image methods, we don't support it at the moment.
DCHECK(!Runtime::Current()->UseJitCompilation());
+ const DexFile* dex_file_of_callee = invoke->GetTargetMethod().dex_file;
if (direct_code != static_cast<uintptr_t>(-1)) { // Is the code pointer known now?
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
direct_code_ptr = direct_code;
- } else if (use_pc_relative_instructions) {
+ } else if (ContainsElement(compiler_driver_->GetDexFilesForOatFile(), dex_file_of_callee)) {
// Use PC-relative calls for invokes within a multi-dex oat file.
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative;
} else { // The direct pointer will be known at link time.
@@ -150,31 +135,22 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
method_load_kind, code_ptr_location, method_load_data, direct_code_ptr
};
HInvokeStaticOrDirect::DispatchInfo dispatch_info =
- codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info,
- invoke->GetTargetMethod());
+ codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
invoke->SetDispatchInfo(dispatch_info);
}
void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
- if (load_class->NeedsAccessCheck()) {
- // We need to call the runtime anyway, so we simply get the class as that call's return value.
- return;
- }
- if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) {
- // Loading from the ArtMethod* is the most efficient retrieval.
- // TODO: This may not actually be true for all architectures and
- // locations of target classes. The additional register pressure
- // for using the ArtMethod* should be considered.
- return;
- }
-
- DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+ DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
+ load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
+ << load_class->GetLoadKind();
DCHECK(!load_class->IsInDexCache()) << "HLoadClass should not be optimized before sharpening.";
+ DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening.";
const DexFile& dex_file = load_class->GetDexFile();
uint32_t type_index = load_class->GetTypeIndex();
bool is_in_dex_cache = false;
+ bool is_in_boot_image = false;
HLoadClass::LoadKind desired_load_kind;
uint64_t address = 0u; // Class or dex cache element address.
{
@@ -186,50 +162,46 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
? compilation_unit_.GetDexCache()
: hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
mirror::Class* klass = dex_cache->GetResolvedType(type_index);
-
- if (compiler_driver_->IsBootImage()) {
+ if (codegen_->GetCompilerOptions().IsBootImage()) {
// Compiling boot image. Check if the class is a boot image class.
DCHECK(!runtime->UseJitCompilation());
if (!compiler_driver_->GetSupportBootImageFixup()) {
- // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
+ // MIPS64 or compiler_driver_test. Do not sharpen.
desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+ } else if ((klass != nullptr) && compiler_driver_->IsImageClass(
+ dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+ is_in_boot_image = true;
+ is_in_dex_cache = true;
+ desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
+ ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
+ : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
} else {
- if (klass != nullptr &&
- compiler_driver_->IsImageClass(
- dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
- is_in_dex_cache = true;
- desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
- ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
- : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
- } else {
- // Not a boot image class. We must go through the dex cache.
- DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
- desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
- }
- }
- } else if (runtime->UseJitCompilation()) {
- // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
- // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
- is_in_dex_cache = (klass != nullptr);
- if (klass != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(klass)) {
- // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
- desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
- address = reinterpret_cast64<uint64_t>(klass);
- } else {
- // Note: If the class is not in the dex cache or isn't initialized, the
- // instruction needs environment and will not be inlined across dex files.
- // Within a dex file, the slow-path helper loads the correct class and
- // inlined frames are used correctly for OOM stack trace.
- // TODO: Write a test for this. Bug: 29416588
- desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress;
- void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index];
- address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
+ // Not a boot image class. We must go through the dex cache.
+ DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
+ desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
}
} else {
- // AOT app compilation. Check if the class is in the boot image.
- if ((klass != nullptr) &&
- runtime->GetHeap()->ObjectIsInBootImageSpace(klass) &&
- !codegen_->GetCompilerOptions().GetCompilePic()) {
+ is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass);
+ if (runtime->UseJitCompilation()) {
+ // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
+ // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+ is_in_dex_cache = (klass != nullptr);
+ if (is_in_boot_image) {
+ // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
+ desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+ address = reinterpret_cast64<uint64_t>(klass);
+ } else {
+ // Note: If the class is not in the dex cache or isn't initialized, the
+ // instruction needs environment and will not be inlined across dex files.
+ // Within a dex file, the slow-path helper loads the correct class and
+ // inlined frames are used correctly for OOM stack trace.
+ // TODO: Write a test for this. Bug: 29416588
+ desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress;
+ void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index];
+ address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
+ }
+ // AOT app compilation. Check if the class is in the boot image.
+ } else if (is_in_boot_image && !codegen_->GetCompilerOptions().GetCompilePic()) {
desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
address = reinterpret_cast64<uint64_t>(klass);
} else {
@@ -243,6 +215,24 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
}
}
}
+
+ if (is_in_boot_image) {
+ load_class->MarkInBootImage();
+ }
+
+ if (load_class->NeedsAccessCheck()) {
+ // We need to call the runtime anyway, so we simply get the class as that call's return value.
+ return;
+ }
+
+ if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) {
+    // Loading from the ArtMethod* is the most efficient retrieval in terms of code size.
+ // TODO: This may not actually be true for all architectures and
+ // locations of target classes. The additional register pressure
+ // for using the ArtMethod* should be considered.
+ return;
+ }
+
if (is_in_dex_cache) {
load_class->MarkInDexCache();
}
@@ -279,8 +269,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
const DexFile& dex_file = load_string->GetDexFile();
uint32_t string_index = load_string->GetStringIndex();
- bool is_in_dex_cache = false;
- HLoadString::LoadKind desired_load_kind;
+ HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
uint64_t address = 0u; // String or dex cache element address.
{
Runtime* runtime = Runtime::Current();
@@ -291,38 +280,27 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
? compilation_unit_.GetDexCache()
: hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
- if (compiler_driver_->IsBootImage()) {
+ if (codegen_->GetCompilerOptions().IsBootImage()) {
// Compiling boot image. Resolve the string and allocate it if needed.
DCHECK(!runtime->UseJitCompilation());
mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
CHECK(string != nullptr);
- if (!compiler_driver_->GetSupportBootImageFixup()) {
- // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
- desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
- } else {
+ if (compiler_driver_->GetSupportBootImageFixup()) {
DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
- is_in_dex_cache = true;
desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
? HLoadString::LoadKind::kBootImageLinkTimePcRelative
: HLoadString::LoadKind::kBootImageLinkTimeAddress;
+ } else {
+ // MIPS64 or compiler_driver_test. Do not sharpen.
+ DCHECK_EQ(desired_load_kind, HLoadString::LoadKind::kDexCacheViaMethod);
}
} else if (runtime->UseJitCompilation()) {
// TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
// DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
mirror::String* string = dex_cache->GetResolvedString(string_index);
- is_in_dex_cache = (string != nullptr);
if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
- // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
address = reinterpret_cast64<uint64_t>(string);
- } else {
- // Note: If the string is not in the dex cache, the instruction needs environment
- // and will not be inlined across dex files. Within a dex file, the slow-path helper
- // loads the correct string and inlined frames are used correctly for OOM stack trace.
- // TODO: Write a test for this. Bug: 29416588
- desired_load_kind = HLoadString::LoadKind::kDexCacheAddress;
- void* dex_cache_element_address = &dex_cache->GetStrings()[string_index];
- address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
}
} else {
// AOT app compilation. Try to lookup the string without allocating if not found.
@@ -333,23 +311,16 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
address = reinterpret_cast64<uint64_t>(string);
} else {
- // Not JIT and either the string is not in boot image or we are compiling in PIC mode.
- // Use PC-relative load from the dex cache if the dex file belongs
- // to the oat file that we're currently compiling.
- desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
- ? HLoadString::LoadKind::kDexCachePcRelative
- : HLoadString::LoadKind::kDexCacheViaMethod;
+ desired_load_kind = HLoadString::LoadKind::kBssEntry;
}
}
}
- if (is_in_dex_cache) {
- load_string->MarkInDexCache();
- }
HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
switch (load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBssEntry:
case HLoadString::LoadKind::kDexCacheViaMethod:
load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index);
break;
@@ -358,13 +329,6 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
DCHECK_NE(address, 0u);
load_string->SetLoadKindWithAddress(load_kind, address);
break;
- case HLoadString::LoadKind::kDexCachePcRelative: {
- PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
- DexCacheArraysLayout layout(pointer_size, &dex_file);
- size_t element_index = layout.StringOffset(string_index);
- load_string->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
- break;
- }
}
}
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index 1dc69867b4..6d82e8e06d 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -26,8 +26,7 @@ void SideEffectsAnalysis::Run() {
// In DEBUG mode, ensure side effects are properly initialized to empty.
if (kIsDebugBuild) {
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
SideEffects effects = GetBlockEffects(block);
DCHECK(effects.DoesNothing());
if (block->IsLoopHeader()) {
@@ -38,9 +37,7 @@ void SideEffectsAnalysis::Run() {
}
// Do a post order visit to ensure we visit a loop header after its loop body.
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
-
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
SideEffects effects = SideEffects::None();
// Update `effects` with the side effects of all instructions in this block.
for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 5a574d9af7..ae1e369999 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -25,8 +25,8 @@ namespace art {
void SsaBuilder::FixNullConstantType() {
// The order doesn't matter here.
- for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
- for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* equality_instr = it.Current();
if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) {
continue;
@@ -57,8 +57,8 @@ void SsaBuilder::FixNullConstantType() {
void SsaBuilder::EquivalentPhisCleanup() {
// The order doesn't matter here.
- for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
- for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) {
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
HPhi* next = phi->GetNextEquivalentPhiWithSameType();
if (next != nullptr) {
@@ -79,8 +79,7 @@ void SsaBuilder::EquivalentPhisCleanup() {
}
void SsaBuilder::FixEnvironmentPhis() {
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
HPhi* phi = it_phis.Current()->AsPhi();
// If the phi is not dead, or has no environment uses, there is nothing to do.
@@ -163,18 +162,12 @@ static bool TypePhiFromInputs(HPhi* phi) {
// Replace inputs of `phi` to match its type. Return false if conflict is identified.
bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) {
Primitive::Type common_type = phi->GetType();
- if (common_type == Primitive::kPrimVoid || Primitive::IsIntegralType(common_type)) {
- // Phi either contains only other untyped phis (common_type == kPrimVoid),
- // or `common_type` is integral and we do not need to retype ambiguous inputs
- // because they are always constructed with the integral type candidate.
+ if (Primitive::IsIntegralType(common_type)) {
+ // We do not need to retype ambiguous inputs because they are always constructed
+ // with the integral type candidate.
if (kIsDebugBuild) {
for (HInstruction* input : phi->GetInputs()) {
- if (common_type == Primitive::kPrimVoid) {
- DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid);
- } else {
- DCHECK((input->IsPhi() && input->GetType() == Primitive::kPrimVoid) ||
- HPhi::ToPhiType(input->GetType()) == common_type);
- }
+ DCHECK(HPhi::ToPhiType(input->GetType()) == common_type);
}
}
// Inputs did not need to be replaced, hence no conflict. Report success.
@@ -234,8 +227,7 @@ bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) {
void SsaBuilder::RunPrimitiveTypePropagation() {
ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder));
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
if (block->IsLoopHeader()) {
for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
HPhi* phi = phi_it.Current()->AsPhi();
@@ -303,7 +295,7 @@ static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
}
static Primitive::Type GetPrimitiveArrayComponentType(HInstruction* array)
- SHARED_REQUIRES(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) {
ReferenceTypeInfo array_type = array->GetReferenceTypeInfo();
DCHECK(array_type.IsPrimitiveArrayClass());
return array_type.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index d7360adef8..45dac54115 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -49,7 +49,7 @@ class SsaBuilder : public ValueObject {
public:
SsaBuilder(HGraph* graph,
Handle<mirror::DexCache> dex_cache,
- StackHandleScopeCollection* handles)
+ VariableSizedHandleScope* handles)
: graph_(graph),
dex_cache_(dex_cache),
handles_(handles),
@@ -116,7 +116,7 @@ class SsaBuilder : public ValueObject {
HGraph* graph_;
Handle<mirror::DexCache> dex_cache_;
- StackHandleScopeCollection* const handles_;
+ VariableSizedHandleScope* const handles_;
// True if types of ambiguous ArrayGets have been resolved.
bool agets_fixed_;
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 7af4302884..e8e12e1a55 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -18,90 +18,21 @@
#include "base/bit_vector-inl.h"
#include "code_generator.h"
+#include "linear_order.h"
#include "nodes.h"
namespace art {
void SsaLivenessAnalysis::Analyze() {
- LinearizeGraph();
+  // Compute the linear order directly in the graph's data structure
+  // (no further graph mutations follow at this point).
+ LinearizeGraph(graph_, graph_->GetArena(), &graph_->linear_order_);
+
+ // Liveness analysis.
NumberInstructions();
ComputeLiveness();
}
-static bool IsLoop(HLoopInformation* info) {
- return info != nullptr;
-}
-
-static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) {
- return first_loop == second_loop;
-}
-
-static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) {
- return (inner != outer)
- && (inner != nullptr)
- && (outer != nullptr)
- && inner->IsIn(*outer);
-}
-
-static void AddToListForLinearization(ArenaVector<HBasicBlock*>* worklist, HBasicBlock* block) {
- HLoopInformation* block_loop = block->GetLoopInformation();
- auto insert_pos = worklist->rbegin(); // insert_pos.base() will be the actual position.
- for (auto end = worklist->rend(); insert_pos != end; ++insert_pos) {
- HBasicBlock* current = *insert_pos;
- HLoopInformation* current_loop = current->GetLoopInformation();
- if (InSameLoop(block_loop, current_loop)
- || !IsLoop(current_loop)
- || IsInnerLoop(current_loop, block_loop)) {
- // The block can be processed immediately.
- break;
- }
- }
- worklist->insert(insert_pos.base(), block);
-}
-
-void SsaLivenessAnalysis::LinearizeGraph() {
- // Create a reverse post ordering with the following properties:
- // - Blocks in a loop are consecutive,
- // - Back-edge is the last block before loop exits.
-
- // (1): Record the number of forward predecessors for each block. This is to
- // ensure the resulting order is reverse post order. We could use the
- // current reverse post order in the graph, but it would require making
- // order queries to a GrowableArray, which is not the best data structure
- // for it.
- ArenaVector<uint32_t> forward_predecessors(graph_->GetBlocks().size(),
- graph_->GetArena()->Adapter(kArenaAllocSsaLiveness));
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
- size_t number_of_forward_predecessors = block->GetPredecessors().size();
- if (block->IsLoopHeader()) {
- number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
- }
- forward_predecessors[block->GetBlockId()] = number_of_forward_predecessors;
- }
-
- // (2): Following a worklist approach, first start with the entry block, and
- // iterate over the successors. When all non-back edge predecessors of a
- // successor block are visited, the successor block is added in the worklist
- // following an order that satisfies the requirements to build our linear graph.
- graph_->linear_order_.reserve(graph_->GetReversePostOrder().size());
- ArenaVector<HBasicBlock*> worklist(graph_->GetArena()->Adapter(kArenaAllocSsaLiveness));
- worklist.push_back(graph_->GetEntryBlock());
- do {
- HBasicBlock* current = worklist.back();
- worklist.pop_back();
- graph_->linear_order_.push_back(current);
- for (HBasicBlock* successor : current->GetSuccessors()) {
- int block_id = successor->GetBlockId();
- size_t number_of_remaining_predecessors = forward_predecessors[block_id];
- if (number_of_remaining_predecessors == 1) {
- AddToListForLinearization(&worklist, successor);
- }
- forward_predecessors[block_id] = number_of_remaining_predecessors - 1;
- }
- } while (!worklist.empty());
-}
-
void SsaLivenessAnalysis::NumberInstructions() {
int ssa_index = 0;
size_t lifetime_position = 0;
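The LinearizeGraph call above now delegates to the helper declared in linear_order.h; the worklist algorithm the removed code implemented is reproduced below as a minimal standalone sketch. All types and names here are illustrative toys, not ART APIs, and the loop-aware insertion heuristic (AddToListForLinearization) is reduced to a plain LIFO worklist, so the result is a reverse post order without the loop-contiguity guarantee.

// Sketch of the forward-predecessor-counting worklist linearization.
#include <cstddef>
#include <iostream>
#include <vector>

struct ToyBlock {
  int id;  // Blocks are indexed by id in this sketch.
  std::vector<int> successors;
  std::vector<int> predecessors;
};

std::vector<int> Linearize(const std::vector<ToyBlock>& blocks, int entry_id) {
  // (1) Count forward predecessors so a block is emitted only after all of
  //     them have been visited (back edges would be subtracted here).
  std::vector<size_t> remaining(blocks.size());
  for (const ToyBlock& block : blocks) {
    remaining[block.id] = block.predecessors.size();
  }
  // (2) Worklist approach starting at the entry block.
  std::vector<int> order;
  std::vector<int> worklist = {entry_id};
  while (!worklist.empty()) {
    int current = worklist.back();
    worklist.pop_back();
    order.push_back(current);
    for (int succ : blocks[current].successors) {
      if (--remaining[succ] == 0) {
        worklist.push_back(succ);
      }
    }
  }
  return order;
}

int main() {
  // Diamond CFG: 0 -> {1, 2} -> 3.
  std::vector<ToyBlock> blocks = {
      {0, {1, 2}, {}}, {1, {3}, {0}}, {2, {3}, {0}}, {3, {}, {1, 2}}};
  for (int id : Linearize(blocks, 0)) std::cout << id << ' ';
  std::cout << '\n';  // Prints a reverse post order, e.g. "0 2 1 3".
}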
@@ -114,8 +45,7 @@ void SsaLivenessAnalysis::NumberInstructions() {
// to differentiate between the start and end of an instruction. Adding 2 to
// the lifetime position for each instruction ensures the start of an
// instruction is different than the end of the previous instruction.
- for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetLinearOrder()) {
block->SetLifetimeStart(lifetime_position);
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
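As a concrete illustration of the numbering scheme the comment above describes (a sketch with toy arithmetic, not ART code): stepping the lifetime position by 2 per instruction keeps every instruction's start distinct from the previous instruction's end.

#include <cstddef>
#include <iostream>

int main() {
  size_t lifetime_position = 0;
  for (int instruction = 0; instruction < 3; ++instruction) {
    size_t start = lifetime_position;    // 0, 2, 4, ...
    size_t end = lifetime_position + 1;  // 1, 3, 5, ...
    std::cout << "instruction " << instruction
              << " start=" << start << " end=" << end << '\n';
    lifetime_position += 2;  // Step by 2, as in NumberInstructions.
  }
}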
@@ -157,8 +87,7 @@ void SsaLivenessAnalysis::NumberInstructions() {
}
void SsaLivenessAnalysis::ComputeLiveness() {
- for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetLinearOrder()) {
block_infos_[block->GetBlockId()] =
new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_);
}
@@ -210,9 +139,7 @@ static void RecursivelyProcessInputs(HInstruction* current,
void SsaLivenessAnalysis::ComputeLiveRanges() {
// Do a post order visit, adding inputs of instructions live in the block where
// that instruction is defined, and killing instructions that are being visited.
- for (HLinearPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
-
+ for (HBasicBlock* block : ReverseRange(graph_->GetLinearOrder())) {
BitVector* kill = GetKillSet(*block);
BitVector* live_in = GetLiveInSet(*block);
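The loop above walks the linear order backwards through ReverseRange. ART's adapter lives elsewhere in the tree; the following is only a minimal sketch of such an adapter, under the assumption that it simply exposes reverse iterators to range-based for.

// Minimal ReverseRange-style adapter (illustrative, not ART's actual helper).
#include <iostream>
#include <vector>

template <typename Container>
class ReverseRangeAdapter {
 public:
  explicit ReverseRangeAdapter(const Container& container) : container_(container) {}
  auto begin() const { return container_.rbegin(); }
  auto end() const { return container_.rend(); }

 private:
  const Container& container_;
};

template <typename Container>
ReverseRangeAdapter<Container> ReverseRange(const Container& container) {
  return ReverseRangeAdapter<Container>(container);
}

int main() {
  std::vector<int> linear_order = {1, 2, 3, 4};
  for (int block_id : ReverseRange(linear_order)) {
    std::cout << block_id << ' ';  // Prints "4 3 2 1".
  }
  std::cout << '\n';
}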
@@ -329,15 +256,13 @@ void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() {
do {
changed = false;
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- const HBasicBlock& block = *it.Current();
-
+ for (const HBasicBlock* block : graph_->GetPostOrder()) {
// The live_in set depends on the kill set (which does not
// change in this loop), and the live_out set. If the live_out
// set does not change, there is no need to update the live_in set.
- if (UpdateLiveOut(block) && UpdateLiveIn(block)) {
+ if (UpdateLiveOut(*block) && UpdateLiveIn(*block)) {
if (kIsDebugBuild) {
- CheckNoLiveInIrreducibleLoop(block);
+ CheckNoLiveInIrreducibleLoop(*block);
}
changed = true;
}
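The loop above is a classic backward dataflow fixed point: a block's live_out is the union of its successors' live_in, and live_in only needs recomputation when live_out changed. Below is a small sketch of that fixed point using std::bitset and the textbook gen/kill formulation; it is illustrative only and does not use ART's BitVector or BlockInfo types.

// live_out(B) = union of live_in(S) over successors S;
// live_in(B)  = gen(B) | (live_out(B) & ~kill(B)). Iterate until stable.
#include <bitset>
#include <iostream>
#include <vector>

constexpr size_t kNumValues = 8;
using LiveSet = std::bitset<kNumValues>;

struct BlockSets {
  std::vector<int> successors;
  LiveSet gen, kill, live_in, live_out;
};

void ComputeLiveInAndLiveOut(std::vector<BlockSets>& blocks) {
  bool changed;
  do {
    changed = false;
    // Post order would converge faster; plain reverse iteration is enough here.
    for (auto it = blocks.rbegin(); it != blocks.rend(); ++it) {
      LiveSet new_out;
      for (int succ : it->successors) new_out |= blocks[succ].live_in;
      LiveSet new_in = it->gen | (new_out & ~it->kill);
      if (new_out != it->live_out || new_in != it->live_in) {
        it->live_out = new_out;
        it->live_in = new_in;
        changed = true;
      }
    }
  } while (changed);
}

int main() {
  // Two blocks: block 0 falls through to block 1, which uses value #3.
  std::vector<BlockSets> blocks(2);
  blocks[0].successors = {1};
  blocks[1].gen.set(3);
  ComputeLiveInAndLiveOut(blocks);
  std::cout << "value 3 live out of block 0: "
            << blocks[0].live_out.test(3) << '\n';  // Prints 1.
}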
@@ -368,6 +293,27 @@ bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) {
return live_in->UnionIfNotIn(live_out, kill);
}
+void LiveInterval::DumpWithContext(std::ostream& stream,
+ const CodeGenerator& codegen) const {
+ Dump(stream);
+ if (IsFixed()) {
+ stream << ", register:" << GetRegister() << "(";
+ if (IsFloatingPoint()) {
+ codegen.DumpFloatingPointRegister(stream, GetRegister());
+ } else {
+ codegen.DumpCoreRegister(stream, GetRegister());
+ }
+ stream << ")";
+ } else {
+ stream << ", spill slot:" << GetSpillSlot();
+ }
+ stream << ", requires_register:" << (GetDefinedBy() != nullptr && RequiresRegister());
+ if (GetParent()->GetDefinedBy() != nullptr) {
+ stream << ", defined_by:" << GetParent()->GetDefinedBy()->GetKind();
+ stream << "(" << GetParent()->GetDefinedBy()->GetLifetimePosition() << ")";
+ }
+}
+
static int RegisterOrLowRegister(Location location) {
return location.IsPair() ? location.low() : location.reg();
}
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index dc98864d9b..b62bf4e5f9 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -150,9 +150,7 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
if (GetIsEnvironment()) return false;
if (IsSynthesized()) return false;
Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
- return location.IsUnallocated()
- && (location.GetPolicy() == Location::kRequiresRegister
- || location.GetPolicy() == Location::kRequiresFpuRegister);
+ return location.IsUnallocated() && location.RequiresRegisterKind();
}
private:
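The removed lines spell out exactly which policies count as "requires a register kind": kRequiresRegister or kRequiresFpuRegister. The sketch below reproduces that predicate on a toy Location type; ART's Location::RequiresRegisterKind is assumed to encapsulate the same two-policy test, and none of the names here are ART's actual class.

// The inline check factored into a RequiresRegisterKind()-style helper.
#include <iostream>

class ToyLocation {
 public:
  enum Policy { kAny, kRequiresRegister, kRequiresFpuRegister };
  explicit ToyLocation(Policy policy) : policy_(policy) {}
  Policy GetPolicy() const { return policy_; }
  bool RequiresRegisterKind() const {
    return policy_ == kRequiresRegister || policy_ == kRequiresFpuRegister;
  }

 private:
  Policy policy_;
};

int main() {
  std::cout << ToyLocation(ToyLocation::kRequiresFpuRegister).RequiresRegisterKind()  // 1
            << ToyLocation(ToyLocation::kAny).RequiresRegisterKind() << '\n';         // 0
}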
@@ -210,11 +208,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return new (allocator) LiveInterval(allocator, type, instruction);
}
- static LiveInterval* MakeSlowPathInterval(ArenaAllocator* allocator, HInstruction* instruction) {
- return new (allocator) LiveInterval(
- allocator, Primitive::kPrimVoid, instruction, false, kNoRegister, false, true);
- }
-
static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) {
return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false);
}
@@ -225,7 +218,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
bool IsFixed() const { return is_fixed_; }
bool IsTemp() const { return is_temp_; }
- bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; }
// This interval is the result of a split.
bool IsSplit() const { return parent_ != this; }
@@ -481,6 +473,10 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return last_range_->GetEnd();
}
+ size_t GetLength() const {
+ return GetEnd() - GetStart();
+ }
+
size_t FirstRegisterUseAfter(size_t position) const {
if (is_temp_) {
return position == GetStart() ? position : kNoLifetime;
@@ -504,10 +500,18 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return kNoLifetime;
}
+ // Returns the lifetime position of the first register use for this live interval,
+ // including a register definition if applicable.
size_t FirstRegisterUse() const {
return FirstRegisterUseAfter(GetStart());
}
+ // Whether the interval requires a register rather than a stack location.
+ // If needed for performance, this could be cached.
+ bool RequiresRegister() const {
+ return !HasRegister() && FirstRegisterUse() != kNoLifetime;
+ }
+
size_t FirstUseAfter(size_t position) const {
if (is_temp_) {
return position == GetStart() ? position : kNoLifetime;
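The new RequiresRegister() comment notes the result could be cached if it ever shows up in profiles. A minimal sketch of that caching, on a toy class rather than ART's LiveInterval:

// Memoize the answer the first time RequiresRegister() is asked.
#include <iostream>

class ToyInterval {
 public:
  bool RequiresRegister() const {
    if (!requires_register_computed_) {
      requires_register_ = ComputeRequiresRegister();  // Expensive scan of uses.
      requires_register_computed_ = true;
    }
    return requires_register_;
  }

 private:
  bool ComputeRequiresRegister() const { return true; }  // Placeholder for the real scan.
  mutable bool requires_register_computed_ = false;
  mutable bool requires_register_ = false;
};

int main() {
  ToyInterval interval;
  std::cout << interval.RequiresRegister() << '\n';  // Computed once, then cached.
}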
@@ -693,6 +697,10 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
stream << " is_high: " << IsHighInterval();
}
+ // Same as Dump, but adds context such as the instruction defining this interval, and
+ // the register currently assigned to this interval.
+ void DumpWithContext(std::ostream& stream, const CodeGenerator& codegen) const;
+
LiveInterval* GetNextSibling() const { return next_sibling_; }
LiveInterval* GetLastSibling() {
LiveInterval* result = this;
@@ -776,7 +784,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
DCHECK(!HasHighInterval());
DCHECK(!HasLowInterval());
high_or_low_interval_ = new (allocator_) LiveInterval(
- allocator_, type_, defined_by_, false, kNoRegister, is_temp, false, true);
+ allocator_, type_, defined_by_, false, kNoRegister, is_temp, true);
high_or_low_interval_->high_or_low_interval_ = this;
if (first_range_ != nullptr) {
high_or_low_interval_->first_range_ = first_range_->Dup(allocator_);
@@ -871,6 +879,33 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
range_search_start_ = first_range_;
}
+ bool DefinitionRequiresRegister() const {
+ DCHECK(IsParent());
+ LocationSummary* locations = defined_by_->GetLocations();
+ Location location = locations->Out();
+ // This interval is the first interval of the instruction. If the output
+ // of the instruction requires a register, we return the position of that instruction
+ // as the first register use.
+ if (location.IsUnallocated()) {
+ if ((location.GetPolicy() == Location::kRequiresRegister)
+ || (location.GetPolicy() == Location::kSameAsFirstInput
+ && (locations->InAt(0).IsRegister()
+ || locations->InAt(0).IsRegisterPair()
+ || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+ return true;
+ } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+ || (location.GetPolicy() == Location::kSameAsFirstInput
+ && (locations->InAt(0).IsFpuRegister()
+ || locations->InAt(0).IsFpuRegisterPair()
+ || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+ return true;
+ }
+ } else if (location.IsRegister() || location.IsRegisterPair()) {
+ return true;
+ }
+ return false;
+ }
+
private:
LiveInterval(ArenaAllocator* allocator,
Primitive::Type type,
@@ -878,7 +913,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
bool is_fixed = false,
int reg = kNoRegister,
bool is_temp = false,
- bool is_slow_path_safepoint = false,
bool is_high_interval = false)
: allocator_(allocator),
first_range_(nullptr),
@@ -895,7 +929,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
spill_slot_(kNoSpillSlot),
is_fixed_(is_fixed),
is_temp_(is_temp),
- is_slow_path_safepoint_(is_slow_path_safepoint),
is_high_interval_(is_high_interval),
high_or_low_interval_(nullptr),
defined_by_(defined_by) {}
@@ -925,33 +958,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return range;
}
- bool DefinitionRequiresRegister() const {
- DCHECK(IsParent());
- LocationSummary* locations = defined_by_->GetLocations();
- Location location = locations->Out();
- // This interval is the first interval of the instruction. If the output
- // of the instruction requires a register, we return the position of that instruction
- // as the first register use.
- if (location.IsUnallocated()) {
- if ((location.GetPolicy() == Location::kRequiresRegister)
- || (location.GetPolicy() == Location::kSameAsFirstInput
- && (locations->InAt(0).IsRegister()
- || locations->InAt(0).IsRegisterPair()
- || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
- return true;
- } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
- || (location.GetPolicy() == Location::kSameAsFirstInput
- && (locations->InAt(0).IsFpuRegister()
- || locations->InAt(0).IsFpuRegisterPair()
- || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
- return true;
- }
- } else if (location.IsRegister() || location.IsRegisterPair()) {
- return true;
- }
- return false;
- }
-
bool IsDefiningPosition(size_t position) const {
return IsParent() && (position == GetStart());
}
@@ -969,38 +975,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return false;
}
- bool IsLinearOrderWellFormed(const HGraph& graph) {
- for (HBasicBlock* header : graph.GetBlocks()) {
- if (header == nullptr || !header->IsLoopHeader()) {
- continue;
- }
-
- HLoopInformation* loop = header->GetLoopInformation();
- size_t num_blocks = loop->GetBlocks().NumSetBits();
- size_t found_blocks = 0u;
-
- for (HLinearOrderIterator it(graph); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
- if (loop->Contains(*current)) {
- found_blocks++;
- if (found_blocks == 1u && current != header) {
- // First block is not the header.
- return false;
- } else if (found_blocks == num_blocks && !loop->IsBackEdge(*current)) {
- // Last block is not a back edge.
- return false;
- }
- } else if (found_blocks != 0u && found_blocks != num_blocks) {
- // Blocks are not adjacent.
- return false;
- }
- }
- DCHECK_EQ(found_blocks, num_blocks);
- }
-
- return true;
- }
-
void AddBackEdgeUses(const HBasicBlock& block_at_use) {
DCHECK(block_at_use.IsInLoop());
if (block_at_use.GetGraph()->HasIrreducibleLoops()) {
@@ -1010,8 +984,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return;
}
- DCHECK(IsLinearOrderWellFormed(*block_at_use.GetGraph()));
-
// Add synthesized uses at the back edge of loops to help the register allocator.
// Note that this method is called in decreasing liveness order, to facilitate adding
// uses at the head of the `first_use_` linked list. Because below
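The comment above pairs decreasing visit order with head insertion: prepending each synthesized use keeps the singly linked use list sorted by increasing position without ever walking it. A toy sketch of that idea (not ART's UsePosition chain):

#include <cstddef>
#include <iostream>

struct ToyUse {
  size_t position;
  ToyUse* next;
};

int main() {
  ToyUse* first_use = nullptr;
  // Back-edge positions visited in decreasing liveness order.
  for (size_t position : {40u, 30u, 20u}) {
    first_use = new ToyUse{position, first_use};  // O(1) head insertion.
  }
  for (ToyUse* use = first_use; use != nullptr; use = use->next) {
    std::cout << use->position << ' ';  // Prints "20 30 40": increasing order.
  }
  std::cout << '\n';
}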
@@ -1107,9 +1079,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
// Whether the interval is for a temporary.
const bool is_temp_;
- // Whether the interval is for a safepoint that calls on slow path.
- const bool is_slow_path_safepoint_;
-
// Whether this interval is a synthesized interval for register pair.
const bool is_high_interval_;
@@ -1217,12 +1186,6 @@ class SsaLivenessAnalysis : public ValueObject {
static constexpr const char* kLivenessPassName = "liveness";
private:
- // Linearize the graph so that:
- // (1): a block is always after its dominator,
- // (2): blocks of loops are contiguous.
- // This creates a natural and efficient ordering when visualizing live ranges.
- void LinearizeGraph();
-
// Give an SSA number to each instruction that defines a value used by another instruction,
// and setup the lifetime information of each instruction and block.
void NumberInstructions();
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index b1ec99ab8e..aec7a3c555 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -34,8 +34,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() {
ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination));
// Add to the worklist phis referenced by non-phi instructions.
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
HPhi* phi = inst_it.Current()->AsPhi();
if (phi->IsDead()) {
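The marking step described above seeds a worklist with phis referenced by non-phi instructions and then propagates liveness to their phi inputs; whatever is never reached is dead. The sketch below shows that propagation on toy integer ids, not ART's HPhi objects.

#include <iostream>
#include <vector>

int main() {
  const int num_phis = 4;
  // Phi inputs that are themselves phis: 0 uses 1, 1 uses 2, 3 uses 3 (dead cycle).
  std::vector<std::vector<int>> phi_inputs = {{1}, {2}, {}, {3}};
  std::vector<bool> used_by_non_phi = {true, false, false, false};

  // Seed the worklist with phis referenced by non-phi instructions.
  std::vector<bool> live(num_phis, false);
  std::vector<int> worklist;
  for (int phi = 0; phi < num_phis; ++phi) {
    if (used_by_non_phi[phi]) {
      live[phi] = true;
      worklist.push_back(phi);
    }
  }
  // Propagate liveness to the phi inputs of live phis.
  while (!worklist.empty()) {
    int phi = worklist.back();
    worklist.pop_back();
    for (int input : phi_inputs[phi]) {
      if (!live[input]) {
        live[input] = true;
        worklist.push_back(input);
      }
    }
  }
  for (int phi = 0; phi < num_phis; ++phi) {
    std::cout << "phi " << phi << (live[phi] ? " live" : " dead") << '\n';
  }
}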
@@ -84,8 +83,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() {
// Remove phis that are not live. Visit in post order so that phis
// that are not inputs of loop phis can be removed when they have
// no users left (dead phis might use dead phis).
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
HInstruction* current = block->GetFirstPhi();
HInstruction* next = nullptr;
HPhi* phi;
@@ -119,8 +117,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() {
void SsaRedundantPhiElimination::Run() {
// Add all phis to the worklist. Order does not matter for correctness, and
// no particular order is guaranteed to converge faster.
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
worklist_.push_back(inst_it.Current()->AsPhi());
}
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
index 195159f61b..4e256832a2 100644
--- a/compiler/optimizing/x86_memory_gen.cc
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -16,6 +16,7 @@
#include "x86_memory_gen.h"
#include "code_generator.h"
+#include "driver/compiler_options.h"
namespace art {
namespace x86 {
@@ -69,8 +70,8 @@ class MemoryOperandVisitor : public HGraphVisitor {
};
X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph,
- OptimizingCompilerStats* stats,
- CodeGenerator* codegen)
+ CodeGenerator* codegen,
+ OptimizingCompilerStats* stats)
: HOptimization(graph, kX86MemoryOperandGenerationPassName, stats),
do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) {
}
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
index 7e886819bb..5f15d9f1e6 100644
--- a/compiler/optimizing/x86_memory_gen.h
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -28,8 +28,8 @@ namespace x86 {
class X86MemoryOperandGeneration : public HOptimization {
public:
X86MemoryOperandGeneration(HGraph* graph,
- OptimizingCompilerStats* stats,
- CodeGenerator* codegen);
+ CodeGenerator* codegen,
+ OptimizingCompilerStats* stats);
void Run() OVERRIDE;