Diffstat (limited to 'compiler/optimizing')
109 files changed, 18081 insertions, 6341 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 8aefd9ea1f..d2357a5d05 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -887,7 +887,7 @@ class BCEVisitor : public HGraphVisitor { bool needs_finite_test = false; bool needs_taken_test = false; if (DynamicBCESeemsProfitable(loop, bounds_check->GetBlock()) && - induction_range_.CanGenerateCode( + induction_range_.CanGenerateRange( bounds_check, index, &needs_finite_test, &needs_taken_test) && CanHandleInfiniteLoop(loop, index, needs_finite_test) && // Do this test last, since it may generate code. @@ -1361,6 +1361,11 @@ class BCEVisitor : public HGraphVisitor { ValueBound other_value = ValueBound::AsValueBound(other_index); int32_t other_c = other_value.GetConstant(); if (array_length == other_array_length && base == other_value.GetInstruction()) { + // Ensure every candidate could be picked for code generation. + bool b1 = false, b2 = false; + if (!induction_range_.CanGenerateRange(other_bounds_check, other_index, &b1, &b2)) { + continue; + } // Does the current basic block dominate all back edges? If not, // add this candidate later only if it falls into the range. if (!loop->DominatesAllBackEdges(user->GetBlock())) { @@ -1403,10 +1408,10 @@ class BCEVisitor : public HGraphVisitor { // whether code generation on the original and, thus, related bounds check was possible. // It handles either loop invariants (lower is not set) or unit strides. if (other_c == max_c) { - induction_range_.GenerateRangeCode( + induction_range_.GenerateRange( other_bounds_check, other_index, GetGraph(), block, &max_lower, &max_upper); } else if (other_c == min_c && base != nullptr) { - induction_range_.GenerateRangeCode( + induction_range_.GenerateRange( other_bounds_check, other_index, GetGraph(), block, &min_lower, &min_upper); } ReplaceInstruction(other_bounds_check, other_index); @@ -1699,11 +1704,8 @@ class BCEVisitor : public HGraphVisitor { // Insert the taken-test to see if the loop body is entered. If the // loop isn't entered at all, it jumps around the deoptimization block. if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); // placeholder - HInstruction* condition = nullptr; - induction_range_.GenerateTakenTest(header->GetLastInstruction(), - GetGraph(), - if_block, - &condition); + HInstruction* condition = induction_range_.GenerateTakenTest( + header->GetLastInstruction(), GetGraph(), if_block); DCHECK(condition != nullptr); if_block->RemoveInstruction(if_block->GetLastInstruction()); if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition)); @@ -1843,8 +1845,8 @@ void BoundsCheckElimination::Run() { // that value dominated by that instruction fits in that range. Range of that // value can be narrowed further down in the dominator tree. BCEVisitor visitor(graph_, side_effects_, induction_analysis_); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); + for (size_t i = 0, size = graph_->GetReversePostOrder().size(); i != size; ++i) { + HBasicBlock* current = graph_->GetReversePostOrder()[i]; if (visitor.IsAddedBlock(current)) { // Skip added blocks. Their effects are already taken care of. 
continue; @@ -1853,8 +1855,11 @@ void BoundsCheckElimination::Run() { // Skip forward to the current block in case new basic blocks were inserted // (which always appear earlier in reverse post order) to avoid visiting the // same basic block twice. - for ( ; !it.Done() && it.Current() != current; it.Advance()) { - } + size_t new_size = graph_->GetReversePostOrder().size(); + DCHECK_GE(new_size, size); + i += new_size - size; + DCHECK_EQ(current, graph_->GetReversePostOrder()[i]); + size = new_size; } // Perform cleanup. diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 86742e6526..2927e1f7c0 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -51,7 +51,7 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_branches) { if (compiler_options.IsHugeMethod(code_item_.insns_size_in_code_units_)) { VLOG(compiler) << "Skip compilation of huge method " - << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << ": " << code_item_.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod); return true; @@ -61,7 +61,7 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_branches) { if (compiler_options.IsLargeMethod(code_item_.insns_size_in_code_units_) && (number_of_branches == 0)) { VLOG(compiler) << "Skip compilation of large method with no branch " - << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << ": " << code_item_.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches); return true; diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 580ef72767..f896f1199e 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -43,7 +43,7 @@ class HGraphBuilder : public ValueObject { OptimizingCompilerStats* compiler_stats, const uint8_t* interpreter_metadata, Handle<mirror::DexCache> dex_cache, - StackHandleScopeCollection* handles) + VariableSizedHandleScope* handles) : graph_(graph), dex_file_(dex_file), code_item_(code_item), @@ -68,7 +68,7 @@ class HGraphBuilder : public ValueObject { // Only for unit testing. 
HGraphBuilder(HGraph* graph, const DexFile::CodeItem& code_item, - StackHandleScopeCollection* handles, + VariableSizedHandleScope* handles, Primitive::Type return_type = Primitive::kPrimInt) : graph_(graph), dex_file_(nullptr), diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 4a4b98cc48..8b450e11dc 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -18,6 +18,7 @@ #ifdef ART_ENABLE_CODEGEN_arm #include "code_generator_arm.h" +#include "code_generator_arm_vixl.h" #endif #ifdef ART_ENABLE_CODEGEN_arm64 @@ -283,8 +284,7 @@ void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches A } void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, - size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fpu_registers, + size_t maximum_safepoint_spill_size, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order) { block_order_ = &block_order; @@ -298,14 +298,12 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, && !HasAllocatedCalleeSaveRegisters() && IsLeafMethod() && !RequiresCurrentMethod()) { - DCHECK_EQ(maximum_number_of_live_core_registers, 0u); - DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u); + DCHECK_EQ(maximum_safepoint_spill_size, 0u); SetFrameSize(CallPushesPC() ? GetWordSize() : 0); } else { SetFrameSize(RoundUp( first_register_slot_in_slow_path_ - + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize() + + maximum_safepoint_spill_size + FrameEntrySpillSize(), kStackAlignment)); } @@ -348,7 +346,7 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok // Initialize to anything to silent compiler warnings. 
QuickEntrypointEnum entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck; - switch (invoke->GetOriginalInvokeType()) { + switch (invoke->GetInvokeType()) { case kStatic: entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck; break; @@ -578,11 +576,19 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_arm case kArm: case kThumb2: { - return std::unique_ptr<CodeGenerator>( - new (arena) arm::CodeGeneratorARM(graph, - *isa_features.AsArmInstructionSetFeatures(), - compiler_options, - stats)); + if (kArmUseVIXL32) { + return std::unique_ptr<CodeGenerator>( + new (arena) arm::CodeGeneratorARMVIXL(graph, + *isa_features.AsArmInstructionSetFeatures(), + compiler_options, + stats)); + } else { + return std::unique_ptr<CodeGenerator>( + new (arena) arm::CodeGeneratorARM(graph, + *isa_features.AsArmInstructionSetFeatures(), + compiler_options, + stats)); + } } #endif #ifdef ART_ENABLE_CODEGEN_arm64 @@ -665,9 +671,9 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, return; } ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc)); - for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) { - if (it.Current()->IsLoopHeader()) { - HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck(); + for (HBasicBlock* block : graph.GetReversePostOrder()) { + if (block->IsLoopHeader()) { + HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { loop_headers.push_back(suspend_check); } @@ -753,7 +759,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // Collect PC infos for the mapping table. - uint32_t native_pc = GetAssembler()->CodeSize(); + uint32_t native_pc = GetAssembler()->CodePosition(); if (instruction == nullptr) { // For stack overflow checks and native-debug-info entries without dex register @@ -765,16 +771,19 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, LocationSummary* locations = instruction->GetLocations(); uint32_t register_mask = locations->GetRegisterMask(); + DCHECK_EQ(register_mask & ~locations->GetLiveRegisters()->GetCoreRegisters(), 0u); if (locations->OnlyCallsOnSlowPath()) { // In case of slow path, we currently set the location of caller-save registers // to register (instead of their stack location when pushed before the slow-path // call). Therefore register_mask contains both callee-save and caller-save - // registers that hold objects. We must remove the caller-save from the mask, since - // they will be overwritten by the callee. - register_mask &= core_callee_save_mask_; + // registers that hold objects. We must remove the spilled caller-save from the + // mask, since they will be overwritten by the callee. + uint32_t spills = GetSlowPathSpills(locations, /* core_registers */ true); + register_mask &= ~spills; + } else { + // The register mask must be a subset of callee-save registers. + DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); } - // The register mask must be a subset of callee-save registers. 
- DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, register_mask, @@ -1081,13 +1090,6 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo } } -bool CodeGenerator::IsImplicitNullCheckAllowed(HNullCheck* null_check) const { - return compiler_options_.GetImplicitNullChecks() && - // Null checks which might throw into a catch block need to save live - // registers and therefore cannot be done implicitly. - !null_check->CanThrowIntoCatchBlock(); -} - bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves(); @@ -1096,6 +1098,10 @@ bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { } void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { + if (!compiler_options_.GetImplicitNullChecks()) { + return; + } + // If we are from a static path don't record the pc as we can't throw NPE. // NB: having the checks here makes the code much less verbose in the arch // specific code generators. @@ -1114,16 +1120,35 @@ void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { // and needs to record the pc. if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) { HNullCheck* null_check = first_prev_not_move->AsNullCheck(); - if (IsImplicitNullCheckAllowed(null_check)) { - // TODO: The parallel moves modify the environment. Their changes need to be - // reverted otherwise the stack maps at the throw point will not be correct. - RecordPcInfo(null_check, null_check->GetDexPc()); - } + // TODO: The parallel moves modify the environment. Their changes need to be + // reverted otherwise the stack maps at the throw point will not be correct. + RecordPcInfo(null_check, null_check->GetDexPc()); + } +} + +LocationSummary* CodeGenerator::CreateThrowingSlowPathLocations(HInstruction* instruction, + RegisterSet caller_saves) { + // Note: Using kNoCall allows the method to be treated as leaf (and eliminate the + // HSuspendCheck from entry block). However, it will still get a valid stack frame + // because the HNullCheck needs an environment. + LocationSummary::CallKind call_kind = LocationSummary::kNoCall; + // When throwing from a try block, we may need to retrieve dalvik registers from + // physical registers and we also need to set up stack mask for GC. This is + // implicitly achieved by passing kCallOnSlowPath to the LocationSummary. + bool can_throw_into_catch_block = instruction->CanThrowIntoCatchBlock(); + if (can_throw_into_catch_block) { + call_kind = LocationSummary::kCallOnSlowPath; + } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (can_throw_into_catch_block && compiler_options_.GetImplicitNullChecks()) { + locations->SetCustomSlowPathCallerSaves(caller_saves); // Default: no caller-save registers. 
} + DCHECK(!instruction->HasUses()); + return locations; } void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) { - if (IsImplicitNullCheckAllowed(instruction)) { + if (compiler_options_.GetImplicitNullChecks()) { MaybeRecordStat(kImplicitNullCheckGenerated); GenerateImplicitNullCheck(instruction); } else { @@ -1163,39 +1188,53 @@ void CodeGenerator::EmitParallelMoves(Location from1, GetMoveResolver()->EmitNativeCode(¶llel_move); } -void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path) { +void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + SlowPathCode* slow_path) { // Ensure that the call kind indication given to the register allocator is - // coherent with the runtime call generated, and that the GC side effect is - // set when required. + // coherent with the runtime call generated. if (slow_path == nullptr) { DCHECK(instruction->GetLocations()->WillCall()) << "instruction->DebugName()=" << instruction->DebugName(); - DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) - << "instruction->DebugName()=" << instruction->DebugName() - << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString(); } else { - DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal()) - << "instruction->DebugName()=" << instruction->DebugName() - << " slow_path->GetDescription()=" << slow_path->GetDescription(); - DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || - // When (non-Baker) read barriers are enabled, some instructions - // use a slow path to emit a read barrier, which does not trigger - // GC. - (kEmitCompilerReadBarrier && - !kUseBakerReadBarrier && - (instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsArrayGet() || - instruction->IsLoadClass() || - instruction->IsLoadString() || - instruction->IsInstanceOf() || - instruction->IsCheckCast() || - (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified())))) + DCHECK(instruction->GetLocations()->CallsOnSlowPath() || slow_path->IsFatal()) << "instruction->DebugName()=" << instruction->DebugName() - << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString() << " slow_path->GetDescription()=" << slow_path->GetDescription(); } + // Check that the GC side effect is set when required. + // TODO: Reverse EntrypointCanTriggerGC + if (EntrypointCanTriggerGC(entrypoint)) { + if (slow_path == nullptr) { + DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) + << "instruction->DebugName()=" << instruction->DebugName() + << " instruction->GetSideEffects().ToString()=" + << instruction->GetSideEffects().ToString(); + } else { + DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || + // When (non-Baker) read barriers are enabled, some instructions + // use a slow path to emit a read barrier, which does not trigger + // GC. 
+ (kEmitCompilerReadBarrier && + !kUseBakerReadBarrier && + (instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArrayGet() || + instruction->IsLoadClass() || + instruction->IsLoadString() || + instruction->IsInstanceOf() || + instruction->IsCheckCast() || + (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified())))) + << "instruction->DebugName()=" << instruction->DebugName() + << " instruction->GetSideEffects().ToString()=" + << instruction->GetSideEffects().ToString() + << " slow_path->GetDescription()=" << slow_path->GetDescription(); + } + } else { + // The GC side effect is not required for the instruction. But the instruction might still have + // it, for example if it calls other entrypoints requiring it. + } + // Check the coherency of leaf information. DCHECK(instruction->IsSuspendCheck() || ((slow_path != nullptr) && slow_path->IsFatal()) @@ -1216,68 +1255,56 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet() || instruction->IsArrayGet() || + instruction->IsArraySet() || instruction->IsLoadClass() || instruction->IsLoadString() || instruction->IsInstanceOf() || instruction->IsCheckCast() || - (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified())) + (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) || + (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified())) << "instruction->DebugName()=" << instruction->DebugName() << " slow_path->GetDescription()=" << slow_path->GetDescription(); } void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { - if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (live_registers->ContainsCoreRegister(i)) { - // If the register holds an object, update the stack mask. - if (locations->RegisterContainsObject(i)) { - locations->SetStackBit(stack_offset / kVRegSize); - } - DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - saved_core_stack_offsets_[i] = stack_offset; - stack_offset += codegen->SaveCoreRegister(stack_offset, i); - } + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + // If the register holds an object, update the stack mask. 
+ if (locations->RegisterContainsObject(i)) { + locations->SetStackBit(stack_offset / kVRegSize); } + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_core_stack_offsets_[i] = stack_offset; + stack_offset += codegen->SaveCoreRegister(stack_offset, i); } - for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { - if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (live_registers->ContainsFloatingPointRegister(i)) { - DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - saved_fpu_stack_offsets_[i] = stack_offset; - stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i); - } - } + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + for (uint32_t i : LowToHighBits(fp_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_fpu_stack_offsets_[i] = stack_offset; + stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i); } } void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { - if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (live_registers->ContainsCoreRegister(i)) { - DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - stack_offset += codegen->RestoreCoreRegister(stack_offset, i); - } - } + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + stack_offset += codegen->RestoreCoreRegister(stack_offset, i); } - for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { - if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (live_registers->ContainsFloatingPointRegister(i)) { - DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i); - } - } + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + for (uint32_t i : LowToHighBits(fp_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i); } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ad02ecf609..a81f24e3d8 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -22,9 +22,8 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/bit_field.h" +#include "base/bit_utils.h" #include "base/enums.h" -#include "compiled_method.h" -#include "driver/compiler_options.h" #include "globals.h" #include "graph_visualizer.h" #include "locations.h" @@ -54,6 +53,7 @@ static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); class 
Assembler; class CodeGenerator; class CompilerDriver; +class CompilerOptions; class LinkerPatch; class ParallelMoveResolver; @@ -212,8 +212,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual size_t GetFloatingPointSpillSlotSize() const = 0; virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0; void InitializeCodeGeneration(size_t number_of_spill_slots, - size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fpu_registers, + size_t maximum_safepoint_spill_size, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order); // Backends can override this as necessary. For most, no special alignment is required. @@ -279,6 +278,30 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return (fpu_callee_save_mask_ & (1 << reg)) != 0; } + uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const { + DCHECK(locations->OnlyCallsOnSlowPath() || + (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() && + !locations->HasCustomSlowPathCallingConvention())); + uint32_t live_registers = core_registers + ? locations->GetLiveRegisters()->GetCoreRegisters() + : locations->GetLiveRegisters()->GetFloatingPointRegisters(); + if (locations->HasCustomSlowPathCallingConvention()) { + // Save only the live registers that the custom calling convention wants us to save. + uint32_t caller_saves = core_registers + ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters() + : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters(); + return live_registers & caller_saves; + } else { + // Default ABI, we need to spill non-callee-save live registers. + uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_; + return live_registers & ~callee_saves; + } + } + + size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const { + return POPCOUNT(GetSlowPathSpills(locations, core_registers)); + } + // Record native to dex mapping for a suspend point. Required by runtime. void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); // Check whether we have already recorded mapping at this PC. @@ -290,6 +313,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); + LocationSummary* CreateThrowingSlowPathLocations( + HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty()); void GenerateNullCheck(HNullCheck* null_check); virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0; virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0; @@ -299,12 +324,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // TODO: Replace with a catch-entering instruction that records the environment. void RecordCatchBlockInfo(); - // Returns true if implicit null checks are allowed in the compiler options - // and if the null check is not inside a try block. We currently cannot do - // implicit null checks in that case because we need the NullCheckSlowPath to - // save live registers, which may be needed by the runtime to set catch phis. - bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const; - // TODO: Avoid creating the `std::unique_ptr` here. 
void AddSlowPath(SlowPathCode* slow_path) { slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path)); @@ -340,6 +359,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } + bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; } + bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; } + // Helper that returns the pointer offset of an index in an object array. // Note: this method assumes we always have the same pointer size, regardless // of the architecture. @@ -383,7 +405,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Perfoms checks pertaining to an InvokeRuntime call. - void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path); + void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + SlowPathCode* slow_path); // Perfoms checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call. static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction, @@ -491,7 +515,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // otherwise return a fall-back info that should be used instead. virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method) = 0; + HInvokeStaticOrDirect* invoke) = 0; // Generate a call to a static or direct method. virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0; @@ -507,40 +531,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { uint32_t GetReferenceDisableFlagOffset() const; protected: - // Method patch info used for recording locations of required linker patches and - // target methods. The target method can be used for various purposes, whether for - // patching the address of the method or the code pointer or a PC-relative call. + // Patch info used for recording locations of required linker patches and their targets, + // i.e. target method, string, type or code identified by their dex file and index. template <typename LabelType> - struct MethodPatchInfo { - explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { } - - MethodReference target_method; - LabelType label; - }; - - // String patch info used for recording locations of required linker patches and - // target strings. The actual string address can be absolute or PC-relative. - template <typename LabelType> - struct StringPatchInfo { - StringPatchInfo(const DexFile& df, uint32_t index) - : dex_file(df), string_index(index), label() { } - - const DexFile& dex_file; - uint32_t string_index; - LabelType label; - }; - - // Type patch info used for recording locations of required linker patches and - // target types. The actual type address can be absolute or PC-relative. - // TODO: Consider merging with MethodPatchInfo and StringPatchInfo - all these - // classes contain the dex file, some index and the label. 
- template <typename LabelType> - struct TypePatchInfo { - TypePatchInfo(const DexFile& df, uint32_t index) - : dex_file(df), type_index(index), label() { } + struct PatchInfo { + PatchInfo(const DexFile& target_dex_file, uint32_t target_index) + : dex_file(target_dex_file), index(target_index) { } const DexFile& dex_file; - uint32_t type_index; + uint32_t index; LabelType label; }; @@ -556,12 +555,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { core_spill_mask_(0), fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), + allocated_registers_(RegisterSet::Empty()), blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers, kArenaAllocCodeGenerator)), blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers, kArenaAllocCodeGenerator)), - blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs, - kArenaAllocCodeGenerator)), number_of_core_registers_(number_of_core_registers), number_of_fpu_registers_(number_of_fpu_registers), number_of_register_pairs_(number_of_register_pairs), @@ -598,7 +596,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return POPCOUNT(core_spill_mask_) * GetWordSize(); } - bool HasAllocatedCalleeSaveRegisters() const { + virtual bool HasAllocatedCalleeSaveRegisters() const { // We check the core registers against 1 because it always comprises the return PC. return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); @@ -649,7 +647,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // arrays. bool* const blocked_core_registers_; bool* const blocked_fpu_registers_; - bool* const blocked_register_pairs_; size_t number_of_core_registers_; size_t number_of_fpu_registers_; size_t number_of_register_pairs_; @@ -687,6 +684,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { bool is_leaf_; // Whether an instruction in the graph accesses the current method. + // TODO: Rename: this actually indicates that some instruction in the method + // needs the environment including a valid stack frame. bool requires_current_method_; friend class OptimizingCFITest; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index cd7a90e280..be65f89ef1 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -59,13 +59,192 @@ static constexpr DRegister DTMP = D31; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value() -class NullCheckSlowPathARM : public SlowPathCode { +static constexpr int kRegListThreshold = 4; + +// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, +// for each live D registers they treat two corresponding S registers as live ones. 
+// +// Two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build +// from a list of contiguous S registers a list of contiguous D registers (processing first/last +// S registers corner cases) and save/restore this new list treating them as D registers. +// - decreasing code size +// - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is +// restored and then used in regular non SlowPath code as D register. +// +// For the following example (v means the S register is live): +// D names: | D0 | D1 | D2 | D4 | ... +// S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ... +// Live? | | v | v | v | v | v | v | | ... +// +// S1 and S6 will be saved/restored independently; D registers list (D1, D2) will be processed +// as D registers. +static size_t SaveContiguousSRegisterList(size_t first, + size_t last, + CodeGenerator* codegen, + size_t stack_offset) { + DCHECK_LE(first, last); + if ((first == last) && (first == 0)) { + stack_offset += codegen->SaveFloatingPointRegister(stack_offset, first); + return stack_offset; + } + if (first % 2 == 1) { + stack_offset += codegen->SaveFloatingPointRegister(stack_offset, first++); + } + + bool save_last = false; + if (last % 2 == 0) { + save_last = true; + --last; + } + + if (first < last) { + DRegister d_reg = static_cast<DRegister>(first / 2); + DCHECK_EQ((last - first + 1) % 2, 0u); + size_t number_of_d_regs = (last - first + 1) / 2; + + if (number_of_d_regs == 1) { + __ StoreDToOffset(d_reg, SP, stack_offset); + } else if (number_of_d_regs > 1) { + __ add(IP, SP, ShifterOperand(stack_offset)); + __ vstmiad(IP, d_reg, number_of_d_regs); + } + stack_offset += number_of_d_regs * kArmWordSize * 2; + } + + if (save_last) { + stack_offset += codegen->SaveFloatingPointRegister(stack_offset, last + 1); + } + + return stack_offset; +} + +static size_t RestoreContiguousSRegisterList(size_t first, + size_t last, + CodeGenerator* codegen, + size_t stack_offset) { + DCHECK_LE(first, last); + if ((first == last) && (first == 0)) { + stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, first); + return stack_offset; + } + if (first % 2 == 1) { + stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, first++); + } + + bool restore_last = false; + if (last % 2 == 0) { + restore_last = true; + --last; + } + + if (first < last) { + DRegister d_reg = static_cast<DRegister>(first / 2); + DCHECK_EQ((last - first + 1) % 2, 0u); + size_t number_of_d_regs = (last - first + 1) / 2; + if (number_of_d_regs == 1) { + __ LoadDFromOffset(d_reg, SP, stack_offset); + } else if (number_of_d_regs > 1) { + __ add(IP, SP, ShifterOperand(stack_offset)); + __ vldmiad(IP, d_reg, number_of_d_regs); + } + stack_offset += number_of_d_regs * kArmWordSize * 2; + } + + if (restore_last) { + stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, last + 1); + } + + return stack_offset; +} + +void SlowPathCodeARM::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { + size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + size_t orig_offset = stack_offset; + + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + // If the register holds an object, update the stack mask. 
+ if (locations->RegisterContainsObject(i)) { + locations->SetStackBit(stack_offset / kVRegSize); + } + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_core_stack_offsets_[i] = stack_offset; + stack_offset += kArmWordSize; + } + + int reg_num = POPCOUNT(core_spills); + if (reg_num != 0) { + if (reg_num > kRegListThreshold) { + __ StoreList(RegList(core_spills), orig_offset); + } else { + stack_offset = orig_offset; + for (uint32_t i : LowToHighBits(core_spills)) { + stack_offset += codegen->SaveCoreRegister(stack_offset, i); + } + } + } + + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + orig_offset = stack_offset; + for (uint32_t i : LowToHighBits(fp_spills)) { + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_fpu_stack_offsets_[i] = stack_offset; + stack_offset += kArmWordSize; + } + + stack_offset = orig_offset; + while (fp_spills != 0u) { + uint32_t begin = CTZ(fp_spills); + uint32_t tmp = fp_spills + (1u << begin); + fp_spills &= tmp; // Clear the contiguous range of 1s. + uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined. + stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset); + } + DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); +} + +void SlowPathCodeARM::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { + size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + size_t orig_offset = stack_offset; + + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + stack_offset += kArmWordSize; + } + + int reg_num = POPCOUNT(core_spills); + if (reg_num != 0) { + if (reg_num > kRegListThreshold) { + __ LoadList(RegList(core_spills), orig_offset); + } else { + stack_offset = orig_offset; + for (uint32_t i : LowToHighBits(core_spills)) { + stack_offset += codegen->RestoreCoreRegister(stack_offset, i); + } + } + } + + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + while (fp_spills != 0u) { + uint32_t begin = CTZ(fp_spills); + uint32_t tmp = fp_spills + (1u << begin); + fp_spills &= tmp; // Clear the contiguous range of 1s. + uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined. + stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset); + } + DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); +} + +class NullCheckSlowPathARM : public SlowPathCodeARM { public: - explicit NullCheckSlowPathARM(HNullCheck* instruction) : SlowPathCode(instruction) {} + explicit NullCheckSlowPathARM(HNullCheck* instruction) : SlowPathCodeARM(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -74,8 +253,10 @@ class NullCheckSlowPathARM : public SlowPathCode { // Live registers will be restored in the catch block if caught. 
SaveLiveRegisters(codegen, instruction_->GetLocations()); } - arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); + arm_codegen->InvokeRuntime(kQuickThrowNullPointer, + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } @@ -87,19 +268,14 @@ class NullCheckSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM); }; -class DivZeroCheckSlowPathARM : public SlowPathCode { +class DivZeroCheckSlowPathARM : public SlowPathCodeARM { public: - explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} + explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : SlowPathCodeARM(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. - SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); + arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } @@ -111,19 +287,16 @@ class DivZeroCheckSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM); }; -class SuspendCheckSlowPathARM : public SlowPathCode { +class SuspendCheckSlowPathARM : public SlowPathCodeARM { public: SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor) - : SlowPathCode(instruction), successor_(successor) {} + : SlowPathCodeARM(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); + arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ b(GetReturnLabel()); } else { @@ -152,10 +325,10 @@ class SuspendCheckSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM); }; -class BoundsCheckSlowPathARM : public SlowPathCode { +class BoundsCheckSlowPathARM : public SlowPathCodeARM { public: explicit BoundsCheckSlowPathARM(HBoundsCheck* instruction) - : SlowPathCode(instruction) {} + : SlowPathCodeARM(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); @@ -176,10 +349,10 @@ class BoundsCheckSlowPathARM : public SlowPathCode { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() - ? QUICK_ENTRY_POINT(pThrowStringBounds) - : QUICK_ENTRY_POINT(pThrowArrayBounds); - arm_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() + ? 
kQuickThrowStringBounds + : kQuickThrowArrayBounds; + arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -192,13 +365,13 @@ class BoundsCheckSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM); }; -class LoadClassSlowPathARM : public SlowPathCode { +class LoadClassSlowPathARM : public SlowPathCodeARM { public: LoadClassSlowPathARM(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit) - : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + : SlowPathCodeARM(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -211,10 +384,9 @@ class LoadClassSlowPathARM : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; __ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex()); - int32_t entry_point_offset = do_clinit_ - ? QUICK_ENTRY_POINT(pInitializeStaticStorage) - : QUICK_ENTRY_POINT(pInitializeType); - arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); + QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage + : kQuickInitializeType; + arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this); if (do_clinit_) { CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); } else { @@ -250,27 +422,57 @@ class LoadClassSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM); }; -class LoadStringSlowPathARM : public SlowPathCode { +class LoadStringSlowPathARM : public SlowPathCodeARM { public: - explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCode(instruction) {} + explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCodeARM(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + HLoadString* load = instruction_->AsLoadString(); + const uint32_t string_index = load->GetStringIndex(); + Register out = locations->Out().AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier); CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + // In the unlucky case that the `temp` is R0, we preserve the address in `out` across + // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call). + bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0)); + Register entry_address = temp_is_r0 ? 
out : temp; + DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); + if (call_saves_everything_except_r0 && temp_is_r0) { + __ mov(entry_address, ShifterOperand(temp)); + } + __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index); - arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); + arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); + // Store the resolved String to the .bss entry. + if (call_saves_everything_except_r0) { + // The string entry address was preserved in `entry_address` thanks to kSaveEverything. + __ str(R0, Address(entry_address)); + } else { + // For non-Baker read barrier, we need to re-calculate the address of the string entry. + CodeGeneratorARM::PcRelativePatchInfo* labels = + arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + __ BindTrackedLabel(&labels->movw_label); + __ movw(entry_address, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(entry_address, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(entry_address, entry_address, ShifterOperand(PC)); + __ str(R0, Address(entry_address)); + } + + arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); } @@ -280,10 +482,10 @@ class LoadStringSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM); }; -class TypeCheckSlowPathARM : public SlowPathCode { +class TypeCheckSlowPathARM : public SlowPathCodeARM { public: TypeCheckSlowPathARM(HInstruction* instruction, bool is_fatal) - : SlowPathCode(instruction), is_fatal_(is_fatal) {} + : SlowPathCodeARM(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -311,7 +513,7 @@ class TypeCheckSlowPathARM : public SlowPathCode { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), + arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, instruction_->GetDexPc(), this); @@ -320,10 +522,7 @@ class TypeCheckSlowPathARM : public SlowPathCode { arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); } else { DCHECK(instruction_->IsCheckCast()); - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction_, - instruction_->GetDexPc(), - this); + arm_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } @@ -343,19 +542,15 @@ class TypeCheckSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM); }; -class DeoptimizationSlowPathARM : public SlowPathCode { +class DeoptimizationSlowPathARM : public SlowPathCodeARM { public: explicit DeoptimizationSlowPathARM(HDeoptimize* instruction) - : SlowPathCode(instruction) {} + : SlowPathCodeARM(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - instruction_, - instruction_->GetDexPc(), - this); + 
arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -365,9 +560,9 @@ class DeoptimizationSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); }; -class ArraySetSlowPathARM : public SlowPathCode { +class ArraySetSlowPathARM : public SlowPathCodeARM { public: - explicit ArraySetSlowPathARM(HInstruction* instruction) : SlowPathCode(instruction) {} + explicit ArraySetSlowPathARM(HInstruction* instruction) : SlowPathCodeARM(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); @@ -394,10 +589,7 @@ class ArraySetSlowPathARM : public SlowPathCode { codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction_, - instruction_->GetDexPc(), - this); + arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); @@ -409,11 +601,21 @@ class ArraySetSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; -// Slow path marking an object during a read barrier. -class ReadBarrierMarkSlowPathARM : public SlowPathCode { +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
+class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { public: - ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj) - : SlowPathCode(instruction), obj_(obj) { + ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref) + : SlowPathCodeARM(instruction), ref_(ref) { DCHECK(kEmitCompilerReadBarrier); } @@ -421,17 +623,19 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Register reg = obj_.AsRegister<Register>(); + Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || + instruction_->IsArraySet() || instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -440,39 +644,215 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - DCHECK_NE(reg, SP); - DCHECK_NE(reg, LR); - DCHECK_NE(reg, PC); - DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg; + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in R0): // - // R0 <- obj + // R0 <- ref // R0 <- ReadBarrierMark(R0) - // obj <- R0 + // ref <- R0 // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); // This runtime call does not require a stack map. arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ b(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM above, which never tries to update +// `obj.field`). 
+// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). +class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { + public: + ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp1, + Register temp2) + : SlowPathCodeARM(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp1_(temp1), + temp2_(temp2) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegisterPair()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use IP to save the old reference, as IP is + // used internally by the ReadBarrierMarkRegX entry point, and we + // need the old reference after the call to that entry point. + DCHECK_NE(temp1_, IP); + __ Mov(temp1_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // R0 <- ref + // R0 <- ReadBarrierMark(R0) + // ref <- R0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set + // (CAS) operation below would abort the CAS, leaving the field + // as-is. 
+ Label done; + __ cmp(temp1_, ShifterOperand(ref_reg)); + __ b(&done, EQ); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + // Convenience aliases. + Register base = obj_; + // The UnsafeCASObject intrinsic uses a register pair as field + // offset ("long offset"), of which only the low part contains + // data. + Register offset = field_offset_.AsRegisterPairLow<Register>(); + Register expected = temp1_; + Register value = ref_reg; + Register tmp_ptr = IP; // Pointer to actual memory. + Register tmp = temp2_; // Value in memory. + + __ add(tmp_ptr, base, ShifterOperand(offset)); + + if (kPoisonHeapReferences) { + __ PoisonHeapReference(expected); + if (value == expected) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + __ PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + + Label loop_head, exit_loop; + __ Bind(&loop_head); + + __ ldrex(tmp, tmp_ptr); + + __ subs(tmp, tmp, ShifterOperand(expected)); + + __ it(NE); + __ clrex(NE); + + __ b(&exit_loop, NE); + + __ strex(tmp, value, tmp_ptr); + __ cmp(tmp, ShifterOperand(1)); + __ b(&loop_head, EQ); + + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(expected); + if (value == expected) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + __ UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ b(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp1_; + const Register temp2_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM); +}; + // Slow path generating a read barrier for a heap reference. -class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { +class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM { public: ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction, Location out, @@ -480,7 +860,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { Location obj, uint32_t offset, Location index) - : SlowPathCode(instruction), + : SlowPathCodeARM(instruction), out_(out), ref_(ref), obj_(obj), @@ -610,10 +990,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_); } - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), - instruction_, - instruction_->GetDexPc(), - this); + arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes< kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); arm_codegen->Move32(out_, Location::RegisterLocation(R0)); @@ -655,10 +1032,10 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { }; // Slow path generating a read barrier for a GC root. 
-class ReadBarrierForRootSlowPathARM : public SlowPathCode { +class ReadBarrierForRootSlowPathARM : public SlowPathCodeARM { public: ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) - : SlowPathCode(instruction), out_(out), root_(root) { + : SlowPathCodeARM(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } @@ -677,7 +1054,7 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, instruction_->GetDexPc(), this); @@ -698,8 +1075,8 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCode { }; #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT inline Condition ARMCondition(IfCondition cond) { switch (cond) { @@ -853,9 +1230,6 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { } void CodeGeneratorARM::SetupBlockedRegisters() const { - // Don't allocate the dalvik style register pair passing. - blocked_register_pairs_[R1_R2] = true; - // Stack register, LR and PC are always reserved. blocked_core_registers_[SP] = true; blocked_core_registers_[LR] = true; @@ -875,19 +1249,6 @@ void CodeGeneratorARM::SetupBlockedRegisters() const { blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; } } - - UpdateBlockedPairRegisters(); -} - -void CodeGeneratorARM::UpdateBlockedPairRegisters() const { - for (int i = 0; i < kNumberOfRegisterPairs; i++) { - ArmManagedRegister current = - ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); - if (blocked_core_registers_[current.AsRegisterPairLow()] - || blocked_core_registers_[current.AsRegisterPairHigh()]) { - blocked_register_pairs_[i] = true; - } - } } InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen) @@ -951,7 +1312,13 @@ void CodeGeneratorARM::GenerateFrameEntry() { int adjust = GetFrameSize() - FrameEntrySpillSize(); __ AddConstant(SP, -adjust); __ cfi().AdjustCFAOffset(adjust); - __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, 0); + + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. 
+ if (RequiresCurrentMethod()) { + __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, 0); + } } void CodeGeneratorARM::GenerateFrameExit() { @@ -1218,26 +1585,21 @@ void CodeGeneratorARM::InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { - InvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value(), - instruction, - dex_pc, - slow_path); -} - -void CodeGeneratorARM::InvokeRuntime(int32_t entry_point_offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path) { - ValidateInvokeRuntime(instruction, slow_path); - __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset); - __ blx(LR); - RecordPcInfo(instruction, dex_pc, slow_path); + ValidateInvokeRuntime(entrypoint, instruction, slow_path); + GenerateInvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value()); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } } void CodeGeneratorARM::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, HInstruction* instruction, SlowPathCode* slow_path) { ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + GenerateInvokeRuntime(entry_point_offset); +} + +void CodeGeneratorARM::GenerateInvokeRuntime(int32_t entry_point_offset) { __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset); __ blx(LR); } @@ -1548,13 +1910,14 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize); + SlowPathCodeARM* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1854,9 +2217,7 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok // art::PrepareForRegisterAllocation. DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), - codegen_->GetAssembler(), - codegen_->GetInstructionSetFeatures()); + IntrinsicLocationsBuilderARM intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); @@ -1902,9 +2263,7 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { - IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), - codegen_->GetAssembler(), - codegen_->GetInstructionSetFeatures()); + IntrinsicLocationsBuilderARM intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -2375,19 +2734,13 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimFloat: // Processing a Dex `float-to-long' instruction. 
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l), - conversion, - conversion->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: // Processing a Dex `double-to-long' instruction. - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pD2l), - conversion, - conversion->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; @@ -2434,10 +2787,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: // Processing a Dex `long-to-float' instruction. - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pL2f), - conversion, - conversion->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickL2f, float, int64_t>(); break; @@ -2523,7 +2873,7 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -2560,13 +2910,18 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { break; case Primitive::kPrimLong: { - DCHECK(second.IsRegisterPair()); - __ adds(out.AsRegisterPairLow<Register>(), - first.AsRegisterPairLow<Register>(), - ShifterOperand(second.AsRegisterPairLow<Register>())); - __ adc(out.AsRegisterPairHigh<Register>(), - first.AsRegisterPairHigh<Register>(), - ShifterOperand(second.AsRegisterPairHigh<Register>())); + if (second.IsConstant()) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); + GenerateAddLongConst(out, first, value); + } else { + DCHECK(second.IsRegisterPair()); + __ adds(out.AsRegisterPairLow<Register>(), + first.AsRegisterPairLow<Register>(), + ShifterOperand(second.AsRegisterPairLow<Register>())); + __ adc(out.AsRegisterPairHigh<Register>(), + first.AsRegisterPairHigh<Register>(), + ShifterOperand(second.AsRegisterPairHigh<Register>())); + } break; } @@ -2600,7 +2955,7 @@ void LocationsBuilderARM::VisitSub(HSub* sub) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -2636,13 +2991,18 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { - DCHECK(second.IsRegisterPair()); - __ subs(out.AsRegisterPairLow<Register>(), - first.AsRegisterPairLow<Register>(), - ShifterOperand(second.AsRegisterPairLow<Register>())); - __ sbc(out.AsRegisterPairHigh<Register>(), - first.AsRegisterPairHigh<Register>(), - ShifterOperand(second.AsRegisterPairHigh<Register>())); + if (second.IsConstant()) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); + GenerateAddLongConst(out, first, -value); + } else { + DCHECK(second.IsRegisterPair()); + __ subs(out.AsRegisterPairLow<Register>(), + first.AsRegisterPairLow<Register>(), + ShifterOperand(second.AsRegisterPairLow<Register>())); + __ sbc(out.AsRegisterPairHigh<Register>(), + first.AsRegisterPairHigh<Register>(), + 
ShifterOperand(second.AsRegisterPairHigh<Register>())); + } break; } @@ -2959,7 +3319,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(calling_convention.GetRegisterAt(1), second.AsRegister<Register>()); DCHECK_EQ(R0, out.AsRegister<Register>()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr); + codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc()); CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; @@ -2974,7 +3334,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R0, out.AsRegisterPairLow<Register>()); DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr); + codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc()); CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); break; } @@ -3103,26 +3463,26 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { DCHECK_EQ(calling_convention.GetRegisterAt(1), second.AsRegister<Register>()); DCHECK_EQ(R1, out.AsRegister<Register>()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr); + codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc()); CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } case Primitive::kPrimLong: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr); + codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc()); CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); break; } case Primitive::kPrimFloat: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr); + codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc()); CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr); + codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc()); CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } @@ -3133,18 +3493,12 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { } void LocationsBuilderARM::VisitDivZeroCheck(HDivZeroCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorARM::VisitDivZeroCheck(HDivZeroCheck* instruction) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM(instruction); + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM(instruction); codegen_->AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); @@ -3562,10 +3916,7 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { __ blx(LR); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } } @@ -3585,10 +3936,7 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. - codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } @@ -3984,6 +4332,9 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI object_field_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); bool volatile_for_double = field_info.IsVolatile() @@ -4044,31 +4395,51 @@ bool LocationsBuilderARM::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) { uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); if (Primitive::Is64BitType(input_cst->GetType())) { - return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) && - CanEncodeConstantAsImmediate(High32Bits(value), opcode); + Opcode high_opcode = opcode; + SetCc low_set_cc = kCcDontCare; + switch (opcode) { + case SUB: + // Flip the operation to an ADD. 
+ value = -value; + opcode = ADD; + FALLTHROUGH_INTENDED; + case ADD: + if (Low32Bits(value) == 0u) { + return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare); + } + high_opcode = ADC; + low_set_cc = kCcSet; + break; + default: + break; + } + return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) && + CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare); } else { return CanEncodeConstantAsImmediate(Low32Bits(value), opcode); } } -bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) { +bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, + Opcode opcode, + SetCc set_cc) { ShifterOperand so; ArmAssembler* assembler = codegen_->GetAssembler(); - if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) { + if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, set_cc, &so)) { return true; } Opcode neg_opcode = kNoOperand; switch (opcode) { - case AND: - neg_opcode = BIC; - break; - case ORR: - neg_opcode = ORN; - break; + case AND: neg_opcode = BIC; value = ~value; break; + case ORR: neg_opcode = ORN; value = ~value; break; + case ADD: neg_opcode = SUB; value = -value; break; + case ADC: neg_opcode = SBC; value = ~value; break; + case SUB: neg_opcode = ADD; value = -value; break; + case SBC: neg_opcode = ADC; value = ~value; break; default: return false; } - return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so); + return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so); } void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, @@ -4282,14 +4653,8 @@ void InstructionCodeGeneratorARM::VisitUnresolvedStaticFieldSet( } void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::RequiresRegister()); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void CodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) { @@ -4303,7 +4668,7 @@ void CodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) { } void CodeGeneratorARM::GenerateExplicitNullCheck(HNullCheck* instruction) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction); + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction); AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); @@ -4440,6 +4805,9 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
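The rewritten CanEncodeConstantAsImmediate above falls back to the complementary instruction (AND/BIC, ORR/ORN, ADD/SUB, ADC/SBC) with a complemented or negated constant when the original constant does not encode. A standalone sketch of the idea, using only the classic A32 modified-immediate form (an 8-bit value rotated right by an even amount); the real ShifterOperandCanHold also understands Thumb-2 encodings, so this is an approximation with illustrative names:

#include <cstdint>

enum SketchOpcode { kAnd, kOrr, kAdd, kAdc, kSub, kSbc };

// Classic A32 modified immediate: an 8-bit value rotated right by an even amount.
inline bool IsA32ModifiedImmediate(uint32_t value) {
  for (int rot = 0; rot < 32; rot += 2) {
    uint32_t rotated = (rot == 0) ? value : ((value << rot) | (value >> (32 - rot)));
    if (rotated <= 0xffu) {
      return true;
    }
  }
  return false;
}

// If `value` does not encode directly, try the complementary opcode with the
// complemented (AND/ORR/ADC/SBC) or negated (ADD/SUB) constant, as the table above does.
inline bool CanEncodeOrFallBack(uint32_t value, SketchOpcode opcode) {
  if (IsA32ModifiedImmediate(value)) {
    return true;
  }
  switch (opcode) {
    case kAnd:  // BIC
    case kOrr:  // ORN
    case kAdc:  // SBC
    case kSbc:  // ADC
      return IsA32ModifiedImmediate(~value);
    case kAdd:  // SUB
    case kSub:  // ADD
      return IsA32ModifiedImmediate(0u - value);
  }
  return false;
}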
+ } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -4454,7 +4822,9 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { } // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // Also need for String compression feature. + if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) + || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { locations->AddTemp(Location::RequiresRegister()); } } @@ -4467,6 +4837,8 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location out_loc = locations->Out(); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); + const bool maybe_compressed_char_at = mirror::kUseStringCompression && + instruction->IsStringCharAt(); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. @@ -4480,10 +4852,31 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: { if (index.IsConstant()) { int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); - uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); + if (maybe_compressed_char_at) { + Register length = IP; + Label uncompressed_load, done; + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ cmp(length, ShifterOperand(0)); + __ b(&uncompressed_load, GE); + __ LoadFromOffset(kLoadUnsignedByte, + out_loc.AsRegister<Register>(), + obj, + data_offset + const_index); + __ b(&done); + __ Bind(&uncompressed_load); + __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), + out_loc.AsRegister<Register>(), + obj, + data_offset + (const_index << 1)); + __ Bind(&done); + } else { + uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); - LoadOperandType load_type = GetLoadOperandType(type); - __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset); + LoadOperandType load_type = GetLoadOperandType(type); + __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset); + } } else { Register temp = IP; @@ -4499,7 +4892,24 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } else { __ add(temp, obj, ShifterOperand(data_offset)); } - codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); + if (maybe_compressed_char_at) { + Label uncompressed_load, done; + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + Register length = locations->GetTemp(0).AsRegister<Register>(); + __ LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ cmp(length, ShifterOperand(0)); + __ b(&uncompressed_load, GE); + __ ldrb(out_loc.AsRegister<Register>(), + Address(temp, index.AsRegister<Register>(), Shift::LSL, 0)); + __ b(&done); + __ Bind(&uncompressed_load); + __ ldrh(out_loc.AsRegister<Register>(), + Address(temp, 
index.AsRegister<Register>(), Shift::LSL, 1)); + __ Bind(&done); + } else { + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); + } } break; } @@ -4599,7 +5009,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { if (type == Primitive::kPrimNot) { // Potential implicit null checks, in the case of reference // arrays, are handled in the previous switch statement. - } else { + } else if (!maybe_compressed_char_at) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -4610,12 +5020,10 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - bool object_array_set_with_read_barrier = - kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + may_need_runtime_call_for_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); @@ -4712,13 +5120,15 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { } DCHECK(needs_write_barrier); - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Location temp2_loc = locations->GetTemp(1); + Register temp2 = temp2_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); Label done; - SlowPathCode* slow_path = nullptr; + SlowPathCodeARM* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction); @@ -4743,63 +5153,42 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - if (kEmitCompilerReadBarrier) { - // When read barriers are enabled, the type checking - // instrumentation requires two read barriers: - // - // __ Mov(temp2, temp1); - // // /* HeapReference<Class> */ temp1 = temp1->component_type_ - // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - // codegen_->GenerateReadBarrierSlow( - // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); - // - // // /* HeapReference<Class> */ temp2 = value->klass_ - // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // codegen_->GenerateReadBarrierSlow( - // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); - // - // __ cmp(temp1, ShifterOperand(temp2)); - // - // However, the second read barrier may trash `temp`, as it - // is a temporary register, and as such would not be saved - // along with live registers before calling the runtime (nor - // restored afterwards). So in this case, we bail out and - // delegate the work to the array set slow path. - // - // TODO: Extend the register allocator to support a new - // "(locally) live temp" location so as to avoid always - // going into the slow path when read barriers are enabled. 
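The replacement code below compares the array's component type with the value's class directly, possibly on poisoned or from-space references, which can only produce a harmless false negative that falls back to the ArraySet slow path. In plain C++ terms the fast path is roughly the following; the structs are placeholders, not ART's mirror types:

struct SketchClass { const SketchClass* super_class; const SketchClass* component_type; };
struct SketchObject { const SketchClass* klass; };

// Returns true when the store can proceed without calling the runtime.
inline bool ArrayStoreFastPathOk(const SketchObject* array,
                                 const SketchObject* value,
                                 bool static_type_is_object_array) {
  const SketchClass* component = array->klass->component_type;
  if (value->klass == component) {
    return true;  // `cmp temp1, temp2` equal: branch to do_put.
  }
  if (!static_type_is_object_array) {
    return false;  // Branch to the ArraySet slow path.
  }
  // For a statically known Object[] the generated code only checks whether the
  // component type is java.lang.Object (super_class == null); anything else is
  // left to the slow path.
  return component->super_class == nullptr;
}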
- __ b(slow_path->GetEntryLabel()); - } else { - // /* HeapReference<Class> */ temp1 = array->klass_ - __ LoadFromOffset(kLoadWord, temp1, array, class_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Note that when read barriers are enabled, the type checks + // are performed without read barriers. This is fine, even in + // the case where a class object is in the from-space after + // the flip, as a comparison involving such a type would not + // produce a false positive; it may of course produce a false + // negative, in which case we would take the ArraySet slow + // path. + + // /* HeapReference<Class> */ temp1 = array->klass_ + __ LoadFromOffset(kLoadWord, temp1, array, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + __ cmp(temp1, ShifterOperand(temp2)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Label do_put; + __ b(&do_put, EQ); + // If heap poisoning is enabled, the `temp1` reference has + // not been unpoisoned yet; unpoison it now. __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - // /* HeapReference<Class> */ temp2 = value->klass_ - __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // If heap poisoning is enabled, no need to unpoison `temp1` - // nor `temp2`, as we are comparing two poisoned references. - __ cmp(temp1, ShifterOperand(temp2)); - - if (instruction->StaticTypeOfArrayIsObjectArray()) { - Label do_put; - __ b(&do_put, EQ); - // If heap poisoning is enabled, the `temp1` reference has - // not been unpoisoned yet; unpoison it now. - __ MaybeUnpoisonHeapReference(temp1); - - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); - // If heap poisoning is enabled, no need to unpoison - // `temp1`, as we are comparing against null below. - __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_put); - } else { - __ b(slow_path->GetEntryLabel(), NE); - } + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. + __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ b(slow_path->GetEntryLabel(), NE); } } @@ -4910,6 +5299,10 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadWord, out, obj, offset); codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out compression flag from String's array length. 
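The compressed charAt paths above and the length masking just below rely on the same convention used in this patch: the sign bit of String::count_ doubles as the compression flag, so a negative count means the payload is stored as 8-bit characters. A standalone sketch of that convention; the helper names are illustrative only:

#include <cstdint>

// Bit 31 of String::count_ set => 8-bit characters; the remaining bits hold the length.
constexpr uint32_t kStringCompressionFlag = 1u << 31;

inline bool IsCompressed(uint32_t count_field) {
  return (count_field & kStringCompressionFlag) != 0u;
}

inline uint32_t StringLength(uint32_t count_field) {
  return count_field & ~kStringCompressionFlag;  // Same effect as `bic out, out, #0x80000000`.
}

inline uint16_t StringCharAt(uint32_t count_field, const void* data, uint32_t index) {
  return IsCompressed(count_field)
      ? static_cast<const uint8_t*>(data)[index]    // ldrb path.
      : static_cast<const uint16_t*>(data)[index];  // ldrh path.
}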
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ bic(out, out, ShifterOperand(1u << 31)); + } } void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { @@ -4944,20 +5337,18 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* } void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary* locations = instruction->GetLocations(); - SlowPathCode* slow_path = + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction); codegen_->AddSlowPath(slow_path); @@ -4994,7 +5385,9 @@ void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction) } void LocationsBuilderARM::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -5269,17 +5662,6 @@ void ParallelMoveResolverARM::RestoreScratch(int reg) { HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - if (kEmitCompilerReadBarrier) { - switch (desired_class_load_kind) { - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - // TODO: Implement for read barrier. - return HLoadClass::LoadKind::kDexCacheViaMethod; - default: - break; - } - } switch (desired_class_load_kind) { case HLoadClass::LoadKind::kReferrersClass: break; @@ -5321,10 +5703,15 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { return; } - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kDexCacheViaMethod || @@ -5338,10 +5725,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { LocationSummary* locations = cls->GetLocations(); if (cls->NeedsAccessCheck()) { codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), - cls, - cls->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5349,6 +5733,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); bool generate_null_check = false; switch (cls->GetLoadKind()) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5356,18 +5741,21 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ Register current_method = locations->InAt(0).AsRegister<Register>(); - GenerateGcRootFieldLoad( - cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + requires_read_barrier); break; } case HLoadClass::LoadKind::kBootImageLinkTimeAddress: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), cls->GetTypeIndex())); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); CodeGeneratorARM::PcRelativePatchInfo* labels = codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); __ BindTrackedLabel(&labels->movw_label); @@ -5379,7 +5767,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { break; } case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); DCHECK_NE(cls->GetAddress(), 0u); uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address)); @@ -5399,7 +5787,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { uint32_t offset = address & MaxInt<uint32_t>(offset_bits); __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address)); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(cls, out_loc, out, offset); + GenerateGcRootFieldLoad(cls, out_loc, out, offset, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -5408,7 +5796,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase(); int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset(); // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset) - GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset); + GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -5422,14 +5810,14 @@ void 
InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); // /* GcRoot<mirror::Class> */ out = out[type_index] size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - GenerateGcRootFieldLoad(cls, out_loc, out, offset); + GenerateGcRootFieldLoad(cls, out_loc, out, offset, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); } } if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); codegen_->AddSlowPath(slow_path); if (generate_null_check) { @@ -5454,7 +5842,7 @@ void LocationsBuilderARM::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorARM::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( check->GetLoadClass(), check, check->GetDexPc(), true); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, @@ -5462,7 +5850,7 @@ void InstructionCodeGeneratorARM::VisitClinitCheck(HClinitCheck* check) { } void InstructionCodeGeneratorARM::GenerateClassInitializationCheck( - SlowPathCode* slow_path, Register class_reg) { + SlowPathCodeARM* slow_path, Register class_reg) { __ LoadFromOffset(kLoadWord, IP, class_reg, mirror::Class::StatusOffset().Int32Value()); __ cmp(IP, ShifterOperand(mirror::Class::kStatusInitialized)); __ b(slow_path->GetEntryLabel(), LT); @@ -5474,17 +5862,6 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck( HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { - if (kEmitCompilerReadBarrier) { - switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageAddress: - // TODO: Implement for read barrier. - return HLoadString::LoadKind::kDexCacheViaMethod; - default: - break; - } - } switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: DCHECK(!GetCompilerOptions().GetCompilePic()); @@ -5497,15 +5874,8 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( case HLoadString::LoadKind::kDexCacheAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); - // We disable pc-relative load when there is an irreducible loop, as the optimization - // is incompatible with it. - // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods - // with irreducible loops. - if (GetGraph()->HasIrreducibleLoops()) { - return HLoadString::LoadKind::kDexCacheViaMethod; - } break; case HLoadString::LoadKind::kDexCacheViaMethod: break; @@ -5514,32 +5884,51 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( } void LocationsBuilderARM::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath + LocationSummary::CallKind call_kind = load->NeedsEnvironment() + ? 
((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + HLoadString::LoadKind load_kind = load->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod || - load_kind == HLoadString::LoadKind::kDexCachePcRelative) { - locations->SetInAt(0, Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + locations->SetOut(Location::RegisterLocation(R0)); + } else { + locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything, including temps. + // Note that IP may theoretically be clobbered by saving/restoring the live register + // (only one thanks to the custom calling convention), so we request a different temp. + locations->AddTemp(Location::RequiresRegister()); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() + // that the the kPrimNot result register is the same as the first argument register. + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } - locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); + HLoadString::LoadKind load_kind = load->GetLoadKind(); - switch (load->GetLoadKind()) { + switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: { - DCHECK(!kEmitCompilerReadBarrier); __ LoadLiteral(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), load->GetStringIndex())); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARM::PcRelativePatchInfo* labels = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); __ BindTrackedLabel(&labels->movw_label); @@ -5551,61 +5940,40 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); DCHECK_NE(load->GetAddress(), 0u); uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); - // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives - // a 128B range. To try and reduce the number of literals if we load multiple strings, - // simply split the dex cache address to a 128B aligned base loaded from a literal - // and the remaining offset embedded in the load. 
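The comment just above, part of the kDexCacheAddress path being removed, describes splitting a GC-root address into a 128-byte-aligned base loaded from a literal plus an offset small enough for the 16-bit LDR encoding (5 immediate bits scaled by the 4-byte GC-root size). A standalone sketch of that split, with MaxInt<uint32_t>() replaced by an explicit mask and illustrative names:

#include <cstdint>

struct SplitDexCacheAddress {
  uint32_t base;    // 128-byte aligned, loaded from a literal and shared between loads.
  uint32_t offset;  // Fits the 16-bit LDR immediate (5 bits scaled by 4).
};

inline SplitDexCacheAddress SplitForNarrowLdr(uint32_t address) {
  constexpr uint32_t kOffsetBits = 5 + 2;                      // Encoded bits + scale.
  constexpr uint32_t kOffsetMask = (1u << kOffsetBits) - 1u;   // 0x7f: a 128-byte window.
  return SplitDexCacheAddress{address & ~kOffsetMask, address & kOffsetMask};
}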
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); - DCHECK_ALIGNED(load->GetAddress(), 4u); - constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2; - uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits); - uint32_t offset = address & MaxInt<uint32_t>(offset_bits); - __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address)); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(load, out_loc, out, offset); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - Register base_reg = locations->InAt(0).AsRegister<Register>(); - HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase(); - int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset(); - // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset) - GenerateGcRootFieldLoad(load, out_loc, base_reg, offset); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - Register current_method = locations->InAt(0).AsRegister<Register>(); - - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - break; + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + CodeGeneratorARM::PcRelativePatchInfo* labels = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + __ BindTrackedLabel(&labels->movw_label); + __ movw(temp, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(temp, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(temp, temp, ShifterOperand(PC)); + GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kEmitCompilerReadBarrier); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); + codegen_->AddSlowPath(slow_path); + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); - codegen_->AddSlowPath(slow_path); - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Consider re-adding the compiler code to do string dex cache lookup again. 
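The new kBssEntry path above materializes a PC-relative address with a MOVW/MOVT pair whose placeholder immediates the linker patches later, followed by `add temp, temp, PC`. A sketch of the arithmetic the patches rely on; the struct and function names are illustrative, and the exact PC bias applied by the linker is not modeled here:

#include <cstdint>

struct MovwMovtImmediates {
  uint16_t movw_imm;  // Low half-word, written by the MOVW patch.
  uint16_t movt_imm;  // High half-word, written by the MOVT patch.
};

// Split a 32-bit PC-relative offset into the two 16-bit immediates.
inline MovwMovtImmediates SplitPcRelativeOffset(uint32_t pc_relative_offset) {
  return MovwMovtImmediates{static_cast<uint16_t>(pc_relative_offset & 0xffffu),
                            static_cast<uint16_t>(pc_relative_offset >> 16)};
}

// What the generated code computes at run time: MOVW/MOVT rebuild the offset,
// and the final ADD folds in the current PC.
inline uint32_t MaterializeAddress(MovwMovtImmediates imms, uint32_t pc_at_add) {
  uint32_t offset = static_cast<uint32_t>(imms.movw_imm) |
                    (static_cast<uint32_t>(imms.movt_imm) << 16);
  return offset + pc_at_add;
}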
+ DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); + __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex()); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } static int32_t GetExceptionTlsOffset() { @@ -5640,8 +6008,7 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) { } void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { - codegen_->InvokeRuntime( - QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr); + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } @@ -5656,6 +6023,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -5663,6 +6031,7 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -5672,6 +6041,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The "out" register is used as a temporary, so it overlaps with the inputs. @@ -5700,7 +6072,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); Label done, zero; - SlowPathCode* slow_path = nullptr; + SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. // avoid null check if we know obj is not null. @@ -5892,7 +6264,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { type_check_kind == TypeCheckKind::kClassHierarchyCheck || type_check_kind == TypeCheckKind::kArrayObjectCheck) && !instruction->CanThrowIntoCatchBlock(); - SlowPathCode* type_check_slow_path = + SlowPathCodeARM* type_check_slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, is_type_check_slow_path_fatal); codegen_->AddSlowPath(type_check_slow_path); @@ -6041,11 +6413,9 @@ void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) } void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instruction) { - codegen_->InvokeRuntime(instruction->IsEnter() - ? QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->IsEnter() ? 
kQuickLockObject : kQuickUnlockObject, + instruction, + instruction->GetDexPc()); if (instruction->IsEnter()) { CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); } else { @@ -6198,6 +6568,34 @@ void InstructionCodeGeneratorARM::GenerateEorConst(Register out, Register first, __ eor(out, first, ShifterOperand(value)); } +void InstructionCodeGeneratorARM::GenerateAddLongConst(Location out, + Location first, + uint64_t value) { + Register out_low = out.AsRegisterPairLow<Register>(); + Register out_high = out.AsRegisterPairHigh<Register>(); + Register first_low = first.AsRegisterPairLow<Register>(); + Register first_high = first.AsRegisterPairHigh<Register>(); + uint32_t value_low = Low32Bits(value); + uint32_t value_high = High32Bits(value); + if (value_low == 0u) { + if (out_low != first_low) { + __ mov(out_low, ShifterOperand(first_low)); + } + __ AddConstant(out_high, first_high, value_high); + return; + } + __ AddConstantSetFlags(out_low, first_low, value_low); + ShifterOperand so; + if (__ ShifterOperandCanHold(out_high, first_high, ADC, value_high, kCcDontCare, &so)) { + __ adc(out_high, first_high, so); + } else if (__ ShifterOperandCanHold(out_low, first_low, SBC, ~value_high, kCcDontCare, &so)) { + __ sbc(out_high, first_high, so); + } else { + LOG(FATAL) << "Unexpected constant " << value_high; + UNREACHABLE(); + } +} + void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) { LocationSummary* locations = instruction->GetLocations(); Location first = locations->InAt(0); @@ -6335,9 +6733,11 @@ void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction, Location root, Register obj, - uint32_t offset) { + uint32_t offset, + bool requires_read_barrier) { Register root_reg = root.AsRegister<Register>(); - if (kEmitCompilerReadBarrier) { + if (requires_read_barrier) { + DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -6357,8 +6757,8 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. - SlowPathCode* slow_path = + // Slow path marking the GC root `root`. + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root); codegen_->AddSlowPath(slow_path); @@ -6427,7 +6827,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + Register* temp2) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6466,13 +6868,15 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Introduce a dependency on the lock_word including the rb_state, // which shall prevent load-load reordering without using // a memory barrier (which would be more expensive). - // obj is unchanged by this operation, but its value now depends on temp_reg. + // `obj` is unchanged by this operation, but its value now depends + // on `temp_reg`. __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32)); // The actual reference load. 
if (index.IsValid()) { - // Load types involving an "index": ArrayGet and - // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { size_t computed_offset = @@ -6480,9 +6884,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); } else { // Handle the special case of the - // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use - // a register pair as index ("long offset"), of which only the low - // part contains data. + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics, which use a register pair as index ("long + // offset"), of which only the low part contains data. Register index_reg = index.IsRegisterPair() ? index.AsRegisterPairLow<Register>() : index.AsRegister<Register>(); @@ -6497,9 +6901,22 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + // Slow path marking the object `ref` when it is gray. + SlowPathCodeARM* slow_path; + if (always_update_field) { + DCHECK(temp2 != nullptr); + // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` and `scale_factor` above are expected + // to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM( + instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + } AddSlowPath(slow_path); // if (rb_state == ReadBarrier::gray_ptr_) @@ -6534,7 +6951,7 @@ void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction, // not used by the artReadBarrierSlow entry point. // // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. - SlowPathCode* slow_path = new (GetGraph()->GetArena()) + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); @@ -6569,7 +6986,7 @@ void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction, // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. 
- SlowPathCode* slow_path = + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root); AddSlowPath(slow_path); @@ -6579,7 +6996,7 @@ void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction, HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method) { + HInvokeStaticOrDirect* invoke) { HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; // We disable pc-relative load when there is an irreducible loop, as the optimization // is incompatible with it. @@ -6593,7 +7010,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOr if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) { const DexFile& outer_dex_file = GetGraph()->GetDexFile(); - if (&outer_dex_file != target_method.dex_file) { + if (&outer_dex_file != invoke->GetTargetMethod().dex_file) { // Calls across dex files are more likely to exceed the available BL range, // so use absolute patch with fixup if available and kCallArtMethod otherwise. HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = @@ -6655,10 +7072,13 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { - case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { + uint32_t offset = + GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); // temp = thread->string_init_entrypoint - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset()); + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, offset); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; @@ -6708,7 +7128,8 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ bl(GetFrameEntryLabel()); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); __ BindTrackedLabel(&relative_call_patches_.back().label); // Arbitrarily branch to the BL itself, override at link time. __ bl(&relative_call_patches_.back().label); @@ -6810,17 +7231,37 @@ Literal* CodeGeneratorARM::DeduplicateDexCacheAddressLiteral(uint32_t address) { return DeduplicateUint32Literal(address, &uint32_literals_); } +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorARM::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + const DexFile& dex_file = info.target_dex_file; + size_t offset_or_index = info.offset_or_index; + DCHECK(info.add_pc_label.IsBound()); + uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); + // Add MOVW patch. 
+ DCHECK(info.movw_label.IsBound()); + uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); + linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index)); + // Add MOVT patch. + DCHECK(info.movt_label.IsBound()); + uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); + linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index)); + } +} + void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size() + - /* MOVW+MOVT for each base */ 2u * pc_relative_dex_cache_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + boot_image_string_patches_.size() + - /* MOVW+MOVT for each base */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + boot_image_type_patches_.size() + - /* MOVW+MOVT for each base */ 2u * pc_relative_type_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + boot_image_address_patches_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { @@ -6841,32 +7282,13 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_method.dex_file, target_method.dex_method_index)); } - for (const MethodPatchInfo<Label>& info : relative_call_patches_) { + for (const PatchInfo<Label>& info : relative_call_patches_) { uint32_t literal_offset = info.label.Position(); - linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); - } - for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t base_element_offset = info.offset_or_index; - DCHECK(info.add_pc_label.IsBound()); - uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); - // Add MOVW patch. - DCHECK(info.movw_label.IsBound()); - uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset, - &dex_file, - add_pc_offset, - base_element_offset)); - // Add MOVT patch. - DCHECK(info.movt_label.IsBound()); - uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset, - &dex_file, - add_pc_offset, - base_element_offset)); + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); } + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); for (const auto& entry : boot_image_string_patches_) { const StringReference& target_string = entry.first; Literal* literal = entry.second; @@ -6876,25 +7298,12 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_string.dex_file, target_string.string_index)); } - for (const PcRelativePatchInfo& info : pc_relative_string_patches_) { - const DexFile& dex_file = info.target_dex_file; - uint32_t string_index = info.offset_or_index; - DCHECK(info.add_pc_label.IsBound()); - uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); - // Add MOVW patch. 
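// Editor's sketch -- illustrative only, not part of this change: EmitPcRelativeLinkerPatches
// above folds the three near-identical loops (dex-cache, string and type patches) into one
// helper parameterized by the LinkerPatch factory; each PcRelativePatchInfo yields two patches,
// one at the MOVW and one at the MOVT, both referring back to the ADD-PC position. A standalone
// model of the same factory-parameter pattern (the structs below are simplified stand-ins, not
// ART's types, and the DexFile* argument is omitted):
#include <cstdint>
#include <string>
#include <vector>

struct PatchInfo { uint32_t movw_offset; uint32_t movt_offset; uint32_t add_pc_offset;
                   uint32_t offset_or_index; };
struct Patch { std::string kind; uint32_t literal_offset; uint32_t pc_insn_offset;
               uint32_t target; };

Patch DexCacheArrayPatch(uint32_t lit, uint32_t pc, uint32_t target) {
  return {"DexCacheArray", lit, pc, target};
}
Patch RelativeStringPatch(uint32_t lit, uint32_t pc, uint32_t target) {
  return {"RelativeString", lit, pc, target};
}

// One loop body shared by all PC-relative patch kinds, selected via the factory parameter.
template <Patch (*Factory)(uint32_t, uint32_t, uint32_t)>
void EmitPcRelativePatches(const std::vector<PatchInfo>& infos, std::vector<Patch>* out) {
  for (const PatchInfo& info : infos) {
    out->push_back(Factory(info.movw_offset, info.add_pc_offset, info.offset_or_index));  // MOVW.
    out->push_back(Factory(info.movt_offset, info.add_pc_offset, info.offset_or_index));  // MOVT.
  }
}

int main() {
  std::vector<PatchInfo> infos = {{0x10, 0x14, 0x18, 7}};
  std::vector<Patch> patches;
  EmitPcRelativePatches<DexCacheArrayPatch>(infos, &patches);
  EmitPcRelativePatches<RelativeStringPatch>(infos, &patches);
  return patches.size() == 4 ? 0 : 1;  // Two patch records per info, per kind.
}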
- DCHECK(info.movw_label.IsBound()); - uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeStringPatch(movw_offset, - &dex_file, - add_pc_offset, - string_index)); - // Add MOVT patch. - DCHECK(info.movt_label.IsBound()); - uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeStringPatch(movt_offset, - &dex_file, - add_pc_offset, - string_index)); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); } for (const auto& entry : boot_image_type_patches_) { const TypeReference& target_type = entry.first; @@ -6905,26 +7314,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_type.dex_file, target_type.type_index)); } - for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { - const DexFile& dex_file = info.target_dex_file; - uint32_t type_index = info.offset_or_index; - DCHECK(info.add_pc_label.IsBound()); - uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); - // Add MOVW patch. - DCHECK(info.movw_label.IsBound()); - uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeTypePatch(movw_offset, - &dex_file, - add_pc_offset, - type_index)); - // Add MOVT patch. - DCHECK(info.movt_label.IsBound()); - uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeTypePatch(movt_offset, - &dex_file, - add_pc_offset, - type_index)); - } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); for (const auto& entry : boot_image_address_patches_) { DCHECK(GetCompilerOptions().GetIncludePatchInformation()); Literal* literal = entry.second; diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index fa7709b9a3..3d46aab31f 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -50,6 +50,18 @@ static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +class SlowPathCodeARM : public SlowPathCode { + public: + explicit SlowPathCodeARM(HInstruction* instruction) : SlowPathCode(instruction) {} + + void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) FINAL; + void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) FINAL; + + private: + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM); +}; + + class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> { public: InvokeRuntimeCallingConvention() @@ -63,9 +75,9 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegis DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); }; -static constexpr DRegister FromLowSToD(SRegister reg) { - return DCHECK_CONSTEXPR(reg % 2 == 0, , D0) - static_cast<DRegister>(reg / 2); +constexpr DRegister FromLowSToD(SRegister reg) { + DCHECK_EQ(reg % 2, 0); + return static_cast<DRegister>(reg / 2); } @@ -183,7 +195,7 @@ class LocationsBuilderARM : public 
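// Editor's sketch -- illustrative only, not part of this change: the FromLowSToD change above
// replaces the DCHECK_CONSTEXPR macro form with a plain constexpr function; the mapping itself
// is just "the even S register 2k is the low half of D register k". A minimal model with
// simplified register enums (not ART's):
#include <cassert>

enum SReg { S0, S1, S2, S3, S4, S5, S6, S7 };
enum DReg { D0, D1, D2, D3 };

// Precondition: `reg` is even (the low half of the overlapping D register).
constexpr DReg FromLowSToDModel(SReg reg) {
  return static_cast<DReg>(reg / 2);
}

static_assert(FromLowSToDModel(S4) == D2, "S4 is the low half of D2");

int main() {
  assert(FromLowSToDModel(S6) == D3);
  return 0;
}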
HGraphVisitor { Location ArithmeticZeroOrFpuRegister(HInstruction* input); Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode); bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode); - bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode); + bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare); CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitorARM parameter_visitor_; @@ -216,10 +228,11 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { // is the block to branch to if the suspend check is not needed, and after // the suspend call. void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); - void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); + void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg); void GenerateAndConst(Register out, Register first, uint32_t value); void GenerateOrrConst(Register out, Register first, uint32_t value); void GenerateEorConst(Register out, Register first, uint32_t value); + void GenerateAddLongConst(Location out, Location first, uint64_t value); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleCondition(HCondition* condition); void HandleIntegerRotate(LocationSummary* locations); @@ -270,11 +283,12 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { // // root <- *(obj + offset) // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, Register obj, - uint32_t offset); + uint32_t offset, + bool requires_read_barrier); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Label* true_target, @@ -351,9 +365,6 @@ class CodeGeneratorARM : public CodeGenerator { void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; - // Blocks all register pairs made out of blocked core registers. - void UpdateBlockedPairRegisters() const; - ParallelMoveResolverARM* GetMoveResolver() OVERRIDE { return &move_resolver_; } @@ -389,12 +400,7 @@ class CodeGeneratorARM : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path) OVERRIDE; - - void InvokeRuntime(int32_t offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path); + SlowPathCode* slow_path = nullptr) OVERRIDE; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -402,6 +408,8 @@ class CodeGeneratorARM : public CodeGenerator { HInstruction* instruction, SlowPathCode* slow_path); + void GenerateInvokeRuntime(int32_t entry_point_offset); + // Emit a write barrier. void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null); @@ -443,7 +451,7 @@ class CodeGeneratorARM : public CodeGenerator { // otherwise return a fall-back info that should be used instead. 
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method) OVERRIDE; + HInvokeStaticOrDirect* invoke) OVERRIDE; void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; @@ -500,6 +508,18 @@ class CodeGeneratorARM : public CodeGenerator { bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. + + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires an extra temporary register, which must be provided as a + // non-null pointer (`temp2`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -507,7 +527,9 @@ class CodeGeneratorARM : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + Register* temp2 = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -557,10 +579,10 @@ class CodeGeneratorARM : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); - void GenerateNop(); + void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction); - void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); @@ -582,6 +604,10 @@ class CodeGeneratorARM : public CodeGenerator { uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. Label frame_entry_label_; @@ -598,12 +624,12 @@ class CodeGeneratorARM : public CodeGenerator { MethodToLiteralMap call_patches_; // Relative call patch info. // Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info. + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Deduplication map for boot type literals for kBootImageLinkTimeAddress. 
BootTypeToLiteralMap boot_image_type_patches_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index f3a09fd09f..b53750966d 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -46,16 +46,20 @@ class GcRoot; namespace arm64 { +using helpers::ARM64EncodableConstantOrRegister; +using helpers::ArtVixlRegCodeCoherentForRegSet; using helpers::CPURegisterFrom; using helpers::DRegisterFrom; using helpers::FPRegisterFrom; using helpers::HeapOperand; using helpers::HeapOperandFrom; using helpers::InputCPURegisterAt; +using helpers::InputCPURegisterOrZeroRegAt; using helpers::InputFPRegisterAt; -using helpers::InputRegisterAt; using helpers::InputOperandAt; +using helpers::InputRegisterAt; using helpers::Int64ConstantFrom; +using helpers::IsConstantZeroBitPattern; using helpers::LocationFrom; using helpers::OperandFromMemOperand; using helpers::OutputCPURegister; @@ -66,8 +70,6 @@ using helpers::StackOperandFrom; using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; using helpers::XRegisterFrom; -using helpers::ARM64EncodableConstantOrRegister; -using helpers::ArtVixlRegCodeCoherentForRegSet; static constexpr int kCurrentMethodStackOffset = 0; // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump @@ -131,24 +133,24 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type retur return ARM64ReturnLocation(return_type); } -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value() // Calculate memory accessing operand for save/restore live registers. 
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen, - RegisterSet* register_set, + LocationSummary* locations, int64_t spill_offset, bool is_save) { - DCHECK(ArtVixlRegCodeCoherentForRegSet(register_set->GetCoreRegisters(), + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills, codegen->GetNumberOfCoreRegisters(), - register_set->GetFloatingPointRegisters(), + fp_spills, codegen->GetNumberOfFloatingPointRegisters())); - CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, - register_set->GetCoreRegisters() & (~callee_saved_core_registers.GetList())); - CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize, - register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.GetList())); + CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills); + CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize, fp_spills); MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler(); UseScratchRegisterScope temps(masm); @@ -182,38 +184,35 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen, } void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* register_set = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { - if (!codegen->IsCoreCalleeSaveRegister(i) && register_set->ContainsCoreRegister(i)) { - // If the register holds an object, update the stack mask. - if (locations->RegisterContainsObject(i)) { - locations->SetStackBit(stack_offset / kVRegSize); - } - DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - saved_core_stack_offsets_[i] = stack_offset; - stack_offset += kXRegSizeInBytes; + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + // If the register holds an object, update the stack mask. 
+ if (locations->RegisterContainsObject(i)) { + locations->SetStackBit(stack_offset / kVRegSize); } + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_core_stack_offsets_[i] = stack_offset; + stack_offset += kXRegSizeInBytes; } - for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { - if (!codegen->IsFloatingPointCalleeSaveRegister(i) && - register_set->ContainsFloatingPointRegister(i)) { - DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); - DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); - saved_fpu_stack_offsets_[i] = stack_offset; - stack_offset += kDRegSizeInBytes; - } + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + for (uint32_t i : LowToHighBits(fp_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_fpu_stack_offsets_[i] = stack_offset; + stack_offset += kDRegSizeInBytes; } - SaveRestoreLiveRegistersHelper(codegen, register_set, + SaveRestoreLiveRegistersHelper(codegen, + locations, codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */); } void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* register_set = locations->GetLiveRegisters(); - SaveRestoreLiveRegistersHelper(codegen, register_set, + SaveRestoreLiveRegistersHelper(codegen, + locations, codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */); } @@ -236,10 +235,10 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { codegen->EmitParallelMoves( locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt, locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() - ? QUICK_ENTRY_POINT(pThrowStringBounds) - : QUICK_ENTRY_POINT(pThrowArrayBounds); - arm64_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() + ? kQuickThrowStringBounds + : kQuickThrowArrayBounds; + arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -259,12 +258,7 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. - SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); + arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } @@ -295,9 +289,9 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { InvokeRuntimeCallingConvention calling_convention; __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex()); - int32_t entry_point_offset = do_clinit_ ? 
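// Editor's sketch -- illustrative only, not part of this change: the rewritten
// SaveLiveRegisters above iterates directly over the set bits of the spill masks returned by
// GetSlowPathSpills, lowest register first, handing out consecutive stack slots. A
// self-contained model of that iteration (LowToHighBits is re-sketched here with a
// count-trailing-zeros builtin; ART has its own utility):
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<uint32_t> LowToHighBitsModel(uint32_t mask) {
  std::vector<uint32_t> regs;
  while (mask != 0u) {
    uint32_t reg = static_cast<uint32_t>(__builtin_ctz(mask));  // Lowest set bit (GCC/Clang).
    regs.push_back(reg);
    mask &= mask - 1u;  // Clear that bit and continue upwards.
  }
  return regs;
}

int main() {
  // Spill mask with registers 1, 4 and 30 live: slots are assigned in exactly that order.
  std::vector<uint32_t> regs = LowToHighBitsModel((1u << 1) | (1u << 4) | (1u << 30));
  assert(regs.size() == 3 && regs[0] == 1 && regs[1] == 4 && regs[2] == 30);
  const uint32_t kXRegSizeInBytes = 8;  // One X-sized slot per spilled core register.
  uint32_t stack_offset = 0;
  for (uint32_t reg : regs) {
    (void)reg;
    stack_offset += kXRegSizeInBytes;
  }
  assert(stack_offset == 24);
  return 0;
}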
QUICK_ENTRY_POINT(pInitializeStaticStorage) - : QUICK_ENTRY_POINT(pInitializeType); - arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); + QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage + : kQuickInitializeType; + arm64_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this); if (do_clinit_) { CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); } else { @@ -337,32 +331,60 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} + LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label) + : SlowPathCodeARM64(instruction), + temp_(temp), + adrp_label_(adrp_label) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + // temp_ is a scratch register. Make sure it's not used for saving/restoring registers. + UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); + temps.Exclude(temp_); + __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); __ Mov(calling_convention.GetRegisterAt(0).W(), string_index); - arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); + arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); Primitive::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. + const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile(); + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // The string entry page address was preserved in temp_ thanks to kSaveEverything. + } else { + // For non-Baker read barrier, we need to re-calculate the address of the string entry page. + adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index); + arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_); + } + vixl::aarch64::Label* strp_label = + arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_); + { + SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler()); + __ Bind(strp_label); + __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot), + MemOperand(temp_, /* offset placeholder */ 0)); + } + __ B(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } private: + const Register temp_; + vixl::aarch64::Label* adrp_label_; + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); }; @@ -377,8 +399,10 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { // Live registers will be restored in the catch block if caught. 
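// Editor's sketch -- illustrative only, not part of this change: the extended
// LoadStringSlowPathARM64 above resolves the string through the runtime and then stores the
// result back into the string's .bss entry (via the ADRP/STR placeholder labels), so later
// executions of the HLoadString fast path find a non-null entry and never re-enter the slow
// path. A conceptual model of that lazily filled slot (ResolveStringModel and the single
// global slot are hypothetical stand-ins):
#include <cassert>
#include <string>

static const std::string* bss_entry = nullptr;  // One .bss slot per (dex file, string index).

const std::string* ResolveStringModel() {       // Stands in for the kQuickResolveString call.
  static const std::string interned = "example";
  return &interned;
}

const std::string* LoadStringModel() {
  const std::string* s = bss_entry;  // Fast path: PC-relative load from the .bss entry.
  if (s == nullptr) {                // Slow path: resolve, then store back to the entry.
    s = ResolveStringModel();
    bss_entry = s;
  }
  return s;
}

int main() {
  const std::string* first = LoadStringModel();   // Takes the slow path and fills the entry.
  const std::string* second = LoadStringModel();  // Now hits the cached entry.
  assert(first == second);
  return 0;
}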
SaveLiveRegisters(codegen, instruction_->GetLocations()); } - arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); + arm64_codegen->InvokeRuntime(kQuickThrowNullPointer, + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } @@ -398,11 +422,8 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); + arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ B(GetReturnLabel()); } else { @@ -460,8 +481,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { object_class, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this); + arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); @@ -469,7 +489,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { DCHECK(instruction_->IsCheckCast()); - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); + arm64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } @@ -480,7 +500,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { } const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; } - bool IsFatal() const { return is_fatal_; } + bool IsFatal() const OVERRIDE { return is_fatal_; } private: const bool is_fatal_; @@ -496,11 +516,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - instruction_, - instruction_->GetDexPc(), - this); + arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -539,10 +555,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction_, - instruction_->GetDexPc(), - this); + arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); @@ -576,11 +589,21 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* 
codegen) { } } -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { public: - ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj) - : SlowPathCodeARM64(instruction), obj_(obj) { + ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location ref) + : SlowPathCodeARM64(instruction), ref_(ref) { DCHECK(kEmitCompilerReadBarrier); } @@ -589,15 +612,18 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg())); + DCHECK(ref_.IsRegister()) << ref_; + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || + instruction_->IsArraySet() || instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -606,37 +632,207 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - DCHECK_NE(obj_.reg(), LR); - DCHECK_NE(obj_.reg(), WSP); - DCHECK_NE(obj_.reg(), WZR); - DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg(); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); // "Compact" slow path, saving two moves. 
// // Instead of using the standard runtime calling convention (input // and output in W0): // - // W0 <- obj + // W0 <- ref // W0 <- ReadBarrierMark(W0) - // obj <- W0 + // ref <- W0 // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(obj_.reg()); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); // This runtime call does not require a stack map. arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ B(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM64 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). +class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp) + : SlowPathCodeARM64(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp_(temp) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathARM64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = WRegisterFrom(ref_); + DCHECK(locations->CanCall()); + DCHECK(ref_.IsRegister()) << ref_; + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegister()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use IP to save the old reference, as IP is + // used internally by the ReadBarrierMarkRegX entry point, and we + // need the old reference after the call to that entry point. + DCHECK_NE(LocationFrom(temp_).reg(), IP0); + __ Mov(temp_.W(), ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in W0): + // + // W0 <- ref + // W0 <- ReadBarrierMark(W0) + // ref <- W0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); + // This runtime call does not require a stack map. + arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LDXR/CMP/BNE sequence of instructions in the compare-and-set + // (CAS) operation below would abort the CAS, leaving the field + // as-is. + vixl::aarch64::Label done; + __ Cmp(temp_.W(), ref_reg); + __ B(eq, &done); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + MacroAssembler* masm = arm64_codegen->GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + + // Convenience aliases. + Register base = obj_.W(); + Register offset = XRegisterFrom(field_offset_); + Register expected = temp_.W(); + Register value = ref_reg; + Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. + Register tmp_value = temps.AcquireW(); // Value in memory. + + __ Add(tmp_ptr, base.X(), Operand(offset)); + + if (kPoisonHeapReferences) { + arm64_codegen->GetAssembler()->PoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + arm64_codegen->GetAssembler()->PoisonHeapReference(value); + } + } + + // do { + // tmp_value = [tmp_ptr] - expected; + // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); + + vixl::aarch64::Label loop_head, comparison_failed, exit_loop; + __ Bind(&loop_head); + __ Ldxr(tmp_value, MemOperand(tmp_ptr)); + __ Cmp(tmp_value, expected); + __ B(&comparison_failed, ne); + __ Stxr(tmp_value, value, MemOperand(tmp_ptr)); + __ Cbnz(tmp_value, &loop_head); + __ B(&exit_loop); + __ Bind(&comparison_failed); + __ Clrex(); + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + arm64_codegen->GetAssembler()->UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ B(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. 
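// Editor's sketch -- illustrative only, not part of this change: the LDXR/STXR sequence above
// implements "update the holder's field only if it still holds the old reference" as a strong
// compare-and-set with relaxed ordering. In portable C++ the same behaviour (not the generated
// code) can be expressed with std::atomic::compare_exchange_strong:
#include <atomic>
#include <cassert>
#include <cstdint>

using ObjRef = uint32_t;  // Stand-in for a 32-bit heap reference.

// Install `new_ref` in `*field` only if it still contains `old_ref`; a concurrent writer that
// already changed the field wins the race, and that is acceptable, as explained above.
bool UpdateFieldIfUnchanged(std::atomic<ObjRef>* field, ObjRef old_ref, ObjRef new_ref) {
  if (old_ref == new_ref) {
    return false;  // Marking did not move the reference; nothing to update.
  }
  ObjRef expected = old_ref;
  return field->compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
}

int main() {
  std::atomic<ObjRef> field{0x1000};
  assert(UpdateFieldIfUnchanged(&field, 0x1000, 0x2000));   // Field unchanged: CAS succeeds.
  assert(field.load() == 0x2000);
  assert(!UpdateFieldIfUnchanged(&field, 0x1000, 0x3000));  // Stale old value: CAS is abandoned.
  assert(field.load() == 0x2000);
  return 0;
}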
+ const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -750,7 +946,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) << instruction_->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset_, 0U); + DCHECK_EQ(offset_, 0u); DCHECK(index_.IsRegister()); } } @@ -777,7 +973,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_); } - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this); @@ -856,7 +1052,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { // which would emit a 32-bit move, as `type` is a (32-bit wide) // reference type (`Primitive::kPrimNot`). __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_)); - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, instruction_->GetDexPc(), this); @@ -1038,7 +1234,15 @@ void CodeGeneratorARM64::GenerateFrameEntry() { // ... : other preserved fp registers. // ... : reserved frame space. // sp[0] : current method. - __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); + + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); + } else { + __ Claim(frame_size); + } GetAssembler()->cfi().AdjustCFAOffset(frame_size); GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); @@ -1463,12 +1667,18 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type, break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - DCHECK(src.IsFPRegister()); DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type)); + Register temp_src; + if (src.IsZero()) { + // The zero register is used to avoid synthesizing zero constants. + temp_src = Register(src); + } else { + DCHECK(src.IsFPRegister()); + temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); + __ Fmov(temp_src, FPRegister(src)); + } - Register temp = src.Is64Bits() ? 
temps.AcquireX() : temps.AcquireW(); - __ Fmov(temp, FPRegister(src)); - __ Stlr(temp, base); + __ Stlr(temp_src, base); break; } case Primitive::kPrimVoid: @@ -1480,27 +1690,21 @@ void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { - InvokeRuntime(GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value(), - instruction, - dex_pc, - slow_path); -} - -void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path) { - ValidateInvokeRuntime(instruction, slow_path); - BlockPoolsScope block_pools(GetVIXLAssembler()); - __ Ldr(lr, MemOperand(tr, entry_point_offset)); - __ Blr(lr); - RecordPcInfo(instruction, dex_pc, slow_path); + ValidateInvokeRuntime(entrypoint, instruction, slow_path); + GenerateInvokeRuntime(GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } } void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, HInstruction* instruction, SlowPathCode* slow_path) { ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + GenerateInvokeRuntime(entry_point_offset); +} + +void CodeGeneratorARM64::GenerateInvokeRuntime(int32_t entry_point_offset) { BlockPoolsScope block_pools(GetVIXLAssembler()); __ Ldr(lr, MemOperand(tr, entry_point_offset)); __ Blr(lr); @@ -1641,6 +1845,9 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { object_field_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -1707,7 +1914,9 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { + if (IsConstantZeroBitPattern(instruction->InputAt(1))) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + } else if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { locations->SetInAt(1, Location::RequiresFpuRegister()); } else { locations->SetInAt(1, Location::RequiresRegister()); @@ -1721,7 +1930,7 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, BlockPoolsScope block_pools(GetVIXLAssembler()); Register obj = InputRegisterAt(instruction, 0); - CPURegister value = InputCPURegisterAt(instruction, 1); + CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1); CPURegister source = value; Offset offset = field_info.GetFieldOffset(); Primitive::Type field_type = field_info.GetFieldType(); @@ -2064,6 +2273,9 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -2085,7 +2297,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Location index = locations->InAt(1); Location out = locations->Out(); uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); - + const bool maybe_compressed_char_at = mirror::kUseStringCompression && + instruction->IsStringCharAt(); MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); // Block pools between `Load` and `MaybeRecordImplicitNullCheck`. @@ -2103,9 +2316,28 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } else { // General case. MemOperand source = HeapOperand(obj); + Register length; + if (maybe_compressed_char_at) { + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + length = temps.AcquireW(); + __ Ldr(length, HeapOperand(obj, count_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); - source = HeapOperand(obj, offset); + if (maybe_compressed_char_at) { + vixl::aarch64::Label uncompressed_load, done; + __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load); + __ Ldrb(Register(OutputCPURegister(instruction)), + HeapOperand(obj, offset + Int64ConstantFrom(index))); + __ B(&done); + __ Bind(&uncompressed_load); + __ Ldrh(Register(OutputCPURegister(instruction)), + HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1))); + __ Bind(&done); + } else { + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); + source = HeapOperand(obj, offset); + } } else { Register temp = temps.AcquireSameSizeAs(obj); if (instruction->GetArray()->IsIntermediateAddress()) { @@ -2123,11 +2355,24 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } else { __ Add(temp, obj, offset); } - source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + if (maybe_compressed_char_at) { + vixl::aarch64::Label uncompressed_load, done; + __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load); + __ Ldrb(Register(OutputCPURegister(instruction)), + HeapOperand(temp, XRegisterFrom(index), LSL, 0)); + __ B(&done); + __ Bind(&uncompressed_load); + __ Ldrh(Register(OutputCPURegister(instruction)), + HeapOperand(temp, XRegisterFrom(index), LSL, 1)); + __ Bind(&done); + } else { + source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + } + } + if (!maybe_compressed_char_at) { + codegen_->Load(type, OutputCPURegister(instruction), source); + codegen_->MaybeRecordImplicitNullCheck(instruction); } - - codegen_->Load(type, OutputCPURegister(instruction), source); - codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { static_assert( @@ -2151,25 +2396,30 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); + vixl::aarch64::Register out = OutputRegister(instruction); BlockPoolsScope block_pools(GetVIXLAssembler()); - __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset)); + __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset)); 
codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out compression flag from String's array length. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX))); + } } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - bool object_array_set_with_read_barrier = - kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + may_need_runtime_call_for_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(value_type)) { + if (IsConstantZeroBitPattern(instruction->InputAt(2))) { + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + } else if (Primitive::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RequiresRegister()); @@ -2184,7 +2434,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); Register array = InputRegisterAt(instruction, 0); - CPURegister value = InputCPURegisterAt(instruction, 2); + CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2); CPURegister source = value; Location index = locations->InAt(1); size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); @@ -2223,7 +2473,6 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { codegen_->Store(value_type, value, destination); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { - DCHECK(needs_write_barrier); DCHECK(!instruction->GetArray()->IsIntermediateAddress()); vixl::aarch64::Label done; SlowPathCodeARM64* slow_path = nullptr; @@ -2261,65 +2510,44 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - if (kEmitCompilerReadBarrier) { - // When read barriers are enabled, the type checking - // instrumentation requires two read barriers: - // - // __ Mov(temp2, temp); - // // /* HeapReference<Class> */ temp = temp->component_type_ - // __ Ldr(temp, HeapOperand(temp, component_offset)); - // codegen_->GenerateReadBarrierSlow( - // instruction, temp_loc, temp_loc, temp2_loc, component_offset); - // - // // /* HeapReference<Class> */ temp2 = value->klass_ - // __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // codegen_->GenerateReadBarrierSlow( - // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc); - // - // __ Cmp(temp, temp2); - // - // However, the second read barrier may trash `temp`, as it - // is a temporary register, and as such would not be saved - // along with live registers before calling the runtime (nor - // restored afterwards). So in this case, we bail out and - // delegate the work to the array set slow path. - // - // TODO: Extend the register allocator to support a new - // "(locally) live temp" location so as to avoid always - // going into the slow path when read barriers are enabled. 
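// Editor's sketch -- illustrative only, not part of this change: the compressed-string paths
// added above read the String count word once, test its most significant bit, and then load
// either a byte (compressed) or a half-word (uncompressed) character, while VisitArrayLength
// masks the flag off with INT32_MAX. A host-side model of that convention as encoded in the
// code above (the layout below is simplified and is not mirror::String):
#include <cassert>
#include <cstdint>
#include <vector>

struct StringModel {
  uint32_t count;                // Bit 31: compression flag; low 31 bits: length.
  std::vector<uint8_t> bytes;    // Character storage when compressed.
  std::vector<uint16_t> chars;   // Character storage when uncompressed.
};

uint32_t Length(const StringModel& s) {
  return s.count & static_cast<uint32_t>(INT32_MAX);  // Mask out the compression flag.
}

uint16_t CharAt(const StringModel& s, uint32_t index) {
  const bool compressed = (s.count >> 31) != 0u;
  return compressed ? s.bytes[index]   // LDRB path.
                    : s.chars[index];  // LDRH path.
}

int main() {
  StringModel compressed{(1u << 31) | 3u, {'a', 'b', 'c'}, {}};
  StringModel uncompressed{3u, {}, {0x61, 0x4e2d, 0x63}};
  assert(Length(compressed) == 3 && Length(uncompressed) == 3);
  assert(CharAt(compressed, 1) == 'b');
  assert(CharAt(uncompressed, 1) == 0x4e2d);
  return 0;
}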
- __ B(slow_path->GetEntryLabel()); - } else { - Register temp2 = temps.AcquireSameSizeAs(array); - // /* HeapReference<Class> */ temp = array->klass_ - __ Ldr(temp, HeapOperand(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Note that when Baker read barriers are enabled, the type + // checks are performed without read barriers. This is fine, + // even in the case where a class object is in the from-space + // after the flip, as a comparison involving such a type would + // not produce a false positive; it may of course produce a + // false negative, in which case we would take the ArraySet + // slow path. + + Register temp2 = temps.AcquireSameSizeAs(array); + // /* HeapReference<Class> */ temp = array->klass_ + __ Ldr(temp, HeapOperand(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ Ldr(temp, HeapOperand(temp, component_offset)); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor `temp2`, as we are comparing two poisoned references. + __ Cmp(temp, temp2); + temps.Release(temp2); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + vixl::aarch64::Label do_put; + __ B(eq, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. GetAssembler()->MaybeUnpoisonHeapReference(temp); - // /* HeapReference<Class> */ temp = temp->component_type_ - __ Ldr(temp, HeapOperand(temp, component_offset)); - // /* HeapReference<Class> */ temp2 = value->klass_ - __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // If heap poisoning is enabled, no need to unpoison `temp` - // nor `temp2`, as we are comparing two poisoned references. - __ Cmp(temp, temp2); - - if (instruction->StaticTypeOfArrayIsObjectArray()) { - vixl::aarch64::Label do_put; - __ B(eq, &do_put); - // If heap poisoning is enabled, the `temp` reference has - // not been unpoisoned yet; unpoison it now. - GetAssembler()->MaybeUnpoisonHeapReference(temp); - - // /* HeapReference<Class> */ temp = temp->super_class_ - __ Ldr(temp, HeapOperand(temp, super_offset)); - // If heap poisoning is enabled, no need to unpoison - // `temp`, as we are comparing against null below. - __ Cbnz(temp, slow_path->GetEntryLabel()); - __ Bind(&do_put); - } else { - __ B(ne, slow_path->GetEntryLabel()); - } - temps.Release(temp2); + // /* HeapReference<Class> */ temp = temp->super_class_ + __ Ldr(temp, HeapOperand(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ B(ne, slow_path->GetEntryLabel()); } } @@ -2354,22 +2582,19 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode())); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) { BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); - __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1)); __ B(slow_path->GetEntryLabel(), hs); } @@ -2733,14 +2958,8 @@ void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) { } void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { @@ -2972,6 +3191,7 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3105,6 +3325,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -3112,6 +3333,7 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -3121,6 +3343,9 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The "out" register is used as a temporary, so it overlaps with the inputs. @@ -3601,7 +3826,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method ATTRIBUTE_UNUSED) { + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { // On ARM64 we support all dispatch types. return desired_dispatch_info; } @@ -3627,10 +3852,13 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { - case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { + uint32_t offset = + GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); // temp = thread->string_init_entrypoint - __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset())); + __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset)); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; @@ -3645,22 +3873,14 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { // Add ADRP with its PC-relative DexCache access patch. - const DexFile& dex_file = *invoke->GetTargetMethod().dex_file; + const DexFile& dex_file = invoke->GetDexFile(); uint32_t element_offset = invoke->GetDexCacheArrayOffset(); vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(XRegisterFrom(temp), /* offset placeholder */ 0); - } + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); // Add LDR with its PC-relative DexCache access patch. 
vixl::aarch64::Label* ldr_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(ldr_label); - __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), /* offset placeholder */ 0)); - } + EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { @@ -3693,7 +3913,8 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Bl(&frame_entry_label_); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); vixl::aarch64::Label* label = &relative_call_patches_.back().label; SingleEmissionCheckScope guard(GetVIXLAssembler()); __ Bind(label); @@ -3815,6 +4036,45 @@ vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddress return DeduplicateUint64Literal(address); } +void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register reg) { + DCHECK(reg.IsX()); + SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(fixup_label); + __ adrp(reg, /* offset placeholder */ 0); +} + +void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base) { + DCHECK(out.IsX()); + DCHECK(base.IsX()); + SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(fixup_label); + __ add(out, base, Operand(/* offset placeholder */ 0)); +} + +void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base) { + DCHECK(base.IsX()); + SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(fixup_label); + __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); +} + +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + linker_patches->push_back(Factory(info.label.GetLocation(), + &info.target_dex_file, + info.pc_insn_label->GetLocation(), + info.offset_or_index)); + } +} + void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -3842,10 +4102,9 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_method.dex_file, target_method.dex_method_index)); } - for (const MethodPatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) { - linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.GetLocation(), - info.target_method.dex_file, - info.target_method.dex_method_index)); + for (const PatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) { + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(info.label.GetLocation(), &info.dex_file, info.index)); } for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(), @@ -3860,11 +4119,12 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_string.dex_file, target_string.string_index)); } - for (const PcRelativePatchInfo& 
info : pc_relative_string_patches_) { - linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.GetLocation(), - &info.target_dex_file, - info.pc_insn_label->GetLocation(), - info.offset_or_index)); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); } for (const auto& entry : boot_image_type_patches_) { const TypeReference& target_type = entry.first; @@ -3873,12 +4133,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_type.dex_file, target_type.type_index)); } - for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { - linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.GetLocation(), - &info.target_dex_file, - info.pc_insn_label->GetLocation(), - info.offset_or_index)); - } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); for (const auto& entry : boot_image_address_patches_) { DCHECK(GetCompilerOptions().GetIncludePatchInformation()); vixl::aarch64::Literal<uint32_t>* literal = entry.second; @@ -3946,17 +4202,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - if (kEmitCompilerReadBarrier) { - switch (desired_class_load_kind) { - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - // TODO: Implement for read barrier. - return HLoadClass::LoadKind::kDexCacheViaMethod; - default: - break; - } - } switch (desired_class_load_kind) { case HLoadClass::LoadKind::kReferrersClass: break; @@ -3991,10 +4236,15 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { return; } - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { @@ -4006,10 +4256,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { if (cls->NeedsAccessCheck()) { codegen_->MoveConstant(cls->GetLocations()->GetTemp(0), cls->GetTypeIndex()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), - cls, - cls->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -4017,6 +4264,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); bool generate_null_check = false; switch (cls->GetLoadKind()) { case HLoadClass::LoadKind::kReferrersClass: { @@ -4024,38 +4272,34 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ Register current_method = InputRegisterAt(cls, 0); - GenerateGcRootFieldLoad( - cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + /* fixup_label */ nullptr, + requires_read_barrier); break; } case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), cls->GetTypeIndex())); break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); // Add ADRP with its PC-relative type patch. const DexFile& dex_file = cls->GetDexFile(); uint32_t type_index = cls->GetTypeIndex(); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative type patch. 
vixl::aarch64::Label* add_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(add_label); - __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0)); - } + codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); break; } case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); DCHECK(cls->GetAddress() != 0u && IsUint<32>(cls->GetAddress())); __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress())); break; @@ -4073,7 +4317,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { uint32_t offset = cls->GetAddress() & MaxInt<uint64_t>(offset_bits); __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address)); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(cls, out_loc, out.X(), offset); + GenerateGcRootFieldLoad(cls, + out_loc, + out.X(), + offset, + /* fixup_label */ nullptr, + requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -4083,16 +4332,17 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { uint32_t element_offset = cls->GetDexCacheElementOffset(); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add LDR with its PC-relative DexCache access patch. vixl::aarch64::Label* ldr_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(cls, out_loc, out.X(), /* offset placeholder */ 0, ldr_label); + GenerateGcRootFieldLoad(cls, + out_loc, + out.X(), + /* offset placeholder */ 0, + ldr_label, + requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -4104,8 +4354,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { Register current_method = InputRegisterAt(cls, 0); __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); // /* GcRoot<mirror::Class> */ out = out[type_index] - GenerateGcRootFieldLoad( - cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); + GenerateGcRootFieldLoad(cls, + out_loc, + out.X(), + CodeGenerator::GetCacheOffset(cls->GetTypeIndex()), + /* fixup_label */ nullptr, + requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -4151,17 +4405,6 @@ void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear A HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { - if (kEmitCompilerReadBarrier) { - switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageAddress: - // TODO: Implement for read barrier. 
- return HLoadString::LoadKind::kDexCacheViaMethod; - default: - break; - } - } switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: DCHECK(!GetCompilerOptions().GetCompilePic()); @@ -4174,7 +4417,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( case HLoadString::LoadKind::kDexCacheAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kDexCacheViaMethod: @@ -4184,110 +4427,96 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( } void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath + LocationSummary::CallKind call_kind = load->NeedsEnvironment() + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); + InvokeRuntimeCallingConvention calling_convention; + locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); + } else { + locations->SetOut(Location::RequiresRegister()); + if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything, including temps. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); + DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), + RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot), + Primitive::kPrimNot).GetCode()); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } - locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { - Location out_loc = load->GetLocations()->Out(); Register out = OutputRegister(load); switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!kEmitCompilerReadBarrier); __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), load->GetStringIndex())); return; // No dex cache slow path. case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(!kEmitCompilerReadBarrier); // Add ADRP with its PC-relative String patch. const DexFile& dex_file = load->GetDexFile(); uint32_t string_index = load->GetStringIndex(); + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative String patch. 
vixl::aarch64::Label* add_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(add_label); - __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0)); - } + codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); DCHECK(load->GetAddress() != 0u && IsUint<32>(load->GetAddress())); __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress())); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads - // that gives a 16KiB range. To try and reduce the number of literals if we load - // multiple strings, simply split the dex cache address to a 16KiB aligned base - // loaded from a literal and the remaining offset embedded in the load. - static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); - DCHECK_ALIGNED(load->GetAddress(), 4u); - constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2; - uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits); - uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits); - __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address)); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(load, out_loc, out.X(), offset); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - // Add ADRP with its PC-relative DexCache access patch. + case HLoadString::LoadKind::kBssEntry: { + // Add ADRP with its PC-relative String .bss entry patch. const DexFile& dex_file = load->GetDexFile(); - uint32_t element_offset = load->GetDexCacheElementOffset(); - vixl::aarch64::Label* adrp_label = - codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } - // Add LDR with its PC-relative DexCache access patch. + uint32_t string_index = load->GetStringIndex(); + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + Register temp = temps.AcquireX(); + vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + codegen_->EmitAdrpPlaceholder(adrp_label, temp); + // Add LDR with its PC-relative String patch. 
vixl::aarch64::Label* ldr_label = - codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - Register current_method = InputRegisterAt(load, 0); - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())); - break; + codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ + GenerateGcRootFieldLoad(load, + load->GetLocations()->Out(), + temp, + /* offset placeholder */ 0u, + ldr_label, + kEmitCompilerReadBarrier); + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label); + codegen_->AddSlowPath(slow_path); + __ Cbz(out.X(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); - codegen_->AddSlowPath(slow_path); - __ Cbz(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); + __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex()); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { @@ -4307,11 +4536,9 @@ void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction } void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) { - codegen_->InvokeRuntime(instruction->IsEnter() - ? QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject: kQuickUnlockObject, + instruction, + instruction->GetDexPc()); if (instruction->IsEnter()) { CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); } else { @@ -4415,10 +4642,7 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { __ Mov(type_index, instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. 
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } @@ -4447,10 +4671,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) __ Blr(lr); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } } @@ -4484,14 +4705,8 @@ void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) { } void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::RequiresRegister()); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { @@ -4616,9 +4831,8 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf) - : QUICK_ENTRY_POINT(pFmod); - codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr); + QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod; + codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc()); if (type == Primitive::kPrimFloat) { CheckEntrypointTypes<kQuickFmodf, float, float, float>(); } else { @@ -4776,7 +4990,9 @@ void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet( } void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -4801,8 +5017,7 @@ void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { } void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) { - codegen_->InvokeRuntime( - QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr); + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } @@ -5060,9 +5275,12 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru Location root, Register obj, uint32_t offset, - vixl::aarch64::Label* fixup_label) { + vixl::aarch64::Label* fixup_label, + bool requires_read_barrier) { + DCHECK(fixup_label == nullptr || offset == 0u); Register root_reg = RegisterFrom(root, Primitive::kPrimNot); - if (kEmitCompilerReadBarrier) { + if (requires_read_barrier) { + DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -5076,9 +5294,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(fixup_label); - __ ldr(root_reg, MemOperand(obj, offset)); + codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); } static_assert( sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), @@ -5088,7 +5304,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. + // Slow path marking the GC root `root`. SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root); codegen_->AddSlowPath(slow_path); @@ -5107,9 +5323,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru if (fixup_label == nullptr) { __ Add(root_reg.X(), obj.X(), offset); } else { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(fixup_label); - __ add(root_reg.X(), obj.X(), offset); + codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); } // /* mirror::Object* */ root = root->Read() codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); @@ -5120,9 +5334,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(fixup_label); - __ ldr(root_reg, MemOperand(obj, offset)); + codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); } // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. 
@@ -5141,7 +5353,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); - size_t no_scale_factor = 0U; + size_t no_scale_factor = 0u; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -5192,7 +5404,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* size_t scale_factor, Register temp, bool needs_null_check, - bool use_load_acquire) { + bool use_load_acquire, + bool always_update_field) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // If we are emitting an array load, we should not be using a @@ -5239,12 +5452,15 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Introduce a dependency on the lock_word including rb_state, // to prevent load-load reordering, and without using // a memory barrier (which would be more expensive). - // obj is unchanged by this operation, but its value now depends on temp. + // `obj` is unchanged by this operation, but its value now depends + // on `temp`. __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32)); // The actual reference load. if (index.IsValid()) { - // Load types involving an "index". + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. if (use_load_acquire) { // UnsafeGetObjectVolatile intrinsic case. // Register `index` is not an index in an object array, but an @@ -5253,9 +5469,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* DCHECK(instruction->GetLocations()->Intrinsified()); DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) << instruction->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset, 0U); - DCHECK_EQ(scale_factor, 0U); - DCHECK_EQ(needs_null_check, 0U); + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, 0u); + DCHECK_EQ(needs_null_check, 0u); // /* HeapReference<Object> */ ref = *(obj + index) MemOperand field = HeapOperand(obj, XRegisterFrom(index)); LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); @@ -5266,10 +5482,10 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); Load(type, ref_reg, HeapOperand(obj, computed_offset)); } else { - Register temp2 = temps.AcquireW(); - __ Add(temp2, obj, offset); - Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor)); - temps.Release(temp2); + Register temp3 = temps.AcquireW(); + __ Add(temp3, obj, offset); + Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor)); + temps.Release(temp3); } } } else { @@ -5285,9 +5501,20 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Object* ref = ref_addr->AsMirrorPtr() GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. - SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); + // Slow path marking the object `ref` when it is gray. + SlowPathCodeARM64* slow_path; + if (always_update_field) { + // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports + // address of the form `obj + field_offset`, where `obj` is a + // register and `field_offset` is a register. Thus `offset` and + // `scale_factor` above are expected to be null in this code path. 
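+      // (In practice this path is taken for the UnsafeCASObject intrinsic, which passes the
+      // field offset in `index`, so the checks below only assert the fixed parameters.)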
+ DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, 0u); /* "times 1" */ + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64( + instruction, ref, obj, /* field_offset */ index, temp); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); + } AddSlowPath(slow_path); // if (rb_state == ReadBarrier::gray_ptr_) diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 1b5fa857e7..7f54b4b6b2 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -27,11 +27,11 @@ #include "utils/arm64/assembler_arm64.h" #include "utils/type_reference.h" -// TODO: make vixl clean wrt -Wshadow. +// TODO(VIXL): Make VIXL compile with -Wshadow. #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop namespace art { @@ -289,12 +289,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { // // root <- *(obj + offset) // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, vixl::aarch64::Register obj, uint32_t offset, - vixl::aarch64::Label* fixup_label = nullptr); + vixl::aarch64::Label* fixup_label, + bool requires_read_barrier); // Generate a floating-point comparison. void GenerateFcmp(HInstruction* instruction); @@ -491,12 +492,7 @@ class CodeGeneratorARM64 : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path) OVERRIDE; - - void InvokeRuntime(int32_t offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path); + SlowPathCode* slow_path = nullptr) OVERRIDE; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -504,6 +500,8 @@ class CodeGeneratorARM64 : public CodeGenerator { HInstruction* instruction, SlowPathCode* slow_path); + void GenerateInvokeRuntime(int32_t entry_point_offset); + ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; } bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { @@ -524,7 +522,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // otherwise return a fall-back info that should be used instead. 
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method) OVERRIDE; + HInvokeStaticOrDirect* invoke) OVERRIDE; void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; @@ -566,6 +564,14 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address); + void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg); + void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base); + void EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base); + void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; // Fast path implementation of ReadBarrier::Barrier for a heap @@ -588,6 +594,13 @@ class CodeGeneratorARM64 : public CodeGenerator { bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. + // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch64::Register obj, @@ -596,7 +609,8 @@ class CodeGeneratorARM64 : public CodeGenerator { size_t scale_factor, vixl::aarch64::Register temp, bool needs_null_check, - bool use_load_acquire); + bool use_load_acquire, + bool always_update_field = false); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -646,10 +660,10 @@ class CodeGeneratorARM64 : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); - void GenerateNop(); + void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction); - void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; private: using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; @@ -693,6 +707,10 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitJumpTables(); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory. ArenaDeque<vixl::aarch64::Label> block_labels_; // Indexed by block id. @@ -715,12 +733,12 @@ class CodeGeneratorARM64 : public CodeGenerator { MethodToLiteralMap call_patches_; // Relative call patch info. // Using ArenaDeque<> which retains element addresses on push/emplace_back(). 
- ArenaDeque<MethodPatchInfo<vixl::aarch64::Label>> relative_call_patches_; + ArenaDeque<PatchInfo<vixl::aarch64::Label>> relative_call_patches_; // PC-relative DexCache access info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info. + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Deduplication map for boot type literals for kBootImageLinkTimeAddress. BootTypeToLiteralMap boot_image_type_patches_; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc new file mode 100644 index 0000000000..f1d11354fa --- /dev/null +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -0,0 +1,4249 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_arm_vixl.h" + +#include "arch/arm/instruction_set_features_arm.h" +#include "art_method.h" +#include "code_generator_utils.h" +#include "common_arm.h" +#include "compiled_method.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "gc/accounting/card_table.h" +#include "mirror/array-inl.h" +#include "mirror/class-inl.h" +#include "thread.h" +#include "utils/arm/assembler_arm_vixl.h" +#include "utils/arm/managed_register_arm.h" +#include "utils/assembler.h" +#include "utils/stack_checks.h" + +namespace art { +namespace arm { + +namespace vixl32 = vixl::aarch32; +using namespace vixl32; // NOLINT(build/namespaces) + +using helpers::DRegisterFrom; +using helpers::DWARFReg; +using helpers::FromLowSToD; +using helpers::HighDRegisterFrom; +using helpers::HighRegisterFrom; +using helpers::InputOperandAt; +using helpers::InputRegisterAt; +using helpers::InputSRegisterAt; +using helpers::InputVRegisterAt; +using helpers::LocationFrom; +using helpers::LowRegisterFrom; +using helpers::LowSRegisterFrom; +using helpers::OutputRegister; +using helpers::OutputSRegister; +using helpers::OutputVRegister; +using helpers::RegisterFrom; +using helpers::SRegisterFrom; + +using RegisterList = vixl32::RegisterList; + +static bool ExpectedPairLayout(Location location) { + // We expected this for both core and fpu register pairs. + return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); +} + +static constexpr size_t kArmInstrMaxSizeInBytes = 4u; + +#ifdef __ +#error "ARM Codegen VIXL macro-assembler macro already defined." +#endif + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()-> // NOLINT +#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value() + +// Marker that code is yet to be, and must, be implemented. 
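+// A typical use is `TODO_VIXL32(FATAL);`, which logs the calling function's signature followed
+// by "unimplemented" and, at FATAL severity, aborts.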
+#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented " + +// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, +// for each live D registers they treat two corresponding S registers as live ones. +// +// Two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build +// from a list of contiguous S registers a list of contiguous D registers (processing first/last +// S registers corner cases) and save/restore this new list treating them as D registers. +// - decreasing code size +// - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is +// restored and then used in regular non SlowPath code as D register. +// +// For the following example (v means the S register is live): +// D names: | D0 | D1 | D2 | D4 | ... +// S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ... +// Live? | | v | v | v | v | v | v | | ... +// +// S1 and S6 will be saved/restored independently; D registers list (D1, D2) will be processed +// as D registers. +// +// TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers +// for lists of floating-point registers. +static size_t SaveContiguousSRegisterList(size_t first, + size_t last, + CodeGenerator* codegen, + size_t stack_offset) { + static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes."); + static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes."); + DCHECK_LE(first, last); + if ((first == last) && (first == 0)) { + __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset)); + return stack_offset + kSRegSizeInBytes; + } + if (first % 2 == 1) { + __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset)); + stack_offset += kSRegSizeInBytes; + } + + bool save_last = false; + if (last % 2 == 0) { + save_last = true; + --last; + } + + if (first < last) { + vixl32::DRegister d_reg = vixl32::DRegister(first / 2); + DCHECK_EQ((last - first + 1) % 2, 0u); + size_t number_of_d_regs = (last - first + 1) / 2; + + if (number_of_d_regs == 1) { + __ Vstr(d_reg, MemOperand(sp, stack_offset)); + } else if (number_of_d_regs > 1) { + UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()); + vixl32::Register base = sp; + if (stack_offset != 0) { + base = temps.Acquire(); + __ Add(base, sp, stack_offset); + } + __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs)); + } + stack_offset += number_of_d_regs * kDRegSizeInBytes; + } + + if (save_last) { + __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset)); + stack_offset += kSRegSizeInBytes; + } + + return stack_offset; +} + +static size_t RestoreContiguousSRegisterList(size_t first, + size_t last, + CodeGenerator* codegen, + size_t stack_offset) { + static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes."); + static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes."); + DCHECK_LE(first, last); + if ((first == last) && (first == 0)) { + __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset)); + return stack_offset + kSRegSizeInBytes; + } + if (first % 2 == 1) { + __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset)); + stack_offset += kSRegSizeInBytes; + } + + bool restore_last = false; + if (last % 2 == 0) { + restore_last = true; + --last; + } + + if (first < last) { + vixl32::DRegister d_reg = vixl32::DRegister(first / 2); + 
DCHECK_EQ((last - first + 1) % 2, 0u); + size_t number_of_d_regs = (last - first + 1) / 2; + if (number_of_d_regs == 1) { + __ Vldr(d_reg, MemOperand(sp, stack_offset)); + } else if (number_of_d_regs > 1) { + UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()); + vixl32::Register base = sp; + if (stack_offset != 0) { + base = temps.Acquire(); + __ Add(base, sp, stack_offset); + } + __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs)); + } + stack_offset += number_of_d_regs * kDRegSizeInBytes; + } + + if (restore_last) { + __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset)); + stack_offset += kSRegSizeInBytes; + } + + return stack_offset; +} + +void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { + size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + size_t orig_offset = stack_offset; + + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + // If the register holds an object, update the stack mask. + if (locations->RegisterContainsObject(i)) { + locations->SetStackBit(stack_offset / kVRegSize); + } + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_core_stack_offsets_[i] = stack_offset; + stack_offset += kArmWordSize; + } + + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset); + + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + orig_offset = stack_offset; + for (uint32_t i : LowToHighBits(fp_spills)) { + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_fpu_stack_offsets_[i] = stack_offset; + stack_offset += kArmWordSize; + } + + stack_offset = orig_offset; + while (fp_spills != 0u) { + uint32_t begin = CTZ(fp_spills); + uint32_t tmp = fp_spills + (1u << begin); + fp_spills &= tmp; // Clear the contiguous range of 1s. + uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined. + stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset); + } + DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); +} + +void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { + size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + size_t orig_offset = stack_offset; + + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + stack_offset += kArmWordSize; + } + + // TODO(VIXL): Check the coherency of stack_offset after this with a test. + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset); + + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + while (fp_spills != 0u) { + uint32_t begin = CTZ(fp_spills); + uint32_t tmp = fp_spills + (1u << begin); + fp_spills &= tmp; // Clear the contiguous range of 1s. + uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined. 
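+    // For example, with fp_spills = 0b0111'1100: begin = 2, tmp = 0b1000'0000 (the carry clears
+    // the contiguous run of 1s), fp_spills becomes 0, and end = 7, so S registers [2, 6] are
+    // restored as one contiguous list below.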
+ stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset); + } + DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); +} + +class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + if (instruction_->CanThrowIntoCatchBlock()) { + // Live registers will be restored in the catch block if caught. + SaveLiveRegisters(codegen, instruction_->GetLocations()); + } + arm_codegen->InvokeRuntime(kQuickThrowNullPointer, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); + } + + bool IsFatal() const OVERRIDE { return true; } + + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARMVIXL"; } + + private: + DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL); +}; + +class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction) + : SlowPathCodeARMVIXL(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); + } + + bool IsFatal() const OVERRIDE { return true; } + + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARMVIXL"; } + + private: + DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL); +}; + +class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor) + : SlowPathCodeARMVIXL(instruction), successor_(successor) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + if (successor_ == nullptr) { + __ B(GetReturnLabel()); + } else { + __ B(arm_codegen->GetLabelOf(successor_)); + } + } + + vixl32::Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } + + HBasicBlock* GetSuccessor() const { + return successor_; + } + + const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARMVIXL"; } + + private: + // If not null, the block to branch to after the suspend check. + HBasicBlock* const successor_; + + // If `successor_` is null, the label to branch to after the suspend check. 
+ vixl32::Label return_label_; + + DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL); +}; + +class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit) + : SlowPathCodeARMVIXL(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = at_->GetLocations(); + + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConventionARMVIXL calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex()); + QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage + : kQuickInitializeType; + arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } + + // Move the class to the desired location. + Location out = locations->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + arm_codegen->Move32(locations->Out(), LocationFrom(r0)); + } + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARMVIXL"; } + + private: + // The class this slow path will load. + HLoadClass* const cls_; + + // The instruction where this slow path is happening. + // (Might be the load class or an initialization check). + HInstruction* const at_; + + // The dex PC of `at_`. + const uint32_t dex_pc_; + + // Whether to initialize the class. + const bool do_clinit_; + + DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL); +}; + +inline vixl32::Condition ARMCondition(IfCondition cond) { + switch (cond) { + case kCondEQ: return eq; + case kCondNE: return ne; + case kCondLT: return lt; + case kCondLE: return le; + case kCondGT: return gt; + case kCondGE: return ge; + case kCondB: return lo; + case kCondBE: return ls; + case kCondA: return hi; + case kCondAE: return hs; + } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +// Maps signed condition to unsigned condition. +inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) { + switch (cond) { + case kCondEQ: return eq; + case kCondNE: return ne; + // Signed to unsigned. + case kCondLT: return lo; + case kCondLE: return ls; + case kCondGT: return hi; + case kCondGE: return hs; + // Unsigned remain unchanged. + case kCondB: return lo; + case kCondBE: return ls; + case kCondA: return hi; + case kCondAE: return hs; + } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) { + // The ARM condition codes can express all the necessary branches, see the + // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual. + // There is no dex instruction or HIR that would need the missing conditions + // "equal or unordered" or "not equal". + switch (cond) { + case kCondEQ: return eq; + case kCondNE: return ne /* unordered */; + case kCondLT: return gt_bias ? cc : lt /* unordered */; + case kCondLE: return gt_bias ? ls : le /* unordered */; + case kCondGT: return gt_bias ? hi /* unordered */ : gt; + case kCondGE: return gt_bias ? 
cs /* unordered */ : ge; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const { + stream << vixl32::Register(reg); +} + +void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const { + stream << vixl32::SRegister(reg); +} + +static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { + uint32_t mask = 0; + for (uint32_t i = regs.GetFirstSRegister().GetCode(); + i <= regs.GetLastSRegister().GetCode(); + ++i) { + mask |= (1 << i); + } + return mask; +} + +#undef __ + +CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, + const ArmInstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) + : CodeGenerator(graph, + kNumberOfCoreRegisters, + kNumberOfSRegisters, + kNumberOfRegisterPairs, + kCoreCalleeSaves.GetList(), + ComputeSRegisterListMask(kFpuCalleeSaves), + compiler_options, + stats), + block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + location_builder_(graph, this), + instruction_visitor_(graph, this), + move_resolver_(graph->GetArena(), this), + assembler_(graph->GetArena()), + isa_features_(isa_features) { + // Always save the LR register to mimic Quick. + AddAllocatedRegister(Location::RegisterLocation(LR)); + // Give d14 and d15 as scratch registers to VIXL. + // They are removed from the register allocator in `SetupBlockedRegisters()`. + // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack + // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead + // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give + // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may + // also want to investigate giving those 14 other D registers to the allocator. + GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14); + GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15); +} + +#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> + +void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { + GetAssembler()->FinalizeCode(); + CodeGenerator::Finalize(allocator); +} + +void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { + // Stack register, LR and PC are always reserved. + blocked_core_registers_[SP] = true; + blocked_core_registers_[LR] = true; + blocked_core_registers_[PC] = true; + + // Reserve thread register. + blocked_core_registers_[TR] = true; + + // Reserve temp register. + blocked_core_registers_[IP] = true; + + // Registers s28-s31 (d14-d15) are left to VIXL for scratch registers. + // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.) + blocked_fpu_registers_[28] = true; + blocked_fpu_registers_[29] = true; + blocked_fpu_registers_[30] = true; + blocked_fpu_registers_[31] = true; + + if (GetGraph()->IsDebuggable()) { + // Stubs do not save callee-save floating point registers. If the graph + // is debuggable, we need to deal with these registers differently. For + // now, just block them. 
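+ // kFpuCalleeSaves covers the AAPCS callee-saved FP registers s16-s31 (d8-d15),
+ // so blocking the whole range keeps the allocator away from every FP register a
+ // callee would otherwise have to preserve.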
+ for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode(); + i <= kFpuCalleeSaves.GetLastSRegister().GetCode(); + ++i) { + blocked_fpu_registers_[i] = true; + } + } +} + +InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph, + CodeGeneratorARMVIXL* codegen) + : InstructionCodeGenerator(graph, codegen), + assembler_(codegen->GetAssembler()), + codegen_(codegen) {} + +void CodeGeneratorARMVIXL::ComputeSpillMask() { + core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; + DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; + // There is no easy instruction to restore just the PC on thumb2. We spill and + // restore another arbitrary register. + core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister.GetCode()); + fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; + // We use vpush and vpop for saving and restoring floating point registers, which take + // a SRegister and the number of registers to save/restore after that SRegister. We + // therefore update the `fpu_spill_mask_` to also contain those registers not allocated, + // but in the range. + if (fpu_spill_mask_ != 0) { + uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_); + uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_); + for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) { + fpu_spill_mask_ |= (1 << i); + } + } +} + +void CodeGeneratorARMVIXL::GenerateFrameEntry() { + bool skip_overflow_check = + IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + __ Bind(&frame_entry_label_); + + if (HasEmptyFrame()) { + return; + } + + if (!skip_overflow_check) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); + // The load must immediately precede RecordPcInfo. + AssemblerAccurateScope aas(GetVIXLAssembler(), + kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ ldr(temp, MemOperand(temp)); + RecordPcInfo(nullptr, 0); + } + + __ Push(RegisterList(core_spill_mask_)); + GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_)); + GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister), + 0, + core_spill_mask_, + kArmWordSize); + if (fpu_spill_mask_ != 0) { + uint32_t first = LeastSignificantBit(fpu_spill_mask_); + + // Check that list is contiguous. 
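+ // ComputeSpillMask() has already filled any gap between the lowest and highest
+ // set bits, so the mask should describe one contiguous run of S registers that a
+ // single VPUSH can cover. Worked example (illustrative values): spilling s16-s21
+ // gives fpu_spill_mask_ == 0x003f0000, so
+ //   fpu_spill_mask_ >> CTZ(fpu_spill_mask_)   == 0x3f and
+ //   ~0u >> (32 - POPCOUNT(fpu_spill_mask_))   == 0x3f,
+ // while a mask with a hole, e.g. 0x00350000, gives 0x35 vs 0x0f and fails.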
+ DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_))); + + __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); + GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); + GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), 0, fpu_spill_mask_, kArmWordSize); + } + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ Sub(sp, sp, adjust); + GetAssembler()->cfi().AdjustCFAOffset(adjust); + GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0); +} + +void CodeGeneratorARMVIXL::GenerateFrameExit() { + if (HasEmptyFrame()) { + __ Bx(lr); + return; + } + GetAssembler()->cfi().RememberState(); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ Add(sp, sp, adjust); + GetAssembler()->cfi().AdjustCFAOffset(-adjust); + if (fpu_spill_mask_ != 0) { + uint32_t first = LeastSignificantBit(fpu_spill_mask_); + + // Check that list is contiguous. + DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_))); + + __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); + GetAssembler()->cfi().AdjustCFAOffset( + -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_)); + GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_); + } + // Pop LR into PC to return. + DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U); + uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode; + __ Pop(RegisterList(pop_mask)); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); +} + +void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) { + __ Bind(GetLabelOf(block)); +} + +void CodeGeneratorARMVIXL::Move32(Location destination, Location source) { + if (source.Equals(destination)) { + return; + } + if (destination.IsRegister()) { + if (source.IsRegister()) { + __ Mov(RegisterFrom(destination), RegisterFrom(source)); + } else if (source.IsFpuRegister()) { + __ Vmov(RegisterFrom(destination), SRegisterFrom(source)); + } else { + GetAssembler()->LoadFromOffset(kLoadWord, + RegisterFrom(destination), + sp, + source.GetStackIndex()); + } + } else if (destination.IsFpuRegister()) { + if (source.IsRegister()) { + __ Vmov(SRegisterFrom(destination), RegisterFrom(source)); + } else if (source.IsFpuRegister()) { + __ Vmov(SRegisterFrom(destination), SRegisterFrom(source)); + } else { + GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex()); + } + } else { + DCHECK(destination.IsStackSlot()) << destination; + if (source.IsRegister()) { + GetAssembler()->StoreToOffset(kStoreWord, + RegisterFrom(source), + sp, + destination.GetStackIndex()); + } else if (source.IsFpuRegister()) { + GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex()); + } else { + DCHECK(source.IsStackSlot()) << source; + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex()); + GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); + } + } +} + +void CodeGeneratorARMVIXL::MoveConstant(Location destination ATTRIBUTE_UNUSED, + int32_t value ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + +void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, Primitive::Type dst_type) { + // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in + // `ParallelMoveResolverARMVIXL::EmitMove`, as is done 
in the `arm64` backend. + HParallelMove move(GetGraph()->GetArena()); + move.AddMove(src, dst, dst_type, nullptr); + GetMoveResolver()->EmitNativeCode(&move); +} + +void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location ATTRIBUTE_UNUSED, + LocationSummary* locations ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + +void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path) { + ValidateInvokeRuntime(entrypoint, instruction, slow_path); + GenerateInvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value()); + if (EntrypointRequiresStackMap(entrypoint)) { + // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the + // previous instruction. + RecordPcInfo(instruction, dex_pc, slow_path); + } +} + +void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + GenerateInvokeRuntime(entry_point_offset); +} + +void CodeGeneratorARMVIXL::GenerateInvokeRuntime(int32_t entry_point_offset) { + GetAssembler()->LoadFromOffset(kLoadWord, lr, tr, entry_point_offset); + __ Blx(lr); +} + +void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) { + DCHECK(!successor->IsExitBlock()); + HBasicBlock* block = got->GetBlock(); + HInstruction* previous = got->GetPrevious(); + HLoopInformation* info = block->GetLoopInformation(); + + if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; + } + if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(block, successor)) { + __ B(codegen_->GetLabelOf(successor)); + } +} + +void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) { + got->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) { + HandleGoto(got, got->GetSuccessor()); +} + +void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) { + try_boundary->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) { + HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); + if (!successor->IsExitBlock()) { + HandleGoto(try_boundary, successor); + } +} + +void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { + exit->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { +} + +void InstructionCodeGeneratorARMVIXL::GenerateVcmp(HInstruction* instruction) { + Primitive::Type type = instruction->InputAt(0)->GetType(); + Location lhs_loc = instruction->GetLocations()->InAt(0); + Location rhs_loc = instruction->GetLocations()->InAt(1); + if (rhs_loc.IsConstant()) { + // 0.0 is the only immediate that can be encoded directly in + // a VCMP instruction. + // + // Both the JLS (section 15.20.1) and the JVMS (section 6.5) + // specify that in a floating-point comparison, positive zero + // and negative zero are considered equal, so we can use the + // literal 0.0 for both cases here. 
+ // + // Note however that some methods (Float.equal, Float.compare, + // Float.compareTo, Double.equal, Double.compare, + // Double.compareTo, Math.max, Math.min, StrictMath.max, + // StrictMath.min) consider 0.0 to be (strictly) greater than + // -0.0. So if we ever translate calls to these methods into a + // HCompare instruction, we must handle the -0.0 case with + // care here. + DCHECK(rhs_loc.GetConstant()->IsArithmeticZero()); + if (type == Primitive::kPrimFloat) { + __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0); + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + __ Vcmp(F64, FromLowSToD(LowSRegisterFrom(lhs_loc)), 0.0); + } + } else { + if (type == Primitive::kPrimFloat) { + __ Vcmp(InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1)); + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + __ Vcmp(FromLowSToD(LowSRegisterFrom(lhs_loc)), FromLowSToD(LowSRegisterFrom(rhs_loc))); + } + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateFPJumps(HCondition* cond, + vixl32::Label* true_label, + vixl32::Label* false_label ATTRIBUTE_UNUSED) { + // To branch on the result of the FP compare we transfer FPSCR to APSR (encoded as PC in VMRS). + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(ARMFPCondition(cond->GetCondition(), cond->IsGtBias()), true_label); +} + +void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond, + vixl32::Label* true_label, + vixl32::Label* false_label) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + vixl32::Register left_high = HighRegisterFrom(left); + vixl32::Register left_low = LowRegisterFrom(left); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + // TODO: consider avoiding jumps with temporary and CMP low+SBC high + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. + break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + case kCondB: + false_high_cond = kCondA; + break; + case kCondBE: + true_high_cond = kCondB; + break; + case kCondA: + false_high_cond = kCondB; + break; + case kCondAE: + true_high_cond = kCondA; + break; + } + if (right.IsConstant()) { + int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + __ Cmp(left_high, val_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label); + } else { + __ B(ARMCondition(true_high_cond), true_label); + __ B(ARMCondition(false_high_cond), false_label); + } + // Must be equal high, so compare the lows. 
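+ // Once the high words are known to be equal, the low words must be compared as
+ // unsigned magnitudes, which is why `final_condition` above comes from
+ // ARMUnsignedCondition(if_cond). Illustrative example for kCondLT: comparing
+ // a == 0x000000007fffffff against the constant 0x0000000080000000, the high
+ // words match and the unsigned compare of 0x7fffffff with 0x80000000 correctly
+ // reports a < b, whereas a signed compare would treat 0x80000000 as negative
+ // and report the opposite.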
+ __ Cmp(left_low, val_low); + } else { + vixl32::Register right_high = HighRegisterFrom(right); + vixl32::Register right_low = LowRegisterFrom(right); + + __ Cmp(left_high, right_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label); + } else { + __ B(ARMCondition(true_high_cond), true_label); + __ B(ARMCondition(false_high_cond), false_label); + } + // Must be equal high, so compare the lows. + __ Cmp(left_low, right_low); + } + // The last comparison might be unsigned. + // TODO: optimize cases where this is always true/false + __ B(final_condition, true_label); +} + +void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, + vixl32::Label* true_target_in, + vixl32::Label* false_target_in) { + // Generated branching requires both targets to be explicit. If either of the + // targets is nullptr (fallthrough) use and bind `fallthrough` instead. + vixl32::Label fallthrough; + vixl32::Label* true_target = (true_target_in == nullptr) ? &fallthrough : true_target_in; + vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in; + + Primitive::Type type = condition->InputAt(0)->GetType(); + switch (type) { + case Primitive::kPrimLong: + GenerateLongComparesAndJumps(condition, true_target, false_target); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + GenerateVcmp(condition); + GenerateFPJumps(condition, true_target, false_target); + break; + default: + LOG(FATAL) << "Unexpected compare type " << type; + } + + if (false_target != &fallthrough) { + __ B(false_target); + } + + if (true_target_in == nullptr || false_target_in == nullptr) { + __ Bind(&fallthrough); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, + vixl32::Label* true_target, + vixl32::Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against "true" (integer value 1). + if (cond->AsIntConstant()->IsTrue()) { + if (true_target != nullptr) { + __ B(true_target); + } + } else { + DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); + if (false_target != nullptr) { + __ B(false_target); + } + } + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + // Condition has been materialized, compare the output to 0. + if (kIsDebugBuild) { + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); + DCHECK(cond_val.IsRegister()); + } + if (true_target == nullptr) { + __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); + } else { + __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); + } + } else { + // Condition has not been materialized. 
Use its inputs as the comparison and + // its condition as the branch condition. + HCondition* condition = cond->AsCondition(); + + // If this is a long or FP comparison that has been folded into + // the HCondition, generate the comparison directly. + Primitive::Type type = condition->InputAt(0)->GetType(); + if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + GenerateCompareTestAndBranch(condition, true_target, false_target); + return; + } + + LocationSummary* locations = cond->GetLocations(); + DCHECK(locations->InAt(0).IsRegister()); + vixl32::Register left = InputRegisterAt(cond, 0); + Location right = locations->InAt(1); + if (right.IsRegister()) { + __ Cmp(left, InputRegisterAt(cond, 1)); + } else { + DCHECK(right.IsConstant()); + __ Cmp(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); + } + if (true_target == nullptr) { + __ B(ARMCondition(condition->GetOppositeCondition()), false_target); + } else { + __ B(ARMCondition(condition->GetCondition()), true_target); + } + } + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. + if (true_target != nullptr && false_target != nullptr) { + __ B(false_target); + } +} + +void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); +} + +void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); + if (Primitive::IsFloatingPointType(select->GetType())) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + } + if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { + locations->SetInAt(2, Location::RequiresRegister()); + } + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { + LocationSummary* locations = select->GetLocations(); + vixl32::Label false_target; + GenerateTestAndBranch(select, + /* condition_input_index */ 2, + /* true_target */ nullptr, + &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); +} + +void CodeGeneratorARMVIXL::GenerateNop() { + __ Nop(); +} + +void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); + // Handle the long/FP comparisons made in instruction simplification. 
+ switch (cond->InputAt(0)->GetType()) { + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); + if (!cond->IsEmittedAtUseSite()) { + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } + break; + + // TODO(VIXL): https://android-review.googlesource.com/#/c/252265/ + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + if (!cond->IsEmittedAtUseSite()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } + break; + + default: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); + if (!cond->IsEmittedAtUseSite()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } + } +} + +void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { + if (cond->IsEmittedAtUseSite()) { + return; + } + + vixl32::Register out = OutputRegister(cond); + vixl32::Label true_label, false_label; + + switch (cond->InputAt(0)->GetType()) { + default: { + // Integer case. + __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1)); + AssemblerAccurateScope aas(GetVIXLAssembler(), + kArmInstrMaxSizeInBytes * 3u, + CodeBufferCheckScope::kMaximumSize); + __ ite(ARMCondition(cond->GetCondition())); + __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1); + __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0); + return; + } + case Primitive::kPrimLong: + GenerateLongComparesAndJumps(cond, &true_label, &false_label); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + GenerateVcmp(cond); + GenerateFPJumps(cond, &true_label, &false_label); + break; + } + + // Convert the jumps into the result. + vixl32::Label done_label; + + // False case: result = 0. + __ Bind(&false_label); + __ Mov(out, 0); + __ B(&done_label); + + // True case: result = 1. 
+ __ Bind(&true_label); + __ Mov(out, 1); + __ Bind(&done_label); +} + +void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) { + HandleCondition(comp); +} + +void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) { + HandleCondition(comp); +} + +void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. +} + +void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. +} + +void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. 
+} + +void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. +} + +void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. +} + +void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + memory_barrier->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); +} + +void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) { + ret->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall); + locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType())); +} + +void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) { + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + + // TODO(VIXL): TryDispatch + + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + + // TODO(VIXL): TryGenerateIntrinsicCode + + LocationSummary* locations = invoke->GetLocations(); + DCHECK(locations->HasTemps()); + codegen_->GenerateStaticOrDirectCall(invoke, locations->GetTemp(0)); + // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the + // previous instruction. + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { + InvokeDexCallingConventionVisitorARM calling_convention_visitor; + CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); +} + +void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { + // TODO(VIXL): TryDispatch + + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { + // TODO(VIXL): TryGenerateIntrinsicCode + + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); + // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the + // previous instruction. 
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) { + LocationSummary* locations = neg->GetLocations(); + Location out = locations->Out(); + Location in = locations->InAt(0); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: + __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0); + break; + + case Primitive::kPrimLong: + // out.lo = 0 - in.lo (and update the carry/borrow (C) flag) + __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0); + // We cannot emit an RSC (Reverse Subtract with Carry) + // instruction here, as it does not exist in the Thumb-2 + // instruction set. We use the following approach + // using SBC and SUB instead. + // + // out.hi = -C + __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out)); + // out.hi = out.hi - in.hi + __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in)); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + __ Vneg(OutputVRegister(neg), InputVRegisterAt(neg, 0)); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + +void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { + Primitive::Type result_type = conversion->GetResultType(); + Primitive::Type input_type = conversion->GetInputType(); + DCHECK_NE(result_type, input_type); + + // The float-to-long, double-to-long and long-to-float type conversions + // rely on a call to the runtime. + LocationSummary::CallKind call_kind = + (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble) + && result_type == Primitive::kPrimLong) + || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat)) + ? LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + + // The Java language does not allow treating boolean as an integral type but + // our bit representation makes it safe. + + switch (result_type) { + case Primitive::kPrimByte: + switch (input_type) { + case Primitive::kPrimLong: + // Type conversion from long to byte is a result of code transformations. + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + // Processing a Dex `int-to-byte' instruction. 
+ locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimShort: + switch (input_type) { + case Primitive::kPrimLong: + // Type conversion from long to short is a result of code transformations. + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + // Processing a Dex `int-to-short' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimInt: + switch (input_type) { + case Primitive::kPrimLong: + // Processing a Dex `long-to-int' instruction. + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case Primitive::kPrimFloat: + // Processing a Dex `float-to-int' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + + case Primitive::kPrimDouble: + // Processing a Dex `double-to-int' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimLong: + switch (input_type) { + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + // Processing a Dex `int-to-long' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case Primitive::kPrimFloat: { + // Processing a Dex `float-to-long' instruction. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(LocationFrom(r0, r1)); + break; + } + + case Primitive::kPrimDouble: { + // Processing a Dex `double-to-long' instruction. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0), + calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(LocationFrom(r0, r1)); + break; + } + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimChar: + switch (input_type) { + case Primitive::kPrimLong: + // Type conversion from long to char is a result of code transformations. + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + // Processing a Dex `int-to-char' instruction. 
+ locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimFloat: + switch (input_type) { + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + // Processing a Dex `int-to-float' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + + case Primitive::kPrimLong: { + // Processing a Dex `long-to-float' instruction. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0), + calling_convention.GetRegisterAt(1))); + locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0))); + break; + } + + case Primitive::kPrimDouble: + // Processing a Dex `double-to-float' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + }; + break; + + case Primitive::kPrimDouble: + switch (input_type) { + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + // Processing a Dex `int-to-double' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + + case Primitive::kPrimLong: + // Processing a Dex `long-to-double' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + + case Primitive::kPrimFloat: + // Processing a Dex `float-to-double' instruction. + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + }; + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } +} + +void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { + LocationSummary* locations = conversion->GetLocations(); + Location out = locations->Out(); + Location in = locations->InAt(0); + Primitive::Type result_type = conversion->GetResultType(); + Primitive::Type input_type = conversion->GetInputType(); + DCHECK_NE(result_type, input_type); + switch (result_type) { + case Primitive::kPrimByte: + switch (input_type) { + case Primitive::kPrimLong: + // Type conversion from long to byte is a result of code transformations. + __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8); + break; + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + // Processing a Dex `int-to-byte' instruction. 
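+ // SBFX extracts bits [7:0] and sign-extends them, e.g. an input of 0x000001f0
+ // becomes 0xfffffff0 (-16), matching the Java narrowing conversion to byte.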
+ __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimShort: + switch (input_type) { + case Primitive::kPrimLong: + // Type conversion from long to short is a result of code transformations. + __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); + break; + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + // Processing a Dex `int-to-short' instruction. + __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimInt: + switch (input_type) { + case Primitive::kPrimLong: + // Processing a Dex `long-to-int' instruction. + DCHECK(out.IsRegister()); + if (in.IsRegisterPair()) { + __ Mov(OutputRegister(conversion), LowRegisterFrom(in)); + } else if (in.IsDoubleStackSlot()) { + GetAssembler()->LoadFromOffset(kLoadWord, + OutputRegister(conversion), + sp, + in.GetStackIndex()); + } else { + DCHECK(in.IsConstant()); + DCHECK(in.GetConstant()->IsLongConstant()); + int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); + __ Mov(OutputRegister(conversion), static_cast<int32_t>(value)); + } + break; + + case Primitive::kPrimFloat: { + // Processing a Dex `float-to-int' instruction. + vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0)); + __ Vcvt(I32, F32, temp, InputSRegisterAt(conversion, 0)); + __ Vmov(OutputRegister(conversion), temp); + break; + } + + case Primitive::kPrimDouble: { + // Processing a Dex `double-to-int' instruction. + vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0)); + __ Vcvt(I32, F64, temp_s, FromLowSToD(LowSRegisterFrom(in))); + __ Vmov(OutputRegister(conversion), temp_s); + break; + } + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimLong: + switch (input_type) { + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + // Processing a Dex `int-to-long' instruction. + DCHECK(out.IsRegisterPair()); + DCHECK(in.IsRegister()); + __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0)); + // Sign extension. + __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31); + break; + + case Primitive::kPrimFloat: + // Processing a Dex `float-to-long' instruction. + codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc()); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + break; + + case Primitive::kPrimDouble: + // Processing a Dex `double-to-long' instruction. + codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc()); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimChar: + switch (input_type) { + case Primitive::kPrimLong: + // Type conversion from long to char is a result of code transformations. 
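+ // UBFX extracts bits [15:0] of the low word and zero-extends them, e.g. a long
+ // input of 0xffffffffffff8041 becomes 0x00008041, matching the Java conversion
+ // to char, which always yields an unsigned 16-bit value.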
+ __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); + break; + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + // Processing a Dex `int-to-char' instruction. + __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case Primitive::kPrimFloat: + switch (input_type) { + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimChar: { + // Processing a Dex `int-to-float' instruction. + __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0)); + __ Vcvt(F32, I32, OutputSRegister(conversion), OutputSRegister(conversion)); + break; + } + + case Primitive::kPrimLong: + // Processing a Dex `long-to-float' instruction. + codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc()); + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + break; + + case Primitive::kPrimDouble: + // Processing a Dex `double-to-float' instruction. + __ Vcvt(F32, F64, OutputSRegister(conversion), FromLowSToD(LowSRegisterFrom(in))); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + }; + break; + + case Primitive::kPrimDouble: + switch (input_type) { + case Primitive::kPrimBoolean: + // Boolean input is a result of code transformations. + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimChar: { + // Processing a Dex `int-to-double' instruction. + __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0)); + __ Vcvt(F64, I32, FromLowSToD(LowSRegisterFrom(out)), LowSRegisterFrom(out)); + break; + } + + case Primitive::kPrimLong: { + // Processing a Dex `long-to-double' instruction. + vixl32::Register low = LowRegisterFrom(in); + vixl32::Register high = HighRegisterFrom(in); + + vixl32::SRegister out_s = LowSRegisterFrom(out); + vixl32::DRegister out_d = FromLowSToD(out_s); + + vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0)); + vixl32::DRegister temp_d = FromLowSToD(temp_s); + + vixl32::SRegister constant_s = LowSRegisterFrom(locations->GetTemp(1)); + vixl32::DRegister constant_d = FromLowSToD(constant_s); + + // temp_d = int-to-double(high) + __ Vmov(temp_s, high); + __ Vcvt(F64, I32, temp_d, temp_s); + // constant_d = k2Pow32EncodingForDouble + __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble)); + // out_d = unsigned-to-double(low) + __ Vmov(out_s, low); + __ Vcvt(F64, U32, out_d, out_s); + // out_d += temp_d * constant_d + __ Vmla(F64, out_d, temp_d, constant_d); + break; + } + + case Primitive::kPrimFloat: + // Processing a Dex `float-to-double' instruction. 
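+ // Widening from float to double is exact (every binary32 value is representable
+ // in binary64), so a single VCVT with no rounding fix-up is sufficient.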
+ __ Vcvt(F64, F32, FromLowSToD(LowSRegisterFrom(out)), InputSRegisterAt(conversion, 0)); + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + }; + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } +} + +void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); + switch (add->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/ + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + } + + default: + LOG(FATAL) << "Unexpected add type " << add->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) { + LocationSummary* locations = add->GetLocations(); + Location out = locations->Out(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + + switch (add->GetResultType()) { + case Primitive::kPrimInt: { + __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1)); + } + break; + + // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/ + case Primitive::kPrimLong: { + DCHECK(second.IsRegisterPair()); + __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second)); + __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second)); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1)); + break; + + default: + LOG(FATAL) << "Unexpected add type " << add->GetResultType(); + } +} + +void LocationsBuilderARMVIXL::VisitSub(HSub* sub) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); + switch (sub->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/ + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + } + default: + LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* 
sub) { + LocationSummary* locations = sub->GetLocations(); + Location out = locations->Out(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + switch (sub->GetResultType()) { + case Primitive::kPrimInt: { + __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1)); + break; + } + + // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/ + case Primitive::kPrimLong: { + DCHECK(second.IsRegisterPair()); + __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second)); + __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second)); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1)); + break; + + default: + LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); + } +} + +void LocationsBuilderARMVIXL::VisitMul(HMul* mul) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + switch (mul->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + } + + default: + LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) { + LocationSummary* locations = mul->GetLocations(); + Location out = locations->Out(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + switch (mul->GetResultType()) { + case Primitive::kPrimInt: { + __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1)); + break; + } + case Primitive::kPrimLong: { + vixl32::Register out_hi = HighRegisterFrom(out); + vixl32::Register out_lo = LowRegisterFrom(out); + vixl32::Register in1_hi = HighRegisterFrom(first); + vixl32::Register in1_lo = LowRegisterFrom(first); + vixl32::Register in2_hi = HighRegisterFrom(second); + vixl32::Register in2_lo = LowRegisterFrom(second); + + // Extra checks to protect against clobbering caused by the existence of the R1_R2 pair. + // The algorithm is wrong if out.hi is either in1.lo or in2.lo: + // (e.g.
in1=r0_r1, in2=r2_r3 and out=r1_r2); + DCHECK_NE(out_hi.GetCode(), in1_lo.GetCode()); + DCHECK_NE(out_hi.GetCode(), in2_lo.GetCode()); + + // input: in1 - 64 bits, in2 - 64 bits + // output: out + // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo + // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32] + // parts: out.lo = (in1.lo * in2.lo)[31:0] + + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + // temp <- in1.lo * in2.hi + __ Mul(temp, in1_lo, in2_hi); + // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ Mla(out_hi, in1_hi, in2_lo, temp); + // out.lo <- (in1.lo * in2.lo)[31:0]; + __ Umull(out_lo, temp, in1_lo, in2_lo); + // out.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ Add(out_hi, out_hi, temp); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1)); + break; + + default: + LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + + Location second = instruction->GetLocations()->InAt(1); + DCHECK(second.IsConstant()); + + vixl32::Register out = OutputRegister(instruction); + vixl32::Register dividend = InputRegisterAt(instruction, 0); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ Mov(out, 0); + } else { + if (imm == 1) { + __ Mov(out, dividend); + } else { + __ Rsb(out, dividend, 0); + } + } +} + +void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + vixl32::Register out = OutputRegister(instruction); + vixl32::Register dividend = InputRegisterAt(instruction, 0); + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); + int ctz_imm = CTZ(abs_imm); + + if (ctz_imm == 1) { + __ Lsr(temp, dividend, 32 - ctz_imm); + } else { + __ Asr(temp, dividend, 31); + __ Lsr(temp, temp, 32 - ctz_imm); + } + __ Add(out, temp, dividend); + + if (instruction->IsDiv()) { + __ Asr(out, out, ctz_imm); + if (imm < 0) { + __ Rsb(out, out, 0); + } + } else { + __ Ubfx(out, out, 0, ctz_imm); + __ Sub(out, out, temp); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + vixl32::Register out = OutputRegister(instruction); + vixl32::Register dividend = InputRegisterAt(instruction, 0); + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0)); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1)); + int64_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + + int64_t magic; + int shift; + 
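// This is the standard "magic number" signed division (Hacker's Delight, ch. 10):
+ // the quotient comes from the high 32 bits of dividend * magic, corrected by
+ // +/- dividend when imm and magic have opposite signs, arithmetically shifted,
+ // and finally bumped by +1 for negative results (the Sub with ASR #31 below).
+ // Dividing by 7, for instance, is expected to use magic == 0x92492493 and
+ // shift == 2; since imm > 0 and magic < 0, the dividend is added back. +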
CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + __ Mov(temp1, magic); + __ Smull(temp2, temp1, dividend, temp1); + + if (imm > 0 && magic < 0) { + __ Add(temp1, temp1, dividend); + } else if (imm < 0 && magic > 0) { + __ Sub(temp1, temp1, dividend); + } + + if (shift != 0) { + __ Asr(temp1, temp1, shift); + } + + if (instruction->IsDiv()) { + __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31)); + } else { + __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31)); + // TODO: Strength reduction for mls. + __ Mov(temp2, imm); + __ Mls(out, temp1, temp2, dividend); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral( + HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + + Location second = instruction->GetLocations()->InAt(1); + DCHECK(second.IsConstant()); + + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } +} + +void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) { + LocationSummary::CallKind call_kind = LocationSummary::kNoCall; + if (div->GetResultType() == Primitive::kPrimLong) { + // pLdiv runtime call. + call_kind = LocationSummary::kCallOnMainOnly; + } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) { + // sdiv will be replaced by other instruction sequence. + } else if (div->GetResultType() == Primitive::kPrimInt && + !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + // pIdivmod runtime call. + call_kind = LocationSummary::kCallOnMainOnly; + } + + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind); + + switch (div->GetResultType()) { + case Primitive::kPrimInt: { + if (div->InputAt(1)->IsConstant()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + int32_t value = div->InputAt(1)->AsIntConstant()->GetValue(); + if (value == 1 || value == 0 || value == -1) { + // No temp register required. 
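[Editor's note: illustrative sketch, not part of this patch.] DivRemByPowerOfTwo and GenerateDivRemWithAnyConstant above are the two strength-reduction paths that GenerateDivRemConstantIntegral dispatches to: the first biases a negative dividend by 2^k - 1 so an arithmetic shift truncates toward zero, the second multiplies by a precomputed reciprocal and keeps the high word. A portable C++ restatement; the helper names are mine, the magic/shift pair for 7 is the commonly tabulated Hacker's Delight value, and CalculateMagicAndShiftForDivRem itself is not reproduced. Signed right shift is assumed to be arithmetic, as it is on ARM.

#include <cassert>
#include <cstdint>

// Divide by +/-2^k, mirroring DivRemByPowerOfTwo: bias a negative dividend by 2^k - 1
// so that the arithmetic shift truncates toward zero instead of toward minus infinity.
int32_t DivByPowerOfTwo(int32_t n, int k, bool negative_divisor) {
  uint32_t bias = static_cast<uint32_t>(n >> 31) >> (32 - k);  // 2^k - 1 if n < 0, else 0.
  int32_t q = (n + static_cast<int32_t>(bias)) >> k;
  return negative_divisor ? -q : q;
}

// Divide by 7, mirroring the Smull/Add/Asr/Sub sequence in GenerateDivRemWithAnyConstant.
int32_t DivBySeven(int32_t n) {
  const int32_t kMagic = static_cast<int32_t>(0x92492493u);    // Tabulated magic for d = 7 (< 0).
  const int kShift = 2;
  int32_t q = static_cast<int32_t>((static_cast<int64_t>(kMagic) * n) >> 32);  // Smull, high half.
  q += n;                 // imm > 0 && magic < 0: add the dividend back.
  q >>= kShift;           // Asr by the precomputed shift.
  return q - (q >> 31);   // Round negative results up by one (the final Sub with ASR #31).
}

int main() {
  for (int32_t n : {-100, -8, -7, -1, 0, 1, 6, 7, 100, 2147483647}) {
    assert(DivByPowerOfTwo(n, 3, /* negative_divisor= */ false) == n / 8);
    assert(DivByPowerOfTwo(n, 3, /* negative_divisor= */ true) == n / -8);
    assert(DivBySeven(n) == n / 7);
  }
  return 0;
}
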
+ } else { + locations->AddTemp(Location::RequiresRegister()); + if (!IsPowerOfTwo(AbsOrMin(value))) { + locations->AddTemp(Location::RequiresRegister()); + } + } + } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } else { + TODO_VIXL32(FATAL); + } + break; + } + case Primitive::kPrimLong: { + TODO_VIXL32(FATAL); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + } + + default: + LOG(FATAL) << "Unexpected div type " << div->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { + Location rhs = div->GetLocations()->InAt(1); + + switch (div->GetResultType()) { + case Primitive::kPrimInt: { + if (rhs.IsConstant()) { + GenerateDivRemConstantIntegral(div); + } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1)); + } else { + TODO_VIXL32(FATAL); + } + break; + } + + case Primitive::kPrimLong: { + TODO_VIXL32(FATAL); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1)); + break; + + default: + LOG(FATAL) << "Unexpected div type " << div->GetResultType(); + } +} + +void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { + // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ + LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); + if (instruction->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { + DivZeroCheckSlowPathARMVIXL* slow_path = + new (GetGraph()->GetArena()) DivZeroCheckSlowPathARMVIXL(instruction); + codegen_->AddSlowPath(slow_path); + + LocationSummary* locations = instruction->GetLocations(); + Location value = locations->InAt(0); + + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: { + if (value.IsRegister()) { + __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); + } else { + DCHECK(value.IsConstant()) << value; + if (value.GetConstant()->AsIntConstant()->GetValue() == 0) { + __ B(slow_path->GetEntryLabel()); + } + } + break; + } + case Primitive::kPrimLong: { + if (value.IsRegisterPair()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value)); + __ B(eq, slow_path->GetEntryLabel()); + } else { + DCHECK(value.IsConstant()) << value; + if (value.GetConstant()->AsLongConstant()->GetValue() == 0) { + __ B(slow_path->GetEntryLabel()); + } + } + break; + } + default: + LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType(); + } +} + +void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); + vixl32::Register in = InputRegisterAt(ror, 0); + Location rhs = locations->InAt(1); + vixl32::Register out = OutputRegister(ror); + + if (rhs.IsConstant()) { + // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31], + // so map all rotations to a +ve. equivalent in that range. + // (e.g. left *or* right by -2 bits == 30 bits in the same direction.) + uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F; + if (rot) { + // Rotate, mapping left rotations to right equivalents if necessary. + // (e.g. left by 2 bits == right by 30.) + __ Ror(out, in, rot); + } else if (!out.Is(in)) { + __ Mov(out, in); + } + } else { + __ Ror(out, in, RegisterFrom(rhs)); + } +} + +// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer +// rotates by swapping input regs (effectively rotating by the first 32-bits of +// a larger rotation) or flipping direction (thus treating larger right/left +// rotations as sub-word sized rotations in the other direction) as appropriate. +void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); + vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); + vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); + Location rhs = locations->InAt(1); + vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); + vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); + + if (rhs.IsConstant()) { + uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant()); + // Map all rotations to +ve. equivalents on the interval [0,63]. 
+ rot &= kMaxLongShiftDistance; + // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate + // logic below to a simple pair of binary orr. + // (e.g. 34 bits == in_reg swap + 2 bits right.) + if (rot >= kArmBitsPerWord) { + rot -= kArmBitsPerWord; + std::swap(in_reg_hi, in_reg_lo); + } + // Rotate, or mov to out for zero or word size rotations. + if (rot != 0u) { + __ Lsr(out_reg_hi, in_reg_hi, rot); + __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot)); + __ Lsr(out_reg_lo, in_reg_lo, rot); + __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot)); + } else { + __ Mov(out_reg_lo, in_reg_lo); + __ Mov(out_reg_hi, in_reg_hi); + } + } else { + vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0)); + vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1)); + vixl32::Label end; + vixl32::Label shift_by_32_plus_shift_right; + + __ And(shift_right, RegisterFrom(rhs), 0x1F); + __ Lsrs(shift_left, RegisterFrom(rhs), 6); + // TODO(VIXL): Check that flags are kept after "vixl32::LeaveFlags" enabled. + __ Rsb(shift_left, shift_right, kArmBitsPerWord); + __ B(cc, &shift_by_32_plus_shift_right); + + // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). + // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). + __ Lsl(out_reg_hi, in_reg_hi, shift_left); + __ Lsr(out_reg_lo, in_reg_lo, shift_right); + __ Add(out_reg_hi, out_reg_hi, out_reg_lo); + __ Lsl(out_reg_lo, in_reg_lo, shift_left); + __ Lsr(shift_left, in_reg_hi, shift_right); + __ Add(out_reg_lo, out_reg_lo, shift_left); + __ B(&end); + + __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. + // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). + // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left). 
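[Editor's note: illustrative sketch, not part of this patch.] HandleLongRotate reduces a 64-bit rotate to 32-bit operations: a distance of 32 or more is handled by swapping the halves first, and the remaining sub-word distance becomes one shift pair per output word, exactly the formulas in the comments above. A portable C++ restatement of the constant-distance case (the helper name is mine, not ART's):

#include <cassert>
#include <cstdint>
#include <utility>

// Rotate a 64-bit value right by `rot` using only 32-bit halves, as in the
// constant-distance path of HandleLongRotate.
uint64_t RotateRight64ViaHalves(uint64_t value, uint32_t rot) {
  uint32_t lo = static_cast<uint32_t>(value);
  uint32_t hi = static_cast<uint32_t>(value >> 32);
  rot &= 63u;                  // kMaxLongShiftDistance.
  if (rot >= 32u) {            // "Pre-rotate" by a whole word: swap the halves.
    std::swap(lo, hi);
    rot -= 32u;
  }
  if (rot != 0u) {             // rot == 0 needs no bit movement (the Mov/Mov case).
    uint32_t new_hi = (hi >> rot) | (lo << (32u - rot));   // Lsr plus Orr of the wrapped-in bits.
    uint32_t new_lo = (lo >> rot) | (hi << (32u - rot));
    hi = new_hi;
    lo = new_lo;
  }
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

int main() {
  const uint64_t v = 0x0123456789ABCDEFull;
  for (uint32_t rot : {0u, 1u, 31u, 32u, 33u, 63u, 64u, 95u}) {
    uint32_t r = rot & 63u;
    uint64_t expected = (r == 0u) ? v : (v >> r) | (v << (64u - r));
    assert(RotateRight64ViaHalves(v, rot) == expected);
  }
  return 0;
}
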
+ __ Lsr(out_reg_hi, in_reg_hi, shift_right); + __ Lsl(out_reg_lo, in_reg_lo, shift_left); + __ Add(out_reg_hi, out_reg_hi, out_reg_lo); + __ Lsr(out_reg_lo, in_reg_lo, shift_right); + __ Lsl(shift_right, in_reg_hi, shift_left); + __ Add(out_reg_lo, out_reg_lo, shift_right); + + __ Bind(&end); + } +} + +void LocationsBuilderARMVIXL::VisitRor(HRor* ror) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall); + switch (ror->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + if (ror->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) { + Primitive::Type type = ror->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + HandleIntegerRotate(ror); + break; + } + case Primitive::kPrimLong: { + HandleLongRotate(ror); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); + + switch (op->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + if (op->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + // Make the output overlap, as it will be used to hold the masked + // second input. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + if (op->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + // For simplicity, use kOutputOverlap even though we only require that low registers + // don't clash with high registers which the register allocator currently guarantees. 
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary* locations = op->GetLocations(); + Location out = locations->Out(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + + Primitive::Type type = op->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + vixl32::Register out_reg = OutputRegister(op); + vixl32::Register first_reg = InputRegisterAt(op, 0); + if (second.IsRegister()) { + vixl32::Register second_reg = RegisterFrom(second); + // ARM doesn't mask the shift count so we need to do it ourselves. + __ And(out_reg, second_reg, kMaxIntShiftDistance); + if (op->IsShl()) { + __ Lsl(out_reg, first_reg, out_reg); + } else if (op->IsShr()) { + __ Asr(out_reg, first_reg, out_reg); + } else { + __ Lsr(out_reg, first_reg, out_reg); + } + } else { + int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t shift_value = cst & kMaxIntShiftDistance; + if (shift_value == 0) { // ARM does not support shifting with 0 immediate. + __ Mov(out_reg, first_reg); + } else if (op->IsShl()) { + __ Lsl(out_reg, first_reg, shift_value); + } else if (op->IsShr()) { + __ Asr(out_reg, first_reg, shift_value); + } else { + __ Lsr(out_reg, first_reg, shift_value); + } + } + break; + } + case Primitive::kPrimLong: { + vixl32::Register o_h = HighRegisterFrom(out); + vixl32::Register o_l = LowRegisterFrom(out); + + vixl32::Register high = HighRegisterFrom(first); + vixl32::Register low = LowRegisterFrom(first); + + if (second.IsRegister()) { + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + + vixl32::Register second_reg = RegisterFrom(second); + + if (op->IsShl()) { + __ And(o_l, second_reg, kMaxLongShiftDistance); + // Shift the high part + __ Lsl(o_h, high, o_l); + // Shift the low part and `or` what overflew on the high part + __ Rsb(temp, o_l, kArmBitsPerWord); + __ Lsr(temp, low, temp); + __ Orr(o_h, o_h, temp); + // If the shift is > 32 bits, override the high part + __ Subs(temp, o_l, kArmBitsPerWord); + { + AssemblerAccurateScope guard(GetVIXLAssembler(), + 3 * kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(pl); + __ lsl(pl, o_h, low, temp); + } + // Shift the low part + __ Lsl(o_l, low, o_l); + } else if (op->IsShr()) { + __ And(o_h, second_reg, kMaxLongShiftDistance); + // Shift the low part + __ Lsr(o_l, low, o_h); + // Shift the high part and `or` what underflew on the low part + __ Rsb(temp, o_h, kArmBitsPerWord); + __ Lsl(temp, high, temp); + __ Orr(o_l, o_l, temp); + // If the shift is > 32 bits, override the low part + __ Subs(temp, o_h, kArmBitsPerWord); + { + AssemblerAccurateScope guard(GetVIXLAssembler(), + 3 * kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(pl); + __ asr(pl, o_l, high, temp); + } + // Shift the high part + __ Asr(o_h, high, o_h); + } else { + __ And(o_h, second_reg, kMaxLongShiftDistance); + // same as Shr except we use `Lsr`s and not `Asr`s + __ Lsr(o_l, low, o_h); + __ Rsb(temp, o_h, kArmBitsPerWord); + __ Lsl(temp, high, temp); + __ Orr(o_l, o_l, temp); + __ Subs(temp, o_h, 
kArmBitsPerWord); + { + AssemblerAccurateScope guard(GetVIXLAssembler(), + 3 * kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(pl); + __ lsr(pl, o_l, high, temp); + } + __ Lsr(o_h, high, o_h); + } + } else { + // Register allocator doesn't create partial overlap. + DCHECK(!o_l.Is(high)); + DCHECK(!o_h.Is(low)); + int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t shift_value = cst & kMaxLongShiftDistance; + if (shift_value > 32) { + if (op->IsShl()) { + __ Lsl(o_h, low, shift_value - 32); + __ Mov(o_l, 0); + } else if (op->IsShr()) { + __ Asr(o_l, high, shift_value - 32); + __ Asr(o_h, high, 31); + } else { + __ Lsr(o_l, high, shift_value - 32); + __ Mov(o_h, 0); + } + } else if (shift_value == 32) { + if (op->IsShl()) { + __ Mov(o_h, low); + __ Mov(o_l, 0); + } else if (op->IsShr()) { + __ Mov(o_l, high); + __ Asr(o_h, high, 31); + } else { + __ Mov(o_l, high); + __ Mov(o_h, 0); + } + } else if (shift_value == 1) { + if (op->IsShl()) { + __ Lsls(o_l, low, 1); + __ Adc(o_h, high, high); + } else if (op->IsShr()) { + __ Asrs(o_h, high, 1); + __ Rrx(o_l, low); + } else { + __ Lsrs(o_h, high, 1); + __ Rrx(o_l, low); + } + } else { + DCHECK(2 <= shift_value && shift_value < 32) << shift_value; + if (op->IsShl()) { + __ Lsl(o_h, high, shift_value); + __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value)); + __ Lsl(o_l, low, shift_value); + } else if (op->IsShr()) { + __ Lsr(o_l, low, shift_value); + __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value)); + __ Asr(o_h, high, shift_value); + } else { + __ Lsr(o_l, low, shift_value); + __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value)); + __ Lsr(o_h, high, shift_value); + } + } + } + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void LocationsBuilderARMVIXL::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + +void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + +void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + if (instruction->IsStringAlloc()) { + locations->AddTemp(LocationFrom(kMethodRegister)); + } else { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + } + locations->SetOut(LocationFrom(r0)); +} + +void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) { + // Note: if heap poisoning is enabled, the entry point takes cares + // of poisoning the reference. + if (instruction->IsStringAlloc()) { + // String is allocated through StringFactory. Call NewEmptyString entry point. 
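[Editor's note: illustrative sketch, not part of this patch.] The register-amount long shifts in HandleShift above build each output word from two 32-bit shifts and then use the Subs/it(pl) pair to overwrite one half when the masked distance is 32 or more; they rely on ARM register-controlled shifts producing zero for distances of 32 and above. A portable C++ restatement of the Shl path (helper names are mine, not ART's):

#include <cassert>
#include <cstdint>

// ARM register-controlled logical shifts produce 0 for any distance of 32 or more.
static uint32_t Lsl(uint32_t x, uint32_t n) { return n < 32u ? x << n : 0u; }
static uint32_t Lsr(uint32_t x, uint32_t n) { return n < 32u ? x >> n : 0u; }

// 64-bit left shift from 32-bit parts, following the IsShl() register-amount path above.
uint64_t ShlLongViaHalves(uint64_t value, uint32_t amount) {
  uint32_t lo = static_cast<uint32_t>(value);
  uint32_t hi = static_cast<uint32_t>(value >> 32);
  uint32_t shift = amount & 63u;          // And(o_l, second, kMaxLongShiftDistance).
  uint32_t out_hi = Lsl(hi, shift);       // Shift the high part.
  // Or in what overflowed from the low part; for shift > 32 the Rsb result wraps, but the
  // register-controlled Lsr then yields 0 anyway, which the helper reproduces.
  out_hi |= Lsr(lo, 32u - shift);
  if (shift >= 32u) {                     // Subs / it(pl): the shift spans a whole word.
    out_hi = Lsl(lo, shift - 32u);        // Override the high part with low << (shift - 32).
  }
  uint32_t out_lo = Lsl(lo, shift);       // Shift the low part.
  return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
}

int main() {
  const uint64_t v = 0x8123456789ABCDEFull;
  for (uint32_t s = 0; s < 64u; ++s) {
    assert(ShlLongViaHalves(v, s) == (v << s));
  }
  return 0;
}
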
+ vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0)); + MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize); + GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString)); + GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value()); + AssemblerAccurateScope aas(GetVIXLAssembler(), + kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ blx(lr); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + } else { + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + } +} + +void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetOut(LocationFrom(r0)); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); +} + +void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); + // Note: if heap poisoning is enabled, the entry point takes cares + // of poisoning the reference. + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); +} + +void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } + locations->SetOut(location); +} + +void InstructionCodeGeneratorARMVIXL::VisitParameterValue( + HParameterValue* instruction ATTRIBUTE_UNUSED) { + // Nothing to do, the parameter is already at its location. +} + +void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(LocationFrom(kMethodRegister)); +} + +void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod( + HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + // Nothing to do, the method is already at its location. 
+} + +void LocationsBuilderARMVIXL::VisitNot(HNot* not_) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) { + LocationSummary* locations = not_->GetLocations(); + Location out = locations->Out(); + Location in = locations->InAt(0); + switch (not_->GetResultType()) { + case Primitive::kPrimInt: + __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0)); + break; + + case Primitive::kPrimLong: + __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in)); + __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in)); + break; + + default: + LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType(); + } +} + +void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Output overlaps because it is written before doing the low comparison. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1))); + locations->SetOut(Location::RequiresRegister()); + break; + } + default: + LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { + LocationSummary* locations = compare->GetLocations(); + vixl32::Register out = OutputRegister(compare); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + + vixl32::Label less, greater, done; + Primitive::Type type = compare->InputAt(0)->GetType(); + vixl32::Condition less_cond = vixl32::Condition(kNone); + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: { + // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags. + __ Mov(out, 0); + __ Cmp(RegisterFrom(left), RegisterFrom(right)); // Signed compare. + less_cond = lt; + break; + } + case Primitive::kPrimLong: { + __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare. + __ B(lt, &less); + __ B(gt, &greater); + // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags. + __ Mov(out, 0); + __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare. + less_cond = lo; + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + __ Mov(out, 0); + GenerateVcmp(compare); + // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS). 
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + less_cond = ARMFPCondition(kCondLT, compare->IsGtBias()); + break; + } + default: + LOG(FATAL) << "Unexpected compare type " << type; + UNREACHABLE(); + } + + __ B(eq, &done); + __ B(less_cond, &less); + + __ Bind(&greater); + __ Mov(out, 1); + __ B(&done); + + __ Bind(&less); + __ Mov(out, -1); + + __ Bind(&done); +} + +void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); +} + +void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit ARM barrier kinds. + DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings. + switch (kind) { + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kAnyAny: { + flavor = DmbOptions::ISH; + break; + } + case MemBarrierKind::kStoreStore: { + flavor = DmbOptions::ISHST; + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } + __ Dmb(flavor); +} + +void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr, + uint32_t offset, + vixl32::Register out_lo, + vixl32::Register out_hi) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + if (offset != 0) { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, addr, offset); + addr = temp; + } + __ Ldrexd(out_lo, out_hi, addr); +} + +void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr, + uint32_t offset, + vixl32::Register value_lo, + vixl32::Register value_hi, + vixl32::Register temp1, + vixl32::Register temp2, + HInstruction* instruction) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Label fail; + if (offset != 0) { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, addr, offset); + addr = temp; + } + __ Bind(&fail); + // We need a load followed by store. (The address used in a STREX instruction must + // be the same as the address in the most recently executed LDREX instruction.) + __ Ldrexd(temp1, temp2, addr); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ Strexd(temp1, value_lo, value_hi, addr); + __ Cbnz(temp1, &fail); +} + +void LocationsBuilderARMVIXL::HandleFieldSet( + HInstruction* instruction, const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + + Primitive::Type field_type = field_info.GetFieldType(); + if (Primitive::IsFloatingPointType(field_type)) { + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + + bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; + bool generate_volatile = field_info.IsVolatile() + && is_wide + && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); + // Temporary registers for the write barrier. 
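[Editor's note: illustrative sketch, not part of this patch.] The kPrimLong case of VisitCompare above orders two longs with 32-bit compares: the high words are compared signed (lt) and, only when they are equal, the low words are compared unsigned (lo), because the low word of a two's-complement value is pure magnitude. A portable C++ restatement (the helper name is mine; arithmetic right shift on signed values is assumed, as on ARM):

#include <cassert>
#include <cstdint>

// Three-way compare of two int64_t values using only 32-bit comparisons,
// mirroring the kPrimLong case of VisitCompare above.
int Compare64ViaHalves(int64_t a, int64_t b) {
  int32_t a_hi = static_cast<int32_t>(a >> 32);
  int32_t b_hi = static_cast<int32_t>(b >> 32);
  if (a_hi != b_hi) {
    return a_hi < b_hi ? -1 : 1;                  // High words carry the sign: signed lt/gt.
  }
  uint32_t a_lo = static_cast<uint32_t>(a);
  uint32_t b_lo = static_cast<uint32_t>(b);
  if (a_lo != b_lo) {
    return a_lo < b_lo ? -1 : 1;                  // Low words are pure magnitude: unsigned lo/hi.
  }
  return 0;
}

int main() {
  assert(Compare64ViaHalves(-1, 0) == -1);        // -1 has an all-ones low word; comparing the low
                                                  // words alone would get this wrong, but the
                                                  // signed high-word compare decides first.
  assert(Compare64ViaHalves(INT64_C(0x100000000), INT64_C(0xFFFFFFFF)) == 1);
  assert(Compare64ViaHalves(42, 42) == 0);
  return 0;
}
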
+ // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. + if (needs_write_barrier) { + locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. + locations->AddTemp(Location::RequiresRegister()); + } else if (generate_volatile) { + // ARM encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. + DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + if (field_type == Primitive::kPrimDouble) { + // For doubles we need two more registers to copy the value. + locations->AddTemp(LocationFrom(r2)); + locations->AddTemp(LocationFrom(r3)); + } + } +} + +void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register base = InputRegisterAt(instruction, 0); + Location value = locations->InAt(1); + + bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); + + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + + switch (field_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: { + GetAssembler()->StoreToOffset(kStoreByte, RegisterFrom(value), base, offset); + break; + } + + case Primitive::kPrimShort: + case Primitive::kPrimChar: { + GetAssembler()->StoreToOffset(kStoreHalfword, RegisterFrom(value), base, offset); + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + if (kPoisonHeapReferences && needs_write_barrier) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. 
+ DCHECK_EQ(field_type, Primitive::kPrimNot); + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + __ Mov(temp, RegisterFrom(value)); + GetAssembler()->PoisonHeapReference(temp); + GetAssembler()->StoreToOffset(kStoreWord, temp, base, offset); + } else { + GetAssembler()->StoreToOffset(kStoreWord, RegisterFrom(value), base, offset); + } + break; + } + + case Primitive::kPrimLong: { + if (is_volatile && !atomic_ldrd_strd) { + GenerateWideAtomicStore(base, + offset, + LowRegisterFrom(value), + HighRegisterFrom(value), + RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1)), + instruction); + } else { + GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + break; + } + + case Primitive::kPrimFloat: { + GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset); + break; + } + + case Primitive::kPrimDouble: { + vixl32::DRegister value_reg = FromLowSToD(LowSRegisterFrom(value)); + if (is_volatile && !atomic_ldrd_strd) { + vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0)); + vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1)); + + __ Vmov(value_reg_lo, value_reg_hi, value_reg); + + GenerateWideAtomicStore(base, + offset, + value_reg_lo, + value_reg_hi, + RegisterFrom(locations->GetTemp(2)), + RegisterFrom(locations->GetTemp(3)), + instruction); + } else { + GetAssembler()->StoreDToOffset(value_reg, base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << field_type; + UNREACHABLE(); + } + + // Longs and doubles are handled in the switch. + if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + vixl32::Register card = RegisterFrom(locations->GetTemp(1)); + codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null); + } + + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::RequiresRegister()); + + bool volatile_for_double = field_info.IsVolatile() + && (field_info.GetFieldType() == Primitive::kPrimDouble) + && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + // The output overlaps in case of volatile long: we don't want the + // code generated by GenerateWideAtomicLoad to overwrite the + // object's location. Likewise, in the case of an object field get + // with read barriers enabled, we do not want the load to overwrite + // the object's location, as we need it to emit the read barrier. 
+ bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) || + object_field_get_with_read_barrier; + + if (Primitive::IsFloatingPointType(instruction->GetType())) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetOut(Location::RequiresRegister(), + (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap)); + } + if (volatile_for_double) { + // ARM encoding have some additional constraints for ldrexd/strexd: + // - registers need to be consecutive + // - the first register should be even but not R14. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. + DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } +} + +Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) { + DCHECK(Primitive::IsFloatingPointType(input->GetType())) << input->GetType(); + if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) || + (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) { + return Location::ConstantLocation(input->AsConstant()); + } else { + return Location::RequiresFpuRegister(); + } +} + +Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant, + Opcode opcode) { + DCHECK(!Primitive::IsFloatingPointType(constant->GetType())); + if (constant->IsConstant() && + CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { + return Location::ConstantLocation(constant->AsConstant()); + } + return Location::RequiresRegister(); +} + +bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, + Opcode opcode) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); + if (Primitive::Is64BitType(input_cst->GetType())) { + Opcode high_opcode = opcode; + SetCc low_set_cc = kCcDontCare; + switch (opcode) { + case SUB: + // Flip the operation to an ADD. + value = -value; + opcode = ADD; + FALLTHROUGH_INTENDED; + case ADD: + if (Low32Bits(value) == 0u) { + return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare); + } + high_opcode = ADC; + low_set_cc = kCcSet; + break; + default: + break; + } + return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) && + CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare); + } else { + return CanEncodeConstantAsImmediate(Low32Bits(value), opcode); + } +} + +// TODO(VIXL): Replace art::arm::SetCc` with `vixl32::FlagsUpdate after flags set optimization +// enabled. 
+bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value, + Opcode opcode, + SetCc set_cc) { + ArmVIXLAssembler* assembler = codegen_->GetAssembler(); + if (assembler->ShifterOperandCanHold(opcode, value, set_cc)) { + return true; + } + Opcode neg_opcode = kNoOperand; + switch (opcode) { + case AND: neg_opcode = BIC; value = ~value; break; + case ORR: neg_opcode = ORN; value = ~value; break; + case ADD: neg_opcode = SUB; value = -value; break; + case ADC: neg_opcode = SBC; value = ~value; break; + case SUB: neg_opcode = ADD; value = -value; break; + case SBC: neg_opcode = ADC; value = ~value; break; + default: + return false; + } + return assembler->ShifterOperandCanHold(neg_opcode, value, set_cc); +} + +void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register base = InputRegisterAt(instruction, 0); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + switch (field_type) { + case Primitive::kPrimBoolean: + GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimByte: + GetAssembler()->LoadFromOffset(kLoadSignedByte, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimShort: + GetAssembler()->LoadFromOffset(kLoadSignedHalfword, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimChar: + GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimInt: + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimNot: { + // /* HeapReference<Object> */ out = *(base + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + TODO_VIXL32(FATAL); + } else { + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset); + // TODO(VIXL): Scope to guarantee the position immediately after the load. + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, locations->InAt(0), offset); + } + break; + } + + case Primitive::kPrimLong: + if (is_volatile && !atomic_ldrd_strd) { + GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out)); + } else { + GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset); + } + break; + + case Primitive::kPrimFloat: + GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset); + break; + + case Primitive::kPrimDouble: { + vixl32::DRegister out_dreg = FromLowSToD(LowSRegisterFrom(out)); + if (is_volatile && !atomic_ldrd_strd) { + vixl32::Register lo = RegisterFrom(locations->GetTemp(0)); + vixl32::Register hi = RegisterFrom(locations->GetTemp(1)); + GenerateWideAtomicLoad(base, offset, lo, hi); + // TODO(VIXL): Do we need to be immediately after the ldrexd instruction? If so we need a + // scope. 
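[Editor's note: illustrative sketch, not part of this patch.] CanEncodeConstantAsImmediate above falls back to the complementary instruction when a constant is not itself a valid modified immediate but its complement or negation is: AND k becomes BIC ~k, ORR becomes ORN ~k, ADD k becomes SUB -k, and ADC/SBC swap with the complemented constant. For example, 0xFFFFFF00 is not a Thumb-2 modified immediate while its complement 0xFF is; the identities being relied on are plain two's-complement facts:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x12345678u;
  assert((x & 0xFFFFFF00u) == (x & ~0x000000FFu));          // AND 0xFFFFFF00 == BIC 0xFF.
  assert((x | 0xFFFFFF00u) == (x | ~0x000000FFu));          // ORR 0xFFFFFF00 == ORN 0xFF.
  assert(x + 0xFFFFFF00u == x - 0x00000100u);               // ADD 0xFFFFFF00 == SUB 0x100 (mod 2^32).
  for (uint32_t carry : {0u, 1u}) {                         // ADC k, carry == SBC ~k, carry,
    assert(x + 0xFFFFFF00u + carry ==                       // because the borrow is the
           x - 0x000000FFu - (1u - carry));                 // inverted carry on ARM.
  }
  return 0;
}
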
+ codegen_->MaybeRecordImplicitNullCheck(instruction); + __ Vmov(out_dreg, lo, hi); + } else { + GetAssembler()->LoadDFromOffset(out_dreg, base, offset); + // TODO(VIXL): Scope to guarantee the position immediately after the load. + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << field_type; + UNREACHABLE(); + } + + if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) { + // Potential implicit null checks, in the case of reference or + // double fields, are handled in the previous switch statement. + } else { + // Address cases other than reference and double that may require an implicit null check. + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (is_volatile) { + if (field_type == Primitive::kPrimNot) { + // Memory barriers, in the case of references, are also handled + // in the previous switch statement. + } else { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } +} + +void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); +} + +void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) { + // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ + LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + if (instruction->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } +} + +void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (CanMoveNullCheckToUser(instruction)) { + return; + } + + UseScratchRegisterScope temps(GetVIXLAssembler()); + AssemblerAccurateScope aas(GetVIXLAssembler(), + kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0))); + RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) { + NullCheckSlowPathARMVIXL* slow_path = + new (GetGraph()->GetArena()) NullCheckSlowPathARMVIXL(instruction); + AddSlowPath(slow_path); + __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); +} + +void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) { + codegen_->GenerateNullCheck(instruction); +} + +void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) { + uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); + vixl32::Register obj = InputRegisterAt(instruction, 0); + vixl32::Register out = OutputRegister(instruction); + GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // TODO(VIXL): https://android-review.googlesource.com/#/c/272625/ +} + +void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, + vixl32::Register card, + vixl32::Register object, + vixl32::Register value, + bool can_be_null) { + vixl32::Label is_null; + if (can_be_null) { + __ Cbz(value, &is_null); + } + GetAssembler()->LoadFromOffset( + kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value()); + __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); + __ Strb(card, MemOperand(card, temp)); + if (can_be_null) { + __ Bind(&is_null); + } +} + +void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) { + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) { + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ and related. +} + +void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. + return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. 
+ return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + SuspendCheckSlowPathARMVIXL* slow_path = + down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARMVIXL(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + + UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + GetAssembler()->LoadFromOffset( + kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value()); + if (successor == nullptr) { + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ Cbz(temp, codegen_->GetLabelOf(successor)); + __ B(slow_path->GetEntryLabel()); + } +} + +ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { + UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); + MoveOperands* move = moves_[index]; + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister()) { + if (destination.IsRegister()) { + __ Mov(RegisterFrom(destination), RegisterFrom(source)); + } else if (destination.IsFpuRegister()) { + __ Vmov(SRegisterFrom(destination), RegisterFrom(source)); + } else { + DCHECK(destination.IsStackSlot()); + GetAssembler()->StoreToOffset(kStoreWord, + RegisterFrom(source), + sp, + destination.GetStackIndex()); + } + } else if (source.IsStackSlot()) { + if (destination.IsRegister()) { + GetAssembler()->LoadFromOffset(kLoadWord, + RegisterFrom(destination), + sp, + source.GetStackIndex()); + } else if (destination.IsFpuRegister()) { + GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex()); + } else { + DCHECK(destination.IsStackSlot()); + vixl32::Register temp = temps.Acquire(); + GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex()); + GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); + } + } else if (source.IsFpuRegister()) { + if (destination.IsRegister()) { + TODO_VIXL32(FATAL); + } else if (destination.IsFpuRegister()) { + __ Vmov(SRegisterFrom(destination), SRegisterFrom(source)); + } else { + DCHECK(destination.IsStackSlot()); + GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex()); + } + } else if (source.IsDoubleStackSlot()) { + if (destination.IsDoubleStackSlot()) { + vixl32::DRegister temp = temps.AcquireD(); + GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex()); + GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex()); + } else if (destination.IsRegisterPair()) { + DCHECK(ExpectedPairLayout(destination)); + GetAssembler()->LoadFromOffset( + kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex()); + } else { + DCHECK(destination.IsFpuRegisterPair()) << destination; + GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex()); + } + } else if (source.IsRegisterPair()) { + if (destination.IsRegisterPair()) { + __ 
Mov(LowRegisterFrom(destination), LowRegisterFrom(source)); + __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source)); + } else if (destination.IsFpuRegisterPair()) { + __ Vmov(FromLowSToD(LowSRegisterFrom(destination)), + LowRegisterFrom(source), + HighRegisterFrom(source)); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + DCHECK(ExpectedPairLayout(source)); + GetAssembler()->StoreToOffset(kStoreWordPair, + LowRegisterFrom(source), + sp, + destination.GetStackIndex()); + } + } else if (source.IsFpuRegisterPair()) { + if (destination.IsRegisterPair()) { + TODO_VIXL32(FATAL); + } else if (destination.IsFpuRegisterPair()) { + __ Vmov(DRegisterFrom(destination), DRegisterFrom(source)); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex()); + } + } else { + DCHECK(source.IsConstant()) << source; + HConstant* constant = source.GetConstant(); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(constant); + if (destination.IsRegister()) { + __ Mov(RegisterFrom(destination), value); + } else { + DCHECK(destination.IsStackSlot()); + vixl32::Register temp = temps.Acquire(); + __ Mov(temp, value); + GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); + } + } else if (constant->IsLongConstant()) { + int64_t value = constant->AsLongConstant()->GetValue(); + if (destination.IsRegisterPair()) { + __ Mov(LowRegisterFrom(destination), Low32Bits(value)); + __ Mov(HighRegisterFrom(destination), High32Bits(value)); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + vixl32::Register temp = temps.Acquire(); + __ Mov(temp, Low32Bits(value)); + GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); + __ Mov(temp, High32Bits(value)); + GetAssembler()->StoreToOffset(kStoreWord, + temp, + sp, + destination.GetHighStackIndex(kArmWordSize)); + } + } else if (constant->IsDoubleConstant()) { + double value = constant->AsDoubleConstant()->GetValue(); + if (destination.IsFpuRegisterPair()) { + __ Vmov(FromLowSToD(LowSRegisterFrom(destination)), value); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + uint64_t int_value = bit_cast<uint64_t, double>(value); + vixl32::Register temp = temps.Acquire(); + __ Mov(temp, Low32Bits(int_value)); + GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); + __ Mov(temp, High32Bits(int_value)); + GetAssembler()->StoreToOffset(kStoreWord, + temp, + sp, + destination.GetHighStackIndex(kArmWordSize)); + } + } else { + DCHECK(constant->IsFloatConstant()) << constant->DebugName(); + float value = constant->AsFloatConstant()->GetValue(); + if (destination.IsFpuRegister()) { + __ Vmov(SRegisterFrom(destination), value); + } else { + DCHECK(destination.IsStackSlot()); + vixl32::Register temp = temps.Acquire(); + __ Mov(temp, bit_cast<int32_t, float>(value)); + GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); + } + } + } +} + +void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) { + UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Mov(temp, reg); + GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem); + GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem); +} + +void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) { + // TODO(VIXL32): Double 
check the performance of this implementation. + UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + vixl32::SRegister temp_s = temps.AcquireS(); + + __ Ldr(temp, MemOperand(sp, mem1)); + __ Vldr(temp_s, MemOperand(sp, mem2)); + __ Str(temp, MemOperand(sp, mem2)); + __ Vstr(temp_s, MemOperand(sp, mem1)); +} + +void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) { + MoveOperands* move = moves_[index]; + Location source = move->GetSource(); + Location destination = move->GetDestination(); + UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); + + if (source.IsRegister() && destination.IsRegister()) { + vixl32::Register temp = temps.Acquire(); + DCHECK(!RegisterFrom(source).Is(temp)); + DCHECK(!RegisterFrom(destination).Is(temp)); + __ Mov(temp, RegisterFrom(destination)); + __ Mov(RegisterFrom(destination), RegisterFrom(source)); + __ Mov(RegisterFrom(source), temp); + } else if (source.IsRegister() && destination.IsStackSlot()) { + Exchange(RegisterFrom(source), destination.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsRegister()) { + Exchange(RegisterFrom(destination), source.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsStackSlot()) { + TODO_VIXL32(FATAL); + } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { + TODO_VIXL32(FATAL); + } else if (source.IsRegisterPair() && destination.IsRegisterPair()) { + vixl32::DRegister temp = temps.AcquireD(); + __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source)); + __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination)); + __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination)); + __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp); + } else if (source.IsRegisterPair() || destination.IsRegisterPair()) { + vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination); + int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex(); + DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination)); + vixl32::DRegister temp = temps.AcquireD(); + __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1)); + GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem); + GetAssembler()->StoreDToOffset(temp, sp, mem); + } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) { + TODO_VIXL32(FATAL); + } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) { + TODO_VIXL32(FATAL); + } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { + TODO_VIXL32(FATAL); + } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { + vixl32::DRegister temp1 = temps.AcquireD(); + vixl32::DRegister temp2 = temps.AcquireD(); + __ Vldr(temp1, MemOperand(sp, source.GetStackIndex())); + __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex())); + __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex())); + __ Vstr(temp2, MemOperand(sp, source.GetStackIndex())); + } else { + LOG(FATAL) << "Unimplemented" << source << " <-> " << destination; + } +} + +void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + +void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + +// Check if the desired_class_load_kind is supported. If it is, return it, +// otherwise return a fall-back kind that should be used instead. 
+HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind ATTRIBUTE_UNUSED) { + // TODO(VIXL): Implement optimized code paths. + return HLoadClass::LoadKind::kDexCacheViaMethod; +} + +void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + LocationFrom(calling_convention.GetRegisterAt(0)), + LocationFrom(r0), + /* code_generator_supports_read_barrier */ true); + return; + } + + // TODO(VIXL): read barrier code. + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod || + load_kind == HLoadClass::LoadKind::kDexCachePcRelative) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { + LocationSummary* locations = cls->GetLocations(); + if (cls->NeedsAccessCheck()) { + codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); + codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); + return; + } + + Location out_loc = locations->Out(); + vixl32::Register out = OutputRegister(cls); + + // TODO(VIXL): read barrier code. + bool generate_null_check = false; + switch (cls->GetLoadKind()) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + vixl32::Register current_method = InputRegisterAt(cls, 0); + GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + kEmitCompilerReadBarrier); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + vixl32::Register current_method = InputRegisterAt(cls, 0); + const int32_t resolved_types_offset = + ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value(); + GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset); + // /* GcRoot<mirror::Class> */ out = out[type_index] + size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier); + generate_null_check = !cls->IsInDexCache(); + break; + } + default: + TODO_VIXL32(FATAL); + } + + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + LoadClassSlowPathARMVIXL* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Cbz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } + } +} + +void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* 
check) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, Location::RequiresRegister()); + if (check->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) { + // We assume the class is not null. + LoadClassSlowPathARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), + check, + check->GetDexPc(), + /* do_clinit */ true); + codegen_->AddSlowPath(slow_path); + GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); +} + +void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( + LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + GetAssembler()->LoadFromOffset(kLoadWord, + temp, + class_reg, + mirror::Class::StatusOffset().Int32Value()); + __ Cmp(temp, mirror::Class::kStatusInitialized); + __ B(lt, slow_path->GetEntryLabel()); + // Even if the initialized flag is set, we may be in a situation where caches are not synced + // properly. Therefore, we do a memory fence. + __ Dmb(ISH); + __ Bind(slow_path->GetExitLabel()); +} + +// Check if the desired_string_load_kind is supported. If it is, return it, +// otherwise return a fall-back kind that should be used instead. +HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) { + // TODO(VIXL): Implement optimized code paths. For now we always use the simpler fallback code. + return HLoadString::LoadKind::kDexCacheViaMethod; +} + +void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { + LocationSummary::CallKind call_kind = load->NeedsEnvironment() + ? LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + + // TODO(VIXL): Implement optimized code paths. + // See InstructionCodeGeneratorARMVIXL::VisitLoadString. + HLoadString::LoadKind load_kind = load->GetLoadKind(); + if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + locations->SetInAt(0, Location::RequiresRegister()); + // TODO(VIXL): Use InvokeRuntimeCallingConventionARMVIXL instead. + locations->SetOut(LocationFrom(r0)); + } else { + locations->SetOut(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) { + // TODO(VIXL): Implement optimized code paths. + // We implemented the simplest solution to get first ART tests passing, we deferred the + // optimized path until later, we should implement it using ARM64 implementation as a + // reference. The same related to LocationsBuilderARMVIXL::VisitLoadString. + + // TODO: Re-add the compiler code to do string dex cache lookup again. 
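A note on GenerateClassInitializationCheck above: the status compare branches to the slow path when the class is not yet marked initialized, and the DMB ISH that follows acts as an acquire barrier pairing with the release publication of the initialized status by the initializing thread, which is what the "caches are not synced properly" comment is guarding against. The following is a rough, illustrative C++ analogue of that ordering only; the struct, field, and constant names are placeholders, not ART's.

#include <atomic>

// Toy model of the initialized-check ordering; not ART code.
struct Klass {
  std::atomic<int> status{0};   // published with release semantics by the initializer
  int some_static_field = 0;    // written in <clinit> before status is published
};

constexpr int kStatusInitialized = 10;  // placeholder value

void InitializerThread(Klass* k) {
  k->some_static_field = 42;                                     // <clinit> work
  k->status.store(kStatusInitialized, std::memory_order_release);
}

int ReaderThread(Klass* k) {
  if (k->status.load(std::memory_order_relaxed) < kStatusInitialized) {
    return -1;  // would take the LoadClassSlowPathARMVIXL equivalent
  }
  std::atomic_thread_fence(std::memory_order_acquire);  // the DMB ISH in the generated code
  return k->some_static_field;                          // guaranteed to observe 42
}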
+ DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex()); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); +} + +static int32_t GetExceptionTlsOffset() { + return Thread::ExceptionOffset<kArmPointerSize>().Int32Value(); +} + +void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) { + vixl32::Register out = OutputRegister(load); + GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset()); +} + + +void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) { + new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Mov(temp, 0); + GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset()); +} + +void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); +} + +void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) { + HandleBitwiseOperation(instruction, AND); +} + +void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) { + HandleBitwiseOperation(instruction, ORR); +} + +void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) { + HandleBitwiseOperation(instruction, EOR); +} + +void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt + || instruction->GetResultType() == Primitive::kPrimLong); + // Note: GVN reorders commutative operations to have the constant on the right hand side. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode)); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) { + HandleBitwiseOperation(instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) { + HandleBitwiseOperation(instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) { + HandleBitwiseOperation(instruction); +} + +// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl. 
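The three constant helpers that follow (GenerateAndConst, GenerateOrrConst, GenerateEorConst) rely on two ideas: a 64-bit bitwise operation with a constant is split into independent 32-bit halves, so a half equal to all-zeros or all-ones collapses into a plain move or clear; and when a 32-bit half is not encodable as an immediate, its complement often is, so AND falls back to BIC and ORR falls back to ORN. The sketch below illustrates only the selection logic, using a simplified encodability test; the real check is the assembler's ShifterOperandCanHold, and Thumb-2 accepts more immediate patterns than the classic rotate rule shown here.

#include <cstdint>
#include <cstdio>

// Simplified "modified immediate" test: an 8-bit value rotated right by an even
// amount (the classic A32 rule; this under-approximates what the backend encodes).
static bool CanEncodeAsImmediate(uint32_t value) {
  for (int rot = 0; rot < 32; rot += 2) {
    uint32_t unrotated = (value << rot) | (value >> ((32 - rot) & 31));
    if (unrotated <= 0xffu) return true;
  }
  return false;
}

// Mirrors the shape of GenerateAndConst: prefer AND #imm, fall back to BIC #~imm.
static void DescribeAndWithConstant(uint32_t value) {
  if (value == 0xffffffffu) { std::puts("mov out, first   // AND with all-ones is a move"); return; }
  if (value == 0u)          { std::puts("mov out, #0      // AND with zero is a clear");    return; }
  if (CanEncodeAsImmediate(value)) {
    std::printf("and out, first, #0x%x\n", static_cast<unsigned>(value));
  } else if (CanEncodeAsImmediate(~value)) {
    std::printf("bic out, first, #0x%x   // the complemented constant encodes\n",
                static_cast<unsigned>(~value));
  } else {
    std::puts("materialize the constant in a register, then AND");
  }
}

int main() {
  DescribeAndWithConstant(0x000000ffu);  // encodable directly, so AND
  DescribeAndWithConstant(0xffffff0fu);  // not encodable, but ~value = 0xf0 is, so BIC
  DescribeAndWithConstant(0xffffffffu);  // e.g. the high half of `x & 0xffffffff00000000`
}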
+void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out, + vixl32::Register first, + uint32_t value) { + // Optimize special cases for individual halfs of `and-long` (`and` is simplified earlier). + if (value == 0xffffffffu) { + if (!out.Is(first)) { + __ Mov(out, first); + } + return; + } + if (value == 0u) { + __ Mov(out, 0); + return; + } + if (GetAssembler()->ShifterOperandCanHold(AND, value)) { + __ And(out, first, value); + } else { + DCHECK(GetAssembler()->ShifterOperandCanHold(BIC, ~value)); + __ Bic(out, first, ~value); + } +} + +// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl. +void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out, + vixl32::Register first, + uint32_t value) { + // Optimize special cases for individual halfs of `or-long` (`or` is simplified earlier). + if (value == 0u) { + if (!out.Is(first)) { + __ Mov(out, first); + } + return; + } + if (value == 0xffffffffu) { + __ Mvn(out, 0); + return; + } + if (GetAssembler()->ShifterOperandCanHold(ORR, value)) { + __ Orr(out, first, value); + } else { + DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value)); + __ Orn(out, first, ~value); + } +} + +// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl. +void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out, + vixl32::Register first, + uint32_t value) { + // Optimize special case for individual halfs of `xor-long` (`xor` is simplified earlier). + if (value == 0u) { + if (!out.Is(first)) { + __ Mov(out, first); + } + return; + } + __ Eor(out, first, value); +} + +void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location out = locations->Out(); + + if (second.IsConstant()) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); + uint32_t value_low = Low32Bits(value); + if (instruction->GetResultType() == Primitive::kPrimInt) { + vixl32::Register first_reg = InputRegisterAt(instruction, 0); + vixl32::Register out_reg = OutputRegister(instruction); + if (instruction->IsAnd()) { + GenerateAndConst(out_reg, first_reg, value_low); + } else if (instruction->IsOr()) { + GenerateOrrConst(out_reg, first_reg, value_low); + } else { + DCHECK(instruction->IsXor()); + GenerateEorConst(out_reg, first_reg, value_low); + } + } else { + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + uint32_t value_high = High32Bits(value); + vixl32::Register first_low = LowRegisterFrom(first); + vixl32::Register first_high = HighRegisterFrom(first); + vixl32::Register out_low = LowRegisterFrom(out); + vixl32::Register out_high = HighRegisterFrom(out); + if (instruction->IsAnd()) { + GenerateAndConst(out_low, first_low, value_low); + GenerateAndConst(out_high, first_high, value_high); + } else if (instruction->IsOr()) { + GenerateOrrConst(out_low, first_low, value_low); + GenerateOrrConst(out_high, first_high, value_high); + } else { + DCHECK(instruction->IsXor()); + GenerateEorConst(out_low, first_low, value_low); + GenerateEorConst(out_high, first_high, value_high); + } + } + return; + } + + if (instruction->GetResultType() == Primitive::kPrimInt) { + vixl32::Register first_reg = InputRegisterAt(instruction, 0); + vixl32::Register second_reg = InputRegisterAt(instruction, 1); + vixl32::Register out_reg = OutputRegister(instruction); + if 
(instruction->IsAnd()) { + __ And(out_reg, first_reg, second_reg); + } else if (instruction->IsOr()) { + __ Orr(out_reg, first_reg, second_reg); + } else { + DCHECK(instruction->IsXor()); + __ Eor(out_reg, first_reg, second_reg); + } + } else { + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + vixl32::Register first_low = LowRegisterFrom(first); + vixl32::Register first_high = HighRegisterFrom(first); + vixl32::Register second_low = LowRegisterFrom(second); + vixl32::Register second_high = HighRegisterFrom(second); + vixl32::Register out_low = LowRegisterFrom(out); + vixl32::Register out_high = HighRegisterFrom(out); + if (instruction->IsAnd()) { + __ And(out_low, first_low, second_low); + __ And(out_high, first_high, second_high); + } else if (instruction->IsOr()) { + __ Orr(out_low, first_low, second_low); + __ Orr(out_high, first_high, second_high); + } else { + DCHECK(instruction->IsXor()); + __ Eor(out_low, first_low, second_low); + __ Eor(out_high, first_high, second_high); + } + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location root, + vixl32::Register obj, + uint32_t offset, + bool requires_read_barrier) { + vixl32::Register root_reg = RegisterFrom(root); + if (requires_read_barrier) { + TODO_VIXL32(FATAL); + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. + } +} + +void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED, + Location out, + Location ref ATTRIBUTE_UNUSED, + Location obj ATTRIBUTE_UNUSED, + uint32_t offset ATTRIBUTE_UNUSED, + Location index ATTRIBUTE_UNUSED) { + if (kEmitCompilerReadBarrier) { + DCHECK(!kUseBakerReadBarrier); + TODO_VIXL32(FATAL); + } else if (kPoisonHeapReferences) { + GetAssembler()->UnpoisonHeapReference(RegisterFrom(out)); + } +} + +// Check if the desired_dispatch_info is supported. If it is, return it, +// otherwise return a fall-back info that should be used instead. +HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info ATTRIBUTE_UNUSED, + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + // TODO(VIXL): Implement optimized code paths. + return { + HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, + HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, + 0u, + 0u + }; +} + +vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter( + HInvokeStaticOrDirect* invoke, vixl32::Register temp) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + if (!invoke->GetLocations()->Intrinsified()) { + return RegisterFrom(location); + } + // For intrinsics we allow any location, so it may be on the stack. + if (!location.IsRegister()) { + GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, location.GetStackIndex()); + return temp; + } + // For register locations, check if the register was saved. If so, get it from the stack. + // Note: There is a chance that the register was saved but not overwritten, so we could + // save one load. 
However, since this is just an intrinsic slow path we prefer this + // simple and more robust approach rather that trying to determine if that's the case. + SlowPathCode* slow_path = GetCurrentSlowPath(); + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(RegisterFrom(location).GetCode())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(RegisterFrom(location).GetCode()); + GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, stack_offset); + return temp; + } + return RegisterFrom(location); +} + +void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp) { + Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. + vixl32::Register temp_reg = RegisterFrom(temp); + + switch (invoke->GetMethodLoadKind()) { + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { + uint32_t offset = + GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); + // temp = thread->string_init_entrypoint + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, tr, offset); + break; + } + case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + vixl32::Register method_reg; + if (current_method.IsRegister()) { + method_reg = RegisterFrom(current_method); + } else { + TODO_VIXL32(FATAL); + } + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; + GetAssembler()->LoadFromOffset( + kLoadWord, + temp_reg, + method_reg, + ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); + // temp = temp[index_in_cache]; + // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. + uint32_t index_in_cache = invoke->GetDexMethodIndex(); + GetAssembler()->LoadFromOffset( + kLoadWord, temp_reg, temp_reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); + break; + } + default: + TODO_VIXL32(FATAL); + } + + // TODO(VIXL): Support `CodePtrLocation` values other than `kCallArtMethod`. + if (invoke->GetCodePtrLocation() != HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod) { + TODO_VIXL32(FATAL); + } + + // LR = callee_method->entry_point_from_quick_compiled_code_ + GetAssembler()->LoadFromOffset( + kLoadWord, + lr, + RegisterFrom(callee_method), + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + // LR() + __ Blx(lr); + + DCHECK(!IsLeafMethod()); +} + +void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { + vixl32::Register temp = RegisterFrom(temp_location); + uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( + invoke->GetVTableIndex(), kArmPointerSize).Uint32Value(); + + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. 
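The emission sequence that follows in GenerateVirtualCall (load the receiver's klass_, index the embedded vtable, load the method's quick entry point, then blx) is classic vtable dispatch. Below is a reduced C++ model of those four steps; the layout and names are made up for illustration and do not match ART's real object layout, which uses compressed references and a class-embedded vtable.

#include <cstddef>

// Toy object model for illustrating the dispatch steps only; not ART's layout.
struct FakeArtMethod {
  void (*entry_point_from_quick_compiled_code)();  // what ends up in LR before blx
};

struct FakeClass {
  FakeArtMethod* vtable[16];  // stands in for the embedded vtable entries
};

struct FakeObject {
  FakeClass* klass;  // receiver->klass_ (first load, where the implicit null check sits)
};

void DispatchVirtual(FakeObject* receiver, size_t vtable_index) {
  FakeClass* klass = receiver->klass;                   // ldr temp, [receiver, #class_offset]
  FakeArtMethod* method = klass->vtable[vtable_index];  // ldr temp, [temp, #method_offset]
  void (*code)() = method->entry_point_from_quick_compiled_code;  // ldr lr, [temp, #entry_point]
  code();                                               // blx lr
}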
+ InvokeDexCallingConventionARMVIXL calling_convention; + vixl32::Register receiver = calling_convention.GetRegisterAt(0); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + // /* HeapReference<Class> */ temp = receiver->klass_ + GetAssembler()->LoadFromOffset(kLoadWord, temp, receiver, class_offset); + MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // temp = temp->GetMethodAt(method_offset); + uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmPointerSize).Int32Value(); + GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset); + // LR = temp->GetEntryPoint(); + GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point); + // LR(); + __ Blx(lr); +} + +// Copy the result of a call into the given target. +void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, + Primitive::Type type ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + +#undef __ +#undef QUICK_ENTRY_POINT +#undef TODO_VIXL32 + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h new file mode 100644 index 0000000000..02bf960e18 --- /dev/null +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -0,0 +1,566 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_ + +#include "code_generator_arm.h" +#include "utils/arm/assembler_arm_vixl.h" + +// TODO(VIXL): make vixl clean wrt -Wshadow. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch32/constants-aarch32.h" +#include "aarch32/instructions-aarch32.h" +#include "aarch32/macro-assembler-aarch32.h" +#pragma GCC diagnostic pop + +// True if VIXL32 should be used for codegen on ARM. 
+#ifdef ART_USE_VIXL_ARM_BACKEND +static constexpr bool kArmUseVIXL32 = true; +#else +static constexpr bool kArmUseVIXL32 = false; +#endif + +namespace art { +namespace arm { + +static const vixl::aarch32::Register kParameterCoreRegistersVIXL[] = { + vixl::aarch32::r1, + vixl::aarch32::r2, + vixl::aarch32::r3 +}; +static const size_t kParameterCoreRegistersLengthVIXL = arraysize(kParameterCoreRegisters); +static const vixl::aarch32::SRegister kParameterFpuRegistersVIXL[] = { + vixl::aarch32::s0, + vixl::aarch32::s1, + vixl::aarch32::s2, + vixl::aarch32::s3, + vixl::aarch32::s4, + vixl::aarch32::s5, + vixl::aarch32::s6, + vixl::aarch32::s7, + vixl::aarch32::s8, + vixl::aarch32::s9, + vixl::aarch32::s10, + vixl::aarch32::s11, + vixl::aarch32::s12, + vixl::aarch32::s13, + vixl::aarch32::s14, + vixl::aarch32::s15 +}; +static const size_t kParameterFpuRegistersLengthVIXL = arraysize(kParameterFpuRegisters); + +static const vixl::aarch32::Register kMethodRegister = vixl::aarch32::r0; + +static const vixl::aarch32::Register kCoreAlwaysSpillRegister = vixl::aarch32::r5; + +// Callee saves core registers r5, r6, r7, r8, r10, r11, and lr. +static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::RegisterList::Union( + vixl::aarch32::RegisterList(vixl::aarch32::r5, + vixl::aarch32::r6, + vixl::aarch32::r7, + vixl::aarch32::r8), + vixl::aarch32::RegisterList(vixl::aarch32::r10, + vixl::aarch32::r11, + vixl::aarch32::lr)); + +// Callee saves FP registers s16 to s31 inclusive. +static const vixl::aarch32::SRegisterList kFpuCalleeSaves = + vixl::aarch32::SRegisterList(vixl::aarch32::s16, 16); + +static const vixl::aarch32::Register kRuntimeParameterCoreRegistersVIXL[] = { + vixl::aarch32::r0, + vixl::aarch32::r1, + vixl::aarch32::r2, + vixl::aarch32::r3 +}; +static const size_t kRuntimeParameterCoreRegistersLengthVIXL = + arraysize(kRuntimeParameterCoreRegisters); +static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = { + vixl::aarch32::s0, + vixl::aarch32::s1, + vixl::aarch32::s2, + vixl::aarch32::s3 +}; +static const size_t kRuntimeParameterFpuRegistersLengthVIXL = + arraysize(kRuntimeParameterFpuRegisters); + +class LoadClassSlowPathARMVIXL; + +#define FOR_EACH_IMPLEMENTED_INSTRUCTION(M) \ + M(Above) \ + M(AboveOrEqual) \ + M(Add) \ + M(And) \ + M(ArrayLength) \ + M(Below) \ + M(BelowOrEqual) \ + M(ClearException) \ + M(ClinitCheck) \ + M(Compare) \ + M(CurrentMethod) \ + M(Div) \ + M(DivZeroCheck) \ + M(DoubleConstant) \ + M(Equal) \ + M(Exit) \ + M(FloatConstant) \ + M(Goto) \ + M(GreaterThan) \ + M(GreaterThanOrEqual) \ + M(If) \ + M(InstanceFieldGet) \ + M(InstanceFieldSet) \ + M(IntConstant) \ + M(InvokeStaticOrDirect) \ + M(InvokeVirtual) \ + M(LessThan) \ + M(LessThanOrEqual) \ + M(LoadClass) \ + M(LoadException) \ + M(LoadString) \ + M(LongConstant) \ + M(MemoryBarrier) \ + M(Mul) \ + M(Neg) \ + M(NewArray) \ + M(NewInstance) \ + M(Not) \ + M(NotEqual) \ + M(NullCheck) \ + M(NullConstant) \ + M(Or) \ + M(ParallelMove) \ + M(ParameterValue) \ + M(Phi) \ + M(Return) \ + M(ReturnVoid) \ + M(Ror) \ + M(Select) \ + M(Shl) \ + M(Shr) \ + M(StaticFieldGet) \ + M(Sub) \ + M(SuspendCheck) \ + M(Throw) \ + M(TryBoundary) \ + M(TypeConversion) \ + M(UShr) \ + M(Xor) \ + +// TODO: Remove once the VIXL32 backend is implemented completely. 
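The FOR_EACH_IMPLEMENTED_INSTRUCTION list above and the FOR_EACH_UNIMPLEMENTED_INSTRUCTION list below are expanded later in this header by DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR and DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR to declare one Visit##Name override per entry, the usual X-macro technique. A reduced, self-contained example of the same mechanism follows; the toy names are unrelated to ART.

// X-macro: one list of names, expanded with different per-entry macros.
struct HFoo {};
struct HBar {};

#define FOR_EACH_TOY_INSTRUCTION(M) \
  M(Foo)                            \
  M(Bar)

#define DECLARE_TOY_VISITOR(Name) void Visit##Name(H##Name* instr);

struct ToyVisitor {
  // Expands to: void VisitFoo(HFoo* instr); void VisitBar(HBar* instr);
  FOR_EACH_TOY_INSTRUCTION(DECLARE_TOY_VISITOR)
};

#undef DECLARE_TOY_VISITOR
#undef FOR_EACH_TOY_INSTRUCTION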
+#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \ + M(ArrayGet) \ + M(ArraySet) \ + M(BooleanNot) \ + M(BoundsCheck) \ + M(BoundType) \ + M(CheckCast) \ + M(ClassTableGet) \ + M(Deoptimize) \ + M(InstanceOf) \ + M(InvokeInterface) \ + M(InvokeUnresolved) \ + M(MonitorOperation) \ + M(NativeDebugInfo) \ + M(PackedSwitch) \ + M(Rem) \ + M(StaticFieldSet) \ + M(UnresolvedInstanceFieldGet) \ + M(UnresolvedInstanceFieldSet) \ + M(UnresolvedStaticFieldGet) \ + M(UnresolvedStaticFieldSet) \ + +class CodeGeneratorARMVIXL; + +class InvokeRuntimeCallingConventionARMVIXL + : public CallingConvention<vixl::aarch32::Register, vixl::aarch32::SRegister> { + public: + InvokeRuntimeCallingConventionARMVIXL() + : CallingConvention(kRuntimeParameterCoreRegistersVIXL, + kRuntimeParameterCoreRegistersLengthVIXL, + kRuntimeParameterFpuRegistersVIXL, + kRuntimeParameterFpuRegistersLengthVIXL, + kArmPointerSize) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConventionARMVIXL); +}; + +class InvokeDexCallingConventionARMVIXL + : public CallingConvention<vixl::aarch32::Register, vixl::aarch32::SRegister> { + public: + InvokeDexCallingConventionARMVIXL() + : CallingConvention(kParameterCoreRegistersVIXL, + kParameterCoreRegistersLengthVIXL, + kParameterFpuRegistersVIXL, + kParameterFpuRegistersLengthVIXL, + kArmPointerSize) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionARMVIXL); +}; + +class SlowPathCodeARMVIXL : public SlowPathCode { + public: + explicit SlowPathCodeARMVIXL(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} + + vixl::aarch32::Label* GetEntryLabel() { return &entry_label_; } + vixl::aarch32::Label* GetExitLabel() { return &exit_label_; } + + void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; + void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; + + private: + vixl::aarch32::Label entry_label_; + vixl::aarch32::Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARMVIXL); +}; + +class ParallelMoveResolverARMVIXL : public ParallelMoveResolverWithSwap { + public: + ParallelMoveResolverARMVIXL(ArenaAllocator* allocator, CodeGeneratorARMVIXL* codegen) + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} + + void EmitMove(size_t index) OVERRIDE; + void EmitSwap(size_t index) OVERRIDE; + void SpillScratch(int reg) OVERRIDE; + void RestoreScratch(int reg) OVERRIDE; + + ArmVIXLAssembler* GetAssembler() const; + + private: + void Exchange(vixl32::Register reg, int mem); + void Exchange(int mem1, int mem2); + + CodeGeneratorARMVIXL* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARMVIXL); +}; + +#define DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR(Name) \ + void Visit##Name(H##Name*) OVERRIDE; + +#define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR(Name) \ + void Visit##Name(H##Name* instr) OVERRIDE { \ + VisitUnimplemementedInstruction(instr); } + +class LocationsBuilderARMVIXL : public HGraphVisitor { + public: + LocationsBuilderARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen) + : HGraphVisitor(graph), codegen_(codegen) {} + + FOR_EACH_IMPLEMENTED_INSTRUCTION(DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR) + + FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR) + + private: + void VisitUnimplemementedInstruction(HInstruction* instruction) { + LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName(); + } + + void HandleInvoke(HInvoke* invoke); + void HandleBitwiseOperation(HBinaryOperation* 
operation, Opcode opcode); + void HandleCondition(HCondition* condition); + void HandleIntegerRotate(LocationSummary* locations); + void HandleLongRotate(LocationSummary* locations); + void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + Location ArithmeticZeroOrFpuRegister(HInstruction* input); + Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode); + bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode); + bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare); + + CodeGeneratorARMVIXL* const codegen_; + InvokeDexCallingConventionVisitorARM parameter_visitor_; + + DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARMVIXL); +}; + +class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { + public: + InstructionCodeGeneratorARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen); + + FOR_EACH_IMPLEMENTED_INSTRUCTION(DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR) + + FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR) + + ArmVIXLAssembler* GetAssembler() const { return assembler_; } + vixl::aarch32::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } + + private: + void VisitUnimplemementedInstruction(HInstruction* instruction) { + LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName(); + } + + // Generate code for the given suspend check. If not null, `successor` + // is the block to branch to if the suspend check is not needed, and after + // the suspend call. + void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); + void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, + vixl32::Register class_reg); + void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); + void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); + void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); + void HandleBitwiseOperation(HBinaryOperation* operation); + void HandleCondition(HCondition* condition); + void HandleIntegerRotate(HRor* ror); + void HandleLongRotate(HRor* ror); + void HandleShift(HBinaryOperation* operation); + + void GenerateWideAtomicStore(vixl::aarch32::Register addr, + uint32_t offset, + vixl::aarch32::Register value_lo, + vixl::aarch32::Register value_hi, + vixl::aarch32::Register temp1, + vixl::aarch32::Register temp2, + HInstruction* instruction); + void GenerateWideAtomicLoad(vixl::aarch32::Register addr, + uint32_t offset, + vixl::aarch32::Register out_lo, + vixl::aarch32::Register out_hi); + + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers if `requires_read_barrier` is true. 
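For the GenerateGcRootFieldLoad declared just below: in the implementation earlier in this diff, the no-read-barrier case is a single word load of the 32-bit GcRoot slot at obj + offset (roots are also unaffected by heap poisoning), while the read-barrier case is still TODO_VIXL32. A minimal C++ picture of the plain case, with illustrative names that are not ART's:

#include <cstdint>

// Toy model: a GC root is one 32-bit compressed reference stored inside
// another object, here a declaring-class slot of an ArtMethod-like object.
struct FakeMethod {
  uint32_t declaring_class_root;  // GcRoot<mirror::Class> occupies one word
};

// Plain GC-root load, i.e. what the non-read-barrier path emits:
// a single word load from obj + offset. With read barriers enabled, the
// loaded reference would additionally go through the read-barrier machinery.
uint32_t LoadDeclaringClassRoot(const FakeMethod* method) {
  return method->declaring_class_root;  // ldr root_reg, [obj, #offset]
}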
+ void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch32::Register obj, + uint32_t offset, + bool requires_read_barrier); + void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, + vixl::aarch32::Label* true_target, + vixl::aarch32::Label* false_target); + void GenerateCompareTestAndBranch(HCondition* condition, + vixl::aarch32::Label* true_target, + vixl::aarch32::Label* false_target); + void GenerateVcmp(HInstruction* instruction); + void GenerateFPJumps(HCondition* cond, + vixl::aarch32::Label* true_label, + vixl::aarch32::Label* false_label); + void GenerateLongComparesAndJumps(HCondition* cond, + vixl::aarch32::Label* true_label, + vixl::aarch32::Label* false_label); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemConstantIntegral(HBinaryOperation* instruction); + + ArmVIXLAssembler* const assembler_; + CodeGeneratorARMVIXL* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorARMVIXL); +}; + +class CodeGeneratorARMVIXL : public CodeGenerator { + public: + CodeGeneratorARMVIXL(HGraph* graph, + const ArmInstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats = nullptr); + + virtual ~CodeGeneratorARMVIXL() {} + + void Initialize() OVERRIDE { + block_labels_.resize(GetGraph()->GetBlocks().size()); + } + + void GenerateFrameEntry() OVERRIDE; + void GenerateFrameExit() OVERRIDE; + + void Bind(HBasicBlock* block) OVERRIDE; + + vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) { + block = FirstNonEmptyBlock(block); + return &(block_labels_[block->GetBlockId()]); + } + + void MoveConstant(Location destination, int32_t value) OVERRIDE; + void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + + ArmVIXLAssembler* GetAssembler() OVERRIDE { return &assembler_; } + + const ArmVIXLAssembler& GetAssembler() const OVERRIDE { return assembler_; } + + vixl::aarch32::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } + + size_t GetWordSize() const OVERRIDE { return kArmWordSize; } + + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; } + + uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + vixl::aarch32::Label* block_entry_label = GetLabelOf(block); + DCHECK(block_entry_label->IsBound()); + return block_entry_label->GetLocation(); + } + + HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } + + HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } + + void GenerateMemoryBarrier(MemBarrierKind kind); + void Finalize(CodeAllocator* allocator) OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; + + void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + + InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; } + + // Helper method to move a 32-bit value between two locations. + void Move32(Location destination, Location source); + + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } + + vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; } + + // Saves the register in the stack. 
Returns the size taken on stack. + size_t SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(INFO) << "TODO: SaveCoreRegister"; + return 0; + } + + // Restores the register from the stack. Returns the size taken on stack. + size_t RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(INFO) << "TODO: RestoreCoreRegister"; + return 0; + } + + size_t SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(INFO) << "TODO: SaveFloatingPointRegister"; + return 0; + } + + size_t RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(INFO) << "TODO: RestoreFloatingPointRegister"; + return 0; + } + + bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { + return type == Primitive::kPrimDouble || type == Primitive::kPrimLong; + } + + void ComputeSpillMask() OVERRIDE; + + void GenerateImplicitNullCheck(HNullCheck* null_check) OVERRIDE; + void GenerateExplicitNullCheck(HNullCheck* null_check) OVERRIDE; + + ParallelMoveResolver* GetMoveResolver() OVERRIDE { + return &move_resolver_; + } + + // Generate code to invoke a runtime entry point. + void InvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path = nullptr) OVERRIDE; + + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. + void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); + + void GenerateInvokeRuntime(int32_t entry_point_offset); + + // Emit a write barrier. + void MarkGCCard(vixl::aarch32::Register temp, + vixl::aarch32::Register card, + vixl::aarch32::Register object, + vixl::aarch32::Register value, + bool can_be_null); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Check if the desired_string_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadString::LoadKind GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + + // Check if the desired_dispatch_info is supported. If it is, return it, + // otherwise return a fall-back info that should be used instead. 
+ HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + HInvokeStaticOrDirect* invoke) OVERRIDE; + + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; + void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + + void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; + + void GenerateNop() OVERRIDE; + + private: + vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, + vixl::aarch32::Register temp); + + // Labels for each block that will be compiled. + // We use a deque so that the `vixl::aarch32::Label` objects do not move in memory. + ArenaDeque<vixl::aarch32::Label> block_labels_; // Indexed by block id. + vixl::aarch32::Label frame_entry_label_; + + LocationsBuilderARMVIXL location_builder_; + InstructionCodeGeneratorARMVIXL instruction_visitor_; + ParallelMoveResolverARMVIXL move_resolver_; + + ArmVIXLAssembler assembler_; + const ArmInstructionSetFeatures& isa_features_; + + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL); +}; + +#undef FOR_EACH_IMPLEMENTED_INSTRUCTION +#undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION +#undef DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR +#undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR + + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_ diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 8dd82ef9cb..f4a804f70c 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -20,6 +20,7 @@ #include "arch/mips/instruction_set_features_mips.h" #include "art_method.h" #include "code_generator_utils.h" +#include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -145,8 +146,8 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) return MipsReturnLocation(type); } -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value() class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { @@ -170,14 +171,10 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() - ? QUICK_ENTRY_POINT(pThrowStringBounds) - : QUICK_ENTRY_POINT(pThrowArrayBounds); - mips_codegen->InvokeRuntime(entry_point_offset, - instruction_, - instruction_->GetDexPc(), - this, - IsDirectEntrypoint(kQuickThrowArrayBounds)); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() + ? 
kQuickThrowStringBounds + : kQuickThrowArrayBounds; + mips_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -197,15 +194,7 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. - SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), - instruction_, - instruction_->GetDexPc(), - this, - IsDirectEntrypoint(kQuickThrowDivZero)); + mips_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } @@ -237,12 +226,9 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { InvokeRuntimeCallingConvention calling_convention; __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex()); - int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) - : QUICK_ENTRY_POINT(pInitializeType); - bool direct = do_clinit_ ? IsDirectEntrypoint(kQuickInitializeStaticStorage) - : IsDirectEntrypoint(kQuickInitializeType); - - mips_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this, direct); + QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage + : kQuickInitializeType; + mips_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this); if (do_clinit_) { CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); } else { @@ -293,13 +279,10 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + HLoadString* load = instruction_->AsLoadString(); + const uint32_t string_index = load->GetStringIndex(); __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); - mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), - instruction_, - instruction_->GetDexPc(), - this, - IsDirectEntrypoint(kQuickResolveString)); + mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); Primitive::Type type = instruction_->GetType(); mips_codegen->MoveLocation(locations->Out(), @@ -307,6 +290,19 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { type); RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. + // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the + // .bss entry address in the fast path, so that we can avoid another calculation here. + bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + Register base = isR6 ? 
ZERO : locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + DCHECK_NE(out, AT); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base); + __ StoreToOffset(kStoreWord, out, TMP, 0); + __ B(GetExitLabel()); } @@ -327,11 +323,10 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } - mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + mips_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), - this, - IsDirectEntrypoint(kQuickThrowNullPointer)); + this); CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } @@ -351,14 +346,8 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), - instruction_, - instruction_->GetDexPc(), - this, - IsDirectEntrypoint(kQuickTestSuspend)); + mips_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ B(GetReturnLabel()); } else { @@ -409,11 +398,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction_, - dex_pc, - this, - IsDirectEntrypoint(kQuickInstanceofNonTrivial)); + mips_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes< kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); @@ -421,11 +406,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { DCHECK(instruction_->IsCheckCast()); - mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction_, - dex_pc, - this, - IsDirectEntrypoint(kQuickCheckCast)); + mips_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } @@ -447,12 +428,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - instruction_, - instruction_->GetDexPc(), - this, - IsDirectEntrypoint(kQuickDeoptimize)); + mips_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -503,8 +479,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, } #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. 
-#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value() void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) { @@ -699,16 +675,17 @@ void CodeGeneratorMIPS::ComputeSpillMask() { if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) { core_spill_mask_ |= (1 << ZERO); } +} + +bool CodeGeneratorMIPS::HasAllocatedCalleeSaveRegisters() const { // If RA is clobbered by PC-relative operations on R2 and it's the only spilled register - // (this can happen in leaf methods), artificially spill the ZERO register in order to - // force explicit saving and restoring of RA. RA isn't saved/restored when it's the only - // spilled register. + // (this can happen in leaf methods), force CodeGenerator::InitializeCodeGeneration() + // into the path that creates a stack frame so that RA can be explicitly saved and restored. + // RA can't otherwise be saved/restored when it's the only spilled register. // TODO: Can this be improved? It causes creation of a stack frame (while RA might be // saved in an unused temporary register) and saving of RA and the current method pointer // in the frame. - if (clobbered_ra_ && core_spill_mask_ == (1u << RA) && fpu_spill_mask_ == 0) { - core_spill_mask_ |= (1 << ZERO); - } + return CodeGenerator::HasAllocatedCalleeSaveRegisters() || clobbered_ra_; } static dwarf::Reg DWARFReg(Register reg) { @@ -731,6 +708,9 @@ void CodeGeneratorMIPS::GenerateFrameEntry() { } if (HasEmptyFrame()) { + CHECK_EQ(fpu_spill_mask_, 0u); + CHECK_EQ(core_spill_mask_, 1u << RA); + CHECK(!clobbered_ra_); return; } @@ -763,8 +743,12 @@ void CodeGeneratorMIPS::GenerateFrameEntry() { // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs); } - // Store the current method pointer. - __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset); + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset); + } } void CodeGeneratorMIPS::GenerateFrameExit() { @@ -794,12 +778,24 @@ void CodeGeneratorMIPS::GenerateFrameExit() { // TODO: __ cfi().Restore(DWARFReg(reg)); } - __ DecreaseFrameSize(GetFrameSize()); + size_t frame_size = GetFrameSize(); + // Adjust the stack pointer in the delay slot if doing so doesn't break CFI. + bool exchange = IsInt<16>(static_cast<int32_t>(frame_size)); + bool reordering = __ SetReorder(false); + if (exchange) { + __ Jr(RA); + __ DecreaseFrameSize(frame_size); // Single instruction in delay slot. + } else { + __ DecreaseFrameSize(frame_size); + __ Jr(RA); + __ Nop(); // In delay slot. + } + __ SetReorder(reordering); + } else { + __ Jr(RA); + __ NopIfNoReordering(); } - __ Jr(RA); - __ Nop(); - __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); } @@ -922,7 +918,7 @@ void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) { } else { DCHECK(destination.IsStackSlot()) << "Cannot move " << c->DebugName() << " to " << destination; - __ StoreConst32ToOffset(value, SP, destination.GetStackIndex(), TMP); + __ StoreConstToOffset(kStoreWord, value, SP, destination.GetStackIndex(), TMP); } } else if (c->IsLongConstant()) { // Move 64 bit constant. 
@@ -934,7 +930,7 @@ void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) { } else { DCHECK(destination.IsDoubleStackSlot()) << "Cannot move " << c->DebugName() << " to " << destination; - __ StoreConst64ToOffset(value, SP, destination.GetStackIndex(), TMP); + __ StoreConstToOffset(kStoreDoubleword, value, SP, destination.GetStackIndex(), TMP); } } else if (c->IsFloatConstant()) { // Move 32 bit float constant. @@ -944,7 +940,7 @@ void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) { } else { DCHECK(destination.IsStackSlot()) << "Cannot move " << c->DebugName() << " to " << destination; - __ StoreConst32ToOffset(value, SP, destination.GetStackIndex(), TMP); + __ StoreConstToOffset(kStoreWord, value, SP, destination.GetStackIndex(), TMP); } } else { // Move 64 bit double constant. @@ -956,7 +952,7 @@ void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) { } else { DCHECK(destination.IsDoubleStackSlot()) << "Cannot move " << c->DebugName() << " to " << destination; - __ StoreConst64ToOffset(value, SP, destination.GetStackIndex(), TMP); + __ StoreConstToOffset(kStoreDoubleword, value, SP, destination.GetStackIndex(), TMP); } } } @@ -978,6 +974,24 @@ void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* lo } } +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + const DexFile& dex_file = info.target_dex_file; + size_t offset_or_index = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + // On R2 we use HMipsComputeBaseMethodAddress and patch relative to + // the assembler's base label used for PC-relative addressing. + uint32_t pc_rel_offset = info.pc_rel_label.IsBound() + ? __ GetLabelLocation(&info.pc_rel_label) + : __ GetPcRelBaseLabelLocation(); + linker_patches->push_back(Factory(high_offset, &dex_file, pc_rel_offset, offset_or_index)); + } +} + void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1008,48 +1022,17 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch target_method.dex_file, target_method.dex_method_index)); } - for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t base_element_offset = info.offset_or_index; - DCHECK(info.high_label.IsBound()); - uint32_t high_offset = __ GetLabelLocation(&info.high_label); - DCHECK(info.pc_rel_label.IsBound()); - uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label); - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset, - &dex_file, - pc_rel_offset, - base_element_offset)); - } - for (const PcRelativePatchInfo& info : pc_relative_string_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t string_index = info.offset_or_index; - DCHECK(info.high_label.IsBound()); - uint32_t high_offset = __ GetLabelLocation(&info.high_label); - // On R2 we use HMipsComputeBaseMethodAddress and patch relative to - // the assembler's base label used for PC-relative literals. - uint32_t pc_rel_offset = info.pc_rel_label.IsBound() - ? 
__ GetLabelLocation(&info.pc_rel_label) - : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset, - &dex_file, - pc_rel_offset, - string_index)); - } - for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t type_index = info.offset_or_index; - DCHECK(info.high_label.IsBound()); - uint32_t high_offset = __ GetLabelLocation(&info.high_label); - // On R2 we use HMipsComputeBaseMethodAddress and patch relative to - // the assembler's base label used for PC-relative literals. - uint32_t pc_rel_offset = info.pc_rel_label.IsBound() - ? __ GetLabelLocation(&info.pc_rel_label) - : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset, - &dex_file, - pc_rel_offset, - type_index)); + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); for (const auto& entry : boot_image_string_patches_) { const StringReference& target_string = entry.first; Literal* literal = entry.second; @@ -1139,6 +1122,36 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map); } +void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder( + PcRelativePatchInfo* info, Register out, Register base) { + bool reordering = __ SetReorder(false); + if (GetInstructionSetFeatures().IsR6()) { + DCHECK_EQ(base, ZERO); + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit offset to PC. + __ Auipc(out, /* placeholder */ 0x1234); + __ Addiu(out, out, /* placeholder */ 0x5678); + } else { + // If base is ZERO, emit NAL to obtain the actual base. + if (base == ZERO) { + // Generate a dummy PC-relative call to obtain PC. + __ Nal(); + } + __ Bind(&info->high_label); + __ Lui(out, /* placeholder */ 0x1234); + // If we emitted the NAL, bind the pc_rel_label, otherwise base is a register holding + // the HMipsComputeBaseMethodAddress which has its own label stored in MipsAssembler. + if (base == ZERO) { + __ Bind(&info->pc_rel_label); + } + __ Ori(out, out, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(out, out, (base == ZERO) ? RA : base); + } + __ SetReorder(reordering); +} + void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { MipsLabel done; Register card = AT; @@ -1155,9 +1168,6 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { } void CodeGeneratorMIPS::SetupBlockedRegisters() const { - // Don't allocate the dalvik style register pair passing. - blocked_register_pairs_[A1_A2] = true; - // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated. 
blocked_core_registers_[ZERO] = true; blocked_core_registers_[K0] = true; @@ -1192,19 +1202,6 @@ void CodeGeneratorMIPS::SetupBlockedRegisters() const { blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; } } - - UpdateBlockedPairRegisters(); -} - -void CodeGeneratorMIPS::UpdateBlockedPairRegisters() const { - for (int i = 0; i < kNumberOfRegisterPairs; i++) { - MipsManagedRegister current = - MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); - if (blocked_core_registers_[current.AsRegisterPairLow()] - || blocked_core_registers_[current.AsRegisterPairHigh()]) { - blocked_register_pairs_[i] = true; - } - } } size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { @@ -1235,27 +1232,17 @@ void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << FRegister(reg); } -void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path) { - InvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(), - instruction, - dex_pc, - slow_path, - IsDirectEntrypoint(entrypoint)); -} - constexpr size_t kMipsDirectEntrypointRuntimeOffset = 16; -void CodeGeneratorMIPS::InvokeRuntime(int32_t entry_point_offset, +void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path, - bool is_direct_entrypoint) { - __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + SlowPathCode* slow_path) { + ValidateInvokeRuntime(entrypoint, instruction, slow_path); + bool reordering = __ SetReorder(false); + __ LoadFromOffset(kLoadWord, T9, TR, GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value()); __ Jalr(T9); - if (is_direct_entrypoint) { + if (IsDirectEntrypoint(entrypoint)) { // Reserve argument space on stack (for $a0-$a3) for // entrypoints that directly reference native implementations. // Called function may use this space to store $a0-$a3 regs. @@ -1264,7 +1251,10 @@ void CodeGeneratorMIPS::InvokeRuntime(int32_t entry_point_offset, } else { __ Nop(); // In delay slot. 
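Note on the reworked InvokeRuntime above: it resolves the entrypoint by loading a function pointer from a fixed offset off the thread register (TR) into T9 and jumping through it. A standalone sketch of that indirection (plain C++; FakeThread/FakeEntryPoints are illustrative stand-ins, not ART's real Thread or QuickEntryPoints layout):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

using BinaryEntryFn = int64_t (*)(int64_t, int64_t);

struct FakeEntryPoints {
  BinaryEntryFn pLdiv;
  BinaryEntryFn pLmod;
};

struct FakeThread {
  uint32_t other_state;   // Stand-in for the rest of the thread object.
  FakeEntryPoints entrypoints;
};

int64_t Ldiv(int64_t a, int64_t b) { return a / b; }

// Analogue of "LoadFromOffset(kLoadWord, T9, TR, offset); Jalr(T9);": read the
// function pointer stored at a fixed offset from the thread pointer, then call it.
int64_t InvokeThroughTable(FakeThread* self, size_t offset, int64_t a, int64_t b) {
  BinaryEntryFn fn;
  std::memcpy(&fn, reinterpret_cast<char*>(self) + offset, sizeof(fn));
  return fn(a, b);
}

int main() {
  FakeThread thread = {0, {&Ldiv, nullptr}};
  size_t ldiv_offset = offsetof(FakeThread, entrypoints) + offsetof(FakeEntryPoints, pLdiv);
  assert(InvokeThroughTable(&thread, ldiv_offset, 10, 3) == 3);
  return 0;
}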
} - RecordPcInfo(instruction, dex_pc, slow_path); + __ SetReorder(reordering); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } } void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, @@ -1835,11 +1825,19 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { } } +auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) { + auto null_checker = [this, instruction]() { + this->codegen_->MaybeRecordImplicitNullCheck(instruction); + }; + return null_checker; +} + void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + auto null_checker = GetImplicitNullChecker(instruction); Primitive::Type type = instruction->GetType(); switch (type) { @@ -1848,10 +1846,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker); } else { __ Addu(TMP, obj, index.AsRegister<Register>()); - __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset); + __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker); } break; } @@ -1861,10 +1859,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker); } else { __ Addu(TMP, obj, index.AsRegister<Register>()); - __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset); + __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker); } break; } @@ -1874,11 +1872,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_2); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset); + __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); } break; } @@ -1888,11 +1886,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_2); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker); } break; } @@ -1904,11 +1902,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ 
LoadFromOffset(kLoadWord, out, obj, offset); + __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadWord, out, TMP, data_offset); + __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); } break; } @@ -1918,11 +1916,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadDoubleword, out, obj, offset); + __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset); + __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); } break; } @@ -1932,11 +1930,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadSFromOffset(out, obj, offset); + __ LoadSFromOffset(out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ LoadSFromOffset(out, TMP, data_offset); + __ LoadSFromOffset(out, TMP, data_offset, null_checker); } break; } @@ -1946,11 +1944,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadDFromOffset(out, obj, offset); + __ LoadDFromOffset(out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ LoadDFromOffset(out, TMP, data_offset); + __ LoadDFromOffset(out, TMP, data_offset, null_checker); } break; } @@ -1959,7 +1957,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) { @@ -1977,6 +1974,25 @@ void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); } +Location LocationsBuilderMIPS::RegisterOrZeroConstant(HInstruction* instruction) { + return (instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern()) + ? Location::ConstantLocation(instruction->AsConstant()) + : Location::RequiresRegister(); +} + +Location LocationsBuilderMIPS::FpuRegisterOrConstantForStore(HInstruction* instruction) { + // We can store 0.0 directly (from the ZERO register) without loading it into an FPU register. + // We can store a non-zero float or double constant without first loading it into the FPU, + // but we should only prefer this if the constant has a single use. + if (instruction->IsConstant() && + (instruction->AsConstant()->IsZeroBitPattern() || + instruction->GetUses().HasExactlyOneElement())) { + return Location::ConstantLocation(instruction->AsConstant()); + // Otherwise fall through and require an FPU register for the constant. 
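Note on RegisterOrZeroConstant / FpuRegisterOrConstantForStore above: both lean on the constant's zero-bit-pattern property, because a value whose object representation is all zero bits can be stored straight from the MIPS ZERO register without occupying a core or FPU register. A small standalone illustration (plain C++, not ART's helper) of which floating-point constants qualify:

#include <cassert>
#include <cstring>

// True if the value's object representation is all zero bits, i.e. it could be
// stored straight from the ZERO register.
template <typename T>
bool HasZeroBitPattern(T value) {
  unsigned char bytes[sizeof(T)] = {};
  std::memcpy(bytes, &value, sizeof(T));
  for (unsigned char b : bytes) {
    if (b != 0) return false;
  }
  return true;
}

int main() {
  assert(HasZeroBitPattern(0.0f));
  assert(HasZeroBitPattern(0.0));
  assert(HasZeroBitPattern(0));
  assert(!HasZeroBitPattern(-0.0f));  // Sign bit set: still needs a register.
  assert(!HasZeroBitPattern(1.0));
  return 0;
}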
+ } + return Location::RequiresFpuRegister(); +} + void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) { bool needs_runtime_call = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( @@ -1991,9 +2007,9 @@ void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { - locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); } else { - locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); } } } @@ -2002,23 +2018,29 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); + Location value_location = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); bool needs_runtime_call = locations->WillCall(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + auto null_checker = GetImplicitNullChecker(instruction); + Register base_reg = index.IsConstant() ? obj : TMP; switch (value_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ StoreToOffset(kStoreByte, value, obj, offset); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1; } else { - __ Addu(TMP, obj, index.AsRegister<Register>()); - __ StoreToOffset(kStoreByte, value, TMP, data_offset); + __ Addu(base_reg, obj, index.AsRegister<Register>()); + } + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreByte, value, base_reg, data_offset, TMP, null_checker); + } else { + Register value = value_location.AsRegister<Register>(); + __ StoreToOffset(kStoreByte, value, base_reg, data_offset, null_checker); } break; } @@ -2026,15 +2048,18 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimShort: case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ StoreToOffset(kStoreHalfword, value, obj, offset); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2; } else { - __ Sll(TMP, index.AsRegister<Register>(), TIMES_2); - __ Addu(TMP, obj, TMP); - __ StoreToOffset(kStoreHalfword, value, TMP, data_offset); + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_2); + __ Addu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreHalfword, value, base_reg, data_offset, TMP, null_checker); + } else { + Register value = value_location.AsRegister<Register>(); + __ 
StoreToOffset(kStoreHalfword, value, base_reg, data_offset, null_checker); } break; } @@ -2043,29 +2068,27 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { if (!needs_runtime_call) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreToOffset(kStoreWord, value, obj, offset); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - DCHECK(index.IsRegister()) << index; - __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); - __ Addu(TMP, obj, TMP); - __ StoreToOffset(kStoreWord, value, TMP, data_offset); + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); + __ Addu(base_reg, obj, base_reg); } - codegen_->MaybeRecordImplicitNullCheck(instruction); - if (needs_write_barrier) { - DCHECK_EQ(value_type, Primitive::kPrimNot); - codegen_->MarkGCCard(obj, value); + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + DCHECK(!needs_write_barrier); + } else { + Register value = value_location.AsRegister<Register>(); + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + if (needs_write_barrier) { + DCHECK_EQ(value_type, Primitive::kPrimNot); + codegen_->MarkGCCard(obj, value); + } } } else { DCHECK_EQ(value_type, Primitive::kPrimNot); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction, - instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickAputObject)); + codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } break; @@ -2073,47 +2096,54 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegisterPairLow<Register>(); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreToOffset(kStoreDoubleword, value, obj, offset); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; } else { - __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); - __ Addu(TMP, obj, TMP); - __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset); + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_8); + __ Addu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker); + } else { + Register value = value_location.AsRegisterPairLow<Register>(); + __ StoreToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker); } break; } case Primitive::kPrimFloat: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - FRegister value = locations->InAt(2).AsFpuRegister<FRegister>(); - DCHECK(locations->InAt(2).IsFpuRegister()); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreSToOffset(value, obj, offset); + data_offset += 
index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); - __ Addu(TMP, obj, TMP); - __ StoreSToOffset(value, TMP, data_offset); + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); + __ Addu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + } else { + FRegister value = value_location.AsFpuRegister<FRegister>(); + __ StoreSToOffset(value, base_reg, data_offset, null_checker); } break; } case Primitive::kPrimDouble: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - FRegister value = locations->InAt(2).AsFpuRegister<FRegister>(); - DCHECK(locations->InAt(2).IsFpuRegister()); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreDToOffset(value, obj, offset); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; } else { - __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); - __ Addu(TMP, obj, TMP); - __ StoreDToOffset(value, TMP, data_offset); + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_8); + __ Addu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker); + } else { + FRegister value = value_location.AsFpuRegister<FRegister>(); + __ StoreDToOffset(value, base_reg, data_offset, null_checker); } break; } @@ -2122,23 +2152,16 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - - // Ints and objects are handled in the switch. - if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } void LocationsBuilderMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -2216,6 +2239,11 @@ void LocationsBuilderMIPS::VisitCompare(HCompare* compare) { case Primitive::kPrimShort: case Primitive::kPrimChar: case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -2404,13 +2432,8 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: - // TODO: don't use branches. - GenerateFpCompareAndBranch(instruction->GetCondition(), - instruction->IsGtBias(), - type, - locations, - &true_label); - break; + GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations); + return; } // Convert the branches into the result. @@ -2636,11 +2659,7 @@ void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) { GenerateDivRemIntegral(instruction); break; case Primitive::kPrimLong: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), - instruction, - instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickLdiv)); + codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); break; } @@ -2662,14 +2681,8 @@ void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) { } void LocationsBuilderMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) { @@ -2799,19 +2812,36 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompare(IfCondition cond, switch (cond) { case kCondEQ: case kCondNE: - if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); + if (use_imm && IsInt<16>(-rhs_imm)) { + if (rhs_imm == 0) { + if (cond == kCondEQ) { + __ Sltiu(dst, lhs, 1); + } else { + __ Sltu(dst, ZERO, lhs); + } + } else { + __ Addiu(dst, lhs, -rhs_imm); + if (cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } } - __ Xor(dst, lhs, rhs_reg); - } - if (cond == kCondEQ) { - __ Sltiu(dst, dst, 1); } else { - __ Sltu(dst, ZERO, dst); + if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + if (cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } } break; @@ -2911,13 +2941,111 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompare(IfCondition cond, } } +bool InstructionCodeGeneratorMIPS::MaterializeIntCompare(IfCondition cond, + LocationSummary* input_locations, + Register dst) { + Register lhs = input_locations->InAt(0).AsRegister<Register>(); + Location rhs_location = input_locations->InAt(1); + Register rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } else { + rhs_reg = rhs_location.AsRegister<Register>(); + } + + switch (cond) { + case kCondEQ: + case kCondNE: + if (use_imm && IsInt<16>(-rhs_imm)) { + __ Addiu(dst, lhs, -rhs_imm); + } else if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + return (cond == kCondEQ); + + case kCondLT: + case kCondGE: + if (use_imm && IsInt<16>(rhs_imm)) { + __ Slti(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Slt(dst, lhs, rhs_reg); + } + return (cond == kCondGE); + + case kCondLE: + case kCondGT: + if (use_imm && IsInt<16>(rhs_imm + 1)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(dst, lhs, rhs_imm + 1); + return (cond == kCondGT); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Slt(dst, rhs_reg, lhs); + return (cond == kCondLE); + } + + case kCondB: + case kCondAE: + if (use_imm && IsInt<16>(rhs_imm)) { + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0xffff8000, 0xffffffff]. 
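Two facts the R2 sequences above rely on, shown as a standalone sketch (plain C++ models of the instruction semantics, not ART code): sltiu sign-extends its 16-bit immediate before the unsigned compare, which is why constants in [0, 0x7fff] and [0xffff8000, 0xffffffff] are directly encodable, and an equality test against an immediate folds into addiu plus sltiu because x == c exactly when (x - c) is unsigned-less-than 1.

#include <cassert>
#include <cstdint>

// Model of MIPS sltiu: sign-extend the 16-bit immediate, then compare unsigned.
bool Sltiu(uint32_t lhs, int16_t imm16) {
  uint32_t rhs = static_cast<uint32_t>(static_cast<int32_t>(imm16));
  return lhs < rhs;
}

int main() {
  // Immediates in [0, 0x7fff] behave as ordinary small unsigned constants.
  assert(Sltiu(5u, 10) && !Sltiu(10u, 10));
  // Immediates in [0xffff8000, 0xffffffff] are reachable through sign extension.
  assert(Sltiu(0xffff0000u, -32768));  // -32768 extends to 0xffff8000.
  assert(!Sltiu(0xffffffffu, -1));     // -1 extends to 0xffffffff.

  // "addiu dst, lhs, -imm; sltiu dst, dst, 1" computes (lhs == imm),
  // since x == c exactly when the unsigned difference x - c is below 1.
  const uint32_t test_values[] = {0u, 7u, 1234u, 0xffffffffu};
  const uint32_t imm = 1234u;
  for (uint32_t lhs : test_values) {
    bool eq = static_cast<uint32_t>(lhs - imm) < 1u;
    assert(eq == (lhs == imm));
  }
  return 0;
}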
+ __ Sltiu(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Sltu(dst, lhs, rhs_reg); + } + return (cond == kCondAE); + + case kCondBE: + case kCondA: + if (use_imm && (rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0xffff8000, 0xffffffff]. + __ Sltiu(dst, lhs, rhs_imm + 1); + return (cond == kCondA); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Sltu(dst, rhs_reg, lhs); + return (cond == kCondBE); + } + } +} + void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label) { Register lhs = locations->InAt(0).AsRegister<Register>(); Location rhs_location = locations->InAt(1); Register rhs_reg = ZERO; - int32_t rhs_imm = 0; + int64_t rhs_imm = 0; bool use_imm = rhs_location.IsConstant(); if (use_imm) { rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); @@ -2954,42 +3082,136 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond, break; } } else { - if (use_imm) { - // TODO: more efficient comparison with 16-bit constants without loading them into TMP. - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - switch (cond) { - case kCondEQ: - __ Beq(lhs, rhs_reg, label); - break; - case kCondNE: - __ Bne(lhs, rhs_reg, label); - break; - case kCondLT: - __ Blt(lhs, rhs_reg, label); - break; - case kCondGE: - __ Bge(lhs, rhs_reg, label); - break; - case kCondLE: - __ Bge(rhs_reg, lhs, label); - break; - case kCondGT: - __ Blt(rhs_reg, lhs, label); - break; - case kCondB: - __ Bltu(lhs, rhs_reg, label); - break; - case kCondAE: - __ Bgeu(lhs, rhs_reg, label); - break; - case kCondBE: - __ Bgeu(rhs_reg, lhs, label); - break; - case kCondA: - __ Bltu(rhs_reg, lhs, label); - break; + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + if (isR6 || !use_imm) { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + switch (cond) { + case kCondEQ: + __ Beq(lhs, rhs_reg, label); + break; + case kCondNE: + __ Bne(lhs, rhs_reg, label); + break; + case kCondLT: + __ Blt(lhs, rhs_reg, label); + break; + case kCondGE: + __ Bge(lhs, rhs_reg, label); + break; + case kCondLE: + __ Bge(rhs_reg, lhs, label); + break; + case kCondGT: + __ Blt(rhs_reg, lhs, label); + break; + case kCondB: + __ Bltu(lhs, rhs_reg, label); + break; + case kCondAE: + __ Bgeu(lhs, rhs_reg, label); + break; + case kCondBE: + __ Bgeu(rhs_reg, lhs, label); + break; + case kCondA: + __ Bltu(rhs_reg, lhs, label); + break; + } + } else { + // Special cases for more efficient comparison with constants on R2. 
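The constant special cases here (and the branch variants that follow) are guarded by IsInt<16>() / IsUint<16>() range checks, and rhs_imm is widened from int32_t to int64_t so that expressions like rhs_imm + 1 cannot wrap before the range test. A simplified standalone sketch in the spirit of those bit-utility templates (names IsIntN/IsUintN are illustrative; ART's real helpers differ in detail):

#include <cassert>
#include <cstddef>
#include <cstdint>

template <size_t kBits>
constexpr bool IsIntN(int64_t value) {
  return value >= -(INT64_C(1) << (kBits - 1)) &&
         value <  (INT64_C(1) << (kBits - 1));
}

template <size_t kBits>
constexpr bool IsUintN(int64_t value) {
  return value >= 0 && value < (INT64_C(1) << kBits);
}

int main() {
  static_assert(IsIntN<16>(-32768) && IsIntN<16>(32767) && !IsIntN<16>(32768), "");
  static_assert(IsUintN<16>(65535) && !IsUintN<16>(-1) && !IsUintN<16>(65536), "");
  // Widening rhs_imm to int64_t keeps rhs_imm + 1 exact even at INT32_MAX,
  // so the IsInt<16> guard rejects it instead of seeing a wrapped value.
  int64_t rhs_imm = INT32_MAX;
  assert(!IsIntN<16>(rhs_imm + 1));
  return 0;
}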
+ switch (cond) { + case kCondEQ: + __ LoadConst32(TMP, rhs_imm); + __ Beq(lhs, TMP, label); + break; + case kCondNE: + __ LoadConst32(TMP, rhs_imm); + __ Bne(lhs, TMP, label); + break; + case kCondLT: + if (IsInt<16>(rhs_imm)) { + __ Slti(TMP, lhs, rhs_imm); + __ Bnez(TMP, label); + } else { + __ LoadConst32(TMP, rhs_imm); + __ Blt(lhs, TMP, label); + } + break; + case kCondGE: + if (IsInt<16>(rhs_imm)) { + __ Slti(TMP, lhs, rhs_imm); + __ Beqz(TMP, label); + } else { + __ LoadConst32(TMP, rhs_imm); + __ Bge(lhs, TMP, label); + } + break; + case kCondLE: + if (IsInt<16>(rhs_imm + 1)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(TMP, lhs, rhs_imm + 1); + __ Bnez(TMP, label); + } else { + __ LoadConst32(TMP, rhs_imm); + __ Bge(TMP, lhs, label); + } + break; + case kCondGT: + if (IsInt<16>(rhs_imm + 1)) { + // Simulate lhs > rhs via !(lhs < rhs + 1). + __ Slti(TMP, lhs, rhs_imm + 1); + __ Beqz(TMP, label); + } else { + __ LoadConst32(TMP, rhs_imm); + __ Blt(TMP, lhs, label); + } + break; + case kCondB: + if (IsInt<16>(rhs_imm)) { + __ Sltiu(TMP, lhs, rhs_imm); + __ Bnez(TMP, label); + } else { + __ LoadConst32(TMP, rhs_imm); + __ Bltu(lhs, TMP, label); + } + break; + case kCondAE: + if (IsInt<16>(rhs_imm)) { + __ Sltiu(TMP, lhs, rhs_imm); + __ Beqz(TMP, label); + } else { + __ LoadConst32(TMP, rhs_imm); + __ Bgeu(lhs, TMP, label); + } + break; + case kCondBE: + if ((rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. + __ Sltiu(TMP, lhs, rhs_imm + 1); + __ Bnez(TMP, label); + } else { + __ LoadConst32(TMP, rhs_imm); + __ Bgeu(TMP, lhs, label); + } + break; + case kCondA: + if ((rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) { + // Simulate lhs > rhs via !(lhs < rhs + 1). + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. 
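The rhs_imm != -1 guards in the kCondBE / kCondA cases exist because rewriting the unsigned test lhs <= rhs as lhs < rhs + 1 breaks down exactly when rhs is all ones and rhs + 1 wraps to zero. A standalone check of that boundary case (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  // Rewriting the unsigned test lhs <= rhs as lhs < rhs + 1 saves an
  // instruction, but only while rhs + 1 does not wrap around to 0.
  uint32_t lhs = 7u;
  uint32_t rhs = 41u;
  assert((lhs <= rhs) == (lhs < rhs + 1u));

  // rhs == 0xffffffff corresponds to rhs_imm == -1 in the generator: the
  // rewritten test is then always false, hence the explicit guard.
  rhs = 0xffffffffu;
  assert(lhs <= rhs);
  assert(!(lhs < rhs + 1u));
  return 0;
}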
+ __ Sltiu(TMP, lhs, rhs_imm + 1); + __ Beqz(TMP, label); + } else { + __ LoadConst32(TMP, rhs_imm); + __ Bltu(TMP, lhs, label); + } + break; + } } } } @@ -3207,6 +3429,414 @@ void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond } } +void InstructionCodeGeneratorMIPS::GenerateFpCompare(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations) { + Register dst = locations->Out().AsRegister<Register>(); + FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + if (type == Primitive::kPrimFloat) { + if (isR6) { + switch (cond) { + case kCondEQ: + __ CmpEqS(FTMP, lhs, rhs); + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondNE: + __ CmpEqS(FTMP, lhs, rhs); + __ Mfc1(dst, FTMP); + __ Addiu(dst, dst, 1); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + } else { + __ CmpUltS(FTMP, lhs, rhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeS(FTMP, lhs, rhs); + } else { + __ CmpUleS(FTMP, lhs, rhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltS(FTMP, rhs, lhs); + } else { + __ CmpLtS(FTMP, rhs, lhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleS(FTMP, rhs, lhs); + } else { + __ CmpLeS(FTMP, rhs, lhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } else { + switch (cond) { + case kCondEQ: + __ CeqS(0, lhs, rhs); + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + case kCondNE: + __ CeqS(0, lhs, rhs); + __ LoadConst32(dst, 1); + __ Movt(dst, ZERO, 0); + break; + case kCondLT: + if (gt_bias) { + __ ColtS(0, lhs, rhs); + } else { + __ CultS(0, lhs, rhs); + } + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + case kCondLE: + if (gt_bias) { + __ ColeS(0, lhs, rhs); + } else { + __ CuleS(0, lhs, rhs); + } + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + case kCondGT: + if (gt_bias) { + __ CultS(0, rhs, lhs); + } else { + __ ColtS(0, rhs, lhs); + } + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + case kCondGE: + if (gt_bias) { + __ CuleS(0, rhs, lhs); + } else { + __ ColeS(0, rhs, lhs); + } + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + if (isR6) { + switch (cond) { + case kCondEQ: + __ CmpEqD(FTMP, lhs, rhs); + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondNE: + __ CmpEqD(FTMP, lhs, rhs); + __ Mfc1(dst, FTMP); + __ Addiu(dst, dst, 1); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + } else { + __ CmpUltD(FTMP, lhs, rhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeD(FTMP, lhs, rhs); + } else { + __ CmpUleD(FTMP, lhs, rhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltD(FTMP, rhs, lhs); + } else { + __ CmpLtD(FTMP, rhs, lhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleD(FTMP, rhs, lhs); + } else { + __ CmpLeD(FTMP, rhs, lhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, 
dst, 1); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } else { + switch (cond) { + case kCondEQ: + __ CeqD(0, lhs, rhs); + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + case kCondNE: + __ CeqD(0, lhs, rhs); + __ LoadConst32(dst, 1); + __ Movt(dst, ZERO, 0); + break; + case kCondLT: + if (gt_bias) { + __ ColtD(0, lhs, rhs); + } else { + __ CultD(0, lhs, rhs); + } + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + case kCondLE: + if (gt_bias) { + __ ColeD(0, lhs, rhs); + } else { + __ CuleD(0, lhs, rhs); + } + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + case kCondGT: + if (gt_bias) { + __ CultD(0, rhs, lhs); + } else { + __ ColtD(0, rhs, lhs); + } + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + case kCondGE: + if (gt_bias) { + __ CuleD(0, rhs, lhs); + } else { + __ ColeD(0, rhs, lhs); + } + __ LoadConst32(dst, 1); + __ Movf(dst, ZERO, 0); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } + } +} + +bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR2(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* input_locations, + int cc) { + FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>(); + CHECK(!codegen_->GetInstructionSetFeatures().IsR6()); + if (type == Primitive::kPrimFloat) { + switch (cond) { + case kCondEQ: + __ CeqS(cc, lhs, rhs); + return false; + case kCondNE: + __ CeqS(cc, lhs, rhs); + return true; + case kCondLT: + if (gt_bias) { + __ ColtS(cc, lhs, rhs); + } else { + __ CultS(cc, lhs, rhs); + } + return false; + case kCondLE: + if (gt_bias) { + __ ColeS(cc, lhs, rhs); + } else { + __ CuleS(cc, lhs, rhs); + } + return false; + case kCondGT: + if (gt_bias) { + __ CultS(cc, rhs, lhs); + } else { + __ ColtS(cc, rhs, lhs); + } + return false; + case kCondGE: + if (gt_bias) { + __ CuleS(cc, rhs, lhs); + } else { + __ ColeS(cc, rhs, lhs); + } + return false; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + switch (cond) { + case kCondEQ: + __ CeqD(cc, lhs, rhs); + return false; + case kCondNE: + __ CeqD(cc, lhs, rhs); + return true; + case kCondLT: + if (gt_bias) { + __ ColtD(cc, lhs, rhs); + } else { + __ CultD(cc, lhs, rhs); + } + return false; + case kCondLE: + if (gt_bias) { + __ ColeD(cc, lhs, rhs); + } else { + __ CuleD(cc, lhs, rhs); + } + return false; + case kCondGT: + if (gt_bias) { + __ CultD(cc, rhs, lhs); + } else { + __ ColtD(cc, rhs, lhs); + } + return false; + case kCondGE: + if (gt_bias) { + __ CuleD(cc, rhs, lhs); + } else { + __ ColeD(cc, rhs, lhs); + } + return false; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); + } + } +} + +bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR6(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* input_locations, + FRegister dst) { + FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>(); + CHECK(codegen_->GetInstructionSetFeatures().IsR6()); + if (type == Primitive::kPrimFloat) { + switch (cond) { + case kCondEQ: + __ CmpEqS(dst, lhs, rhs); + return false; + case kCondNE: + __ CmpEqS(dst, lhs, rhs); + return true; + case kCondLT: + if (gt_bias) { + __ CmpLtS(dst, lhs, rhs); + } else { + __ 
CmpUltS(dst, lhs, rhs); + } + return false; + case kCondLE: + if (gt_bias) { + __ CmpLeS(dst, lhs, rhs); + } else { + __ CmpUleS(dst, lhs, rhs); + } + return false; + case kCondGT: + if (gt_bias) { + __ CmpUltS(dst, rhs, lhs); + } else { + __ CmpLtS(dst, rhs, lhs); + } + return false; + case kCondGE: + if (gt_bias) { + __ CmpUleS(dst, rhs, lhs); + } else { + __ CmpLeS(dst, rhs, lhs); + } + return false; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + switch (cond) { + case kCondEQ: + __ CmpEqD(dst, lhs, rhs); + return false; + case kCondNE: + __ CmpEqD(dst, lhs, rhs); + return true; + case kCondLT: + if (gt_bias) { + __ CmpLtD(dst, lhs, rhs); + } else { + __ CmpUltD(dst, lhs, rhs); + } + return false; + case kCondLE: + if (gt_bias) { + __ CmpLeD(dst, lhs, rhs); + } else { + __ CmpUleD(dst, lhs, rhs); + } + return false; + case kCondGT: + if (gt_bias) { + __ CmpUltD(dst, rhs, lhs); + } else { + __ CmpLtD(dst, rhs, lhs); + } + return false; + case kCondGE: + if (gt_bias) { + __ CmpUleD(dst, rhs, lhs); + } else { + __ CmpLeD(dst, rhs, lhs); + } + return false; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); + } + } +} + void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, Primitive::Type type, @@ -3260,6 +3890,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond, break; default: LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); } } else { switch (cond) { @@ -3305,6 +3936,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond, break; default: LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); } } } else { @@ -3353,6 +3985,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond, break; default: LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); } } else { switch (cond) { @@ -3398,6 +4031,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond, break; default: LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); } } } @@ -3499,6 +4133,7 @@ void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) { void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3513,30 +4148,562 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } -void LocationsBuilderMIPS::VisitSelect(HSelect* select) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); - if (Primitive::IsFloatingPointType(select->GetType())) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); +// This function returns true if a conditional move can be generated for HSelect. +// Otherwise it returns false and HSelect must be implemented in terms of conditonal +// branches and regular moves. +// +// If `locations_to_set` isn't nullptr, its inputs and outputs are set for HSelect. 
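Note on gt_bias in GenerateFpCompare and the Materialize helpers above: it selects between the ordered and the unordered flavour of each FPU compare. With gt_bias a NaN operand must make a less-than test come out false (NaN is treated as greater), so the ordered compare (c.olt / cmp.lt) is used; without it the NaN must make the test true, so the unordered-or-less compare (c.ult / cmp.ult) is used. A standalone model of the two flavours (plain C++; the functions are illustrative, not ART code):

#include <cassert>
#include <cmath>

bool LtGtBias(float a, float b) { return !(std::isnan(a) || std::isnan(b)) && a < b; }
bool LtLtBias(float a, float b) { return std::isnan(a) || std::isnan(b) || a < b; }

int main() {
  float nan = std::nanf("");
  assert(!LtGtBias(nan, 1.0f));  // gt_bias: NaN makes "less than" false.
  assert(LtLtBias(nan, 1.0f));   // lt bias: NaN makes "less than" true.
  // On ordinary operands the two flavours agree.
  assert(LtGtBias(1.0f, 2.0f) && LtLtBias(1.0f, 2.0f));
  assert(!LtGtBias(2.0f, 1.0f) && !LtLtBias(2.0f, 1.0f));
  return 0;
}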
+// +// While determining feasibility of a conditional move and setting inputs/outputs +// are two distinct tasks, this function does both because they share quite a bit +// of common logic. +static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* locations_to_set) { + bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition()); + HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HCondition* condition = cond->AsCondition(); + + Primitive::Type cond_type = materialized ? Primitive::kPrimInt : condition->InputAt(0)->GetType(); + Primitive::Type dst_type = select->GetType(); + + HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); + HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); + bool is_true_value_zero_constant = + (cst_true_value != nullptr && cst_true_value->IsZeroBitPattern()); + bool is_false_value_zero_constant = + (cst_false_value != nullptr && cst_false_value->IsZeroBitPattern()); + + bool can_move_conditionally = false; + bool use_const_for_false_in = false; + bool use_const_for_true_in = false; + + if (!cond->IsConstant()) { + switch (cond_type) { + default: + switch (dst_type) { + default: + // Moving int on int condition. + if (is_r6) { + if (is_true_value_zero_constant) { + // seleqz out_reg, false_reg, cond_reg + can_move_conditionally = true; + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // selnez out_reg, true_reg, cond_reg + can_move_conditionally = true; + use_const_for_false_in = true; + } else if (materialized) { + // Not materializing unmaterialized int conditions + // to keep the instruction count low. + // selnez AT, true_reg, cond_reg + // seleqz TMP, false_reg, cond_reg + // or out_reg, AT, TMP + can_move_conditionally = true; + } + } else { + // movn out_reg, true_reg/ZERO, cond_reg + can_move_conditionally = true; + use_const_for_true_in = is_true_value_zero_constant; + } + break; + case Primitive::kPrimLong: + // Moving long on int condition. + if (is_r6) { + if (is_true_value_zero_constant) { + // seleqz out_reg_lo, false_reg_lo, cond_reg + // seleqz out_reg_hi, false_reg_hi, cond_reg + can_move_conditionally = true; + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // selnez out_reg_lo, true_reg_lo, cond_reg + // selnez out_reg_hi, true_reg_hi, cond_reg + can_move_conditionally = true; + use_const_for_false_in = true; + } + // Other long conditional moves would generate 6+ instructions, + // which is too many. + } else { + // movn out_reg_lo, true_reg_lo/ZERO, cond_reg + // movn out_reg_hi, true_reg_hi/ZERO, cond_reg + can_move_conditionally = true; + use_const_for_true_in = is_true_value_zero_constant; + } + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + // Moving float/double on int condition. + if (is_r6) { + if (materialized) { + // Not materializing unmaterialized int conditions + // to keep the instruction count low. 
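The seleqz/selnez comments in CanMoveConditionally describe the R6 pattern being sized up: two conditional-zeroing moves whose results are OR-ed together, with one of them dropping out entirely when the corresponding input is the zero constant. A standalone bit-level model of that selection (plain C++, for illustration only):

#include <cassert>
#include <cstdint>

// Models of the MIPS R6 conditional-zeroing moves.
uint32_t Selnez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }
uint32_t Seleqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }

// General three-instruction select: selnez AT, t, c ; seleqz TMP, f, c ; or out, AT, TMP.
uint32_t SelectR6(uint32_t t, uint32_t f, uint32_t c) {
  return Selnez(t, c) | Seleqz(f, c);
}

int main() {
  assert(SelectR6(11, 22, 1) == 11);
  assert(SelectR6(11, 22, 0) == 22);
  // If the true value is the zero constant a single seleqz suffices, and
  // symmetrically a single selnez when the false value is zero -- which is
  // why the zero-bit-pattern inputs are special-cased above.
  assert(Seleqz(22, 0) == 22 && Seleqz(22, 1) == 0);
  assert(Selnez(11, 1) == 11 && Selnez(11, 0) == 0);
  return 0;
}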
+ can_move_conditionally = true; + if (is_true_value_zero_constant) { + // sltu TMP, ZERO, cond_reg + // mtc1 TMP, temp_cond_reg + // seleqz.fmt out_reg, false_reg, temp_cond_reg + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // sltu TMP, ZERO, cond_reg + // mtc1 TMP, temp_cond_reg + // selnez.fmt out_reg, true_reg, temp_cond_reg + use_const_for_false_in = true; + } else { + // sltu TMP, ZERO, cond_reg + // mtc1 TMP, temp_cond_reg + // sel.fmt temp_cond_reg, false_reg, true_reg + // mov.fmt out_reg, temp_cond_reg + } + } + } else { + // movn.fmt out_reg, true_reg, cond_reg + can_move_conditionally = true; + } + break; + } + break; + case Primitive::kPrimLong: + // We don't materialize long comparison now + // and use conditional branches instead. + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + switch (dst_type) { + default: + // Moving int on float/double condition. + if (is_r6) { + if (is_true_value_zero_constant) { + // mfc1 TMP, temp_cond_reg + // seleqz out_reg, false_reg, TMP + can_move_conditionally = true; + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // mfc1 TMP, temp_cond_reg + // selnez out_reg, true_reg, TMP + can_move_conditionally = true; + use_const_for_false_in = true; + } else { + // mfc1 TMP, temp_cond_reg + // selnez AT, true_reg, TMP + // seleqz TMP, false_reg, TMP + // or out_reg, AT, TMP + can_move_conditionally = true; + } + } else { + // movt out_reg, true_reg/ZERO, cc + can_move_conditionally = true; + use_const_for_true_in = is_true_value_zero_constant; + } + break; + case Primitive::kPrimLong: + // Moving long on float/double condition. + if (is_r6) { + if (is_true_value_zero_constant) { + // mfc1 TMP, temp_cond_reg + // seleqz out_reg_lo, false_reg_lo, TMP + // seleqz out_reg_hi, false_reg_hi, TMP + can_move_conditionally = true; + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // mfc1 TMP, temp_cond_reg + // selnez out_reg_lo, true_reg_lo, TMP + // selnez out_reg_hi, true_reg_hi, TMP + can_move_conditionally = true; + use_const_for_false_in = true; + } + // Other long conditional moves would generate 6+ instructions, + // which is too many. + } else { + // movt out_reg_lo, true_reg_lo/ZERO, cc + // movt out_reg_hi, true_reg_hi/ZERO, cc + can_move_conditionally = true; + use_const_for_true_in = is_true_value_zero_constant; + } + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + // Moving float/double on float/double condition. 
+ if (is_r6) { + can_move_conditionally = true; + if (is_true_value_zero_constant) { + // seleqz.fmt out_reg, false_reg, temp_cond_reg + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // selnez.fmt out_reg, true_reg, temp_cond_reg + use_const_for_false_in = true; + } else { + // sel.fmt temp_cond_reg, false_reg, true_reg + // mov.fmt out_reg, temp_cond_reg + } + } else { + // movt.fmt out_reg, true_reg, cc + can_move_conditionally = true; + } + break; + } + break; + } + } + + if (can_move_conditionally) { + DCHECK(!use_const_for_false_in || !use_const_for_true_in); } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + DCHECK(!use_const_for_false_in); + DCHECK(!use_const_for_true_in); } - if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { - locations->SetInAt(2, Location::RequiresRegister()); + + if (locations_to_set != nullptr) { + if (use_const_for_false_in) { + locations_to_set->SetInAt(0, Location::ConstantLocation(cst_false_value)); + } else { + locations_to_set->SetInAt(0, + Primitive::IsFloatingPointType(dst_type) + ? Location::RequiresFpuRegister() + : Location::RequiresRegister()); + } + if (use_const_for_true_in) { + locations_to_set->SetInAt(1, Location::ConstantLocation(cst_true_value)); + } else { + locations_to_set->SetInAt(1, + Primitive::IsFloatingPointType(dst_type) + ? Location::RequiresFpuRegister() + : Location::RequiresRegister()); + } + if (materialized) { + locations_to_set->SetInAt(2, Location::RequiresRegister()); + } + // On R6 we don't require the output to be the same as the + // first input for conditional moves unlike on R2. + bool is_out_same_as_first_in = !can_move_conditionally || !is_r6; + if (is_out_same_as_first_in) { + locations_to_set->SetOut(Location::SameAsFirstInput()); + } else { + locations_to_set->SetOut(Primitive::IsFloatingPointType(dst_type) + ? 
Location::RequiresFpuRegister() + : Location::RequiresRegister()); + } } - locations->SetOut(Location::SameAsFirstInput()); + + return can_move_conditionally; } -void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) { +void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { + LocationSummary* locations = select->GetLocations(); + Location dst = locations->Out(); + Location src = locations->InAt(1); + Register src_reg = ZERO; + Register src_reg_high = ZERO; + HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + Register cond_reg = TMP; + int cond_cc = 0; + Primitive::Type cond_type = Primitive::kPrimInt; + bool cond_inverted = false; + Primitive::Type dst_type = select->GetType(); + + if (IsBooleanValueOrMaterializedCondition(cond)) { + cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>(); + } else { + HCondition* condition = cond->AsCondition(); + LocationSummary* cond_locations = cond->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + cond_type = condition->InputAt(0)->GetType(); + switch (cond_type) { + default: + DCHECK_NE(cond_type, Primitive::kPrimLong); + cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + cond_inverted = MaterializeFpCompareR2(if_cond, + condition->IsGtBias(), + cond_type, + cond_locations, + cond_cc); + break; + } + } + + DCHECK(dst.Equals(locations->InAt(0))); + if (src.IsRegister()) { + src_reg = src.AsRegister<Register>(); + } else if (src.IsRegisterPair()) { + src_reg = src.AsRegisterPairLow<Register>(); + src_reg_high = src.AsRegisterPairHigh<Register>(); + } else if (src.IsConstant()) { + DCHECK(src.GetConstant()->IsZeroBitPattern()); + } + + switch (cond_type) { + default: + switch (dst_type) { + default: + if (cond_inverted) { + __ Movz(dst.AsRegister<Register>(), src_reg, cond_reg); + } else { + __ Movn(dst.AsRegister<Register>(), src_reg, cond_reg); + } + break; + case Primitive::kPrimLong: + if (cond_inverted) { + __ Movz(dst.AsRegisterPairLow<Register>(), src_reg, cond_reg); + __ Movz(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg); + } else { + __ Movn(dst.AsRegisterPairLow<Register>(), src_reg, cond_reg); + __ Movn(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg); + } + break; + case Primitive::kPrimFloat: + if (cond_inverted) { + __ MovzS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); + } else { + __ MovnS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); + } + break; + case Primitive::kPrimDouble: + if (cond_inverted) { + __ MovzD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); + } else { + __ MovnD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); + } + break; + } + break; + case Primitive::kPrimLong: + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + switch (dst_type) { + default: + if (cond_inverted) { + __ Movf(dst.AsRegister<Register>(), src_reg, cond_cc); + } else { + __ Movt(dst.AsRegister<Register>(), src_reg, cond_cc); + } + break; + case Primitive::kPrimLong: + if (cond_inverted) { + __ Movf(dst.AsRegisterPairLow<Register>(), src_reg, cond_cc); + __ Movf(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc); + } else { + __ Movt(dst.AsRegisterPairLow<Register>(), src_reg, cond_cc); + __ Movt(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc); + } + break; + 
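GenConditionalMoveR2 relies on movn/movz (and movt/movf) leaving the destination untouched when the move is not taken, which is why the R2 path pins the output to the first (false-value) input via Location::SameAsFirstInput(). A standalone model of that behaviour (plain C++, illustration only):

#include <cassert>
#include <cstdint>

// movn rd, rs, rt: rd = rs only if rt != 0; otherwise rd keeps its old value.
void Movn(uint32_t* rd, uint32_t rs, uint32_t rt) { if (rt != 0) *rd = rs; }

uint32_t SelectR2(uint32_t true_val, uint32_t false_val, uint32_t cond) {
  uint32_t out = false_val;    // out_reg is pre-loaded with the false value...
  Movn(&out, true_val, cond);  // ...and only overwritten when cond != 0.
  return out;
}

int main() {
  assert(SelectR2(11, 22, 1) == 11);
  assert(SelectR2(11, 22, 0) == 22);
  return 0;
}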
case Primitive::kPrimFloat: + if (cond_inverted) { + __ MovfS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); + } else { + __ MovtS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); + } + break; + case Primitive::kPrimDouble: + if (cond_inverted) { + __ MovfD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); + } else { + __ MovtD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); + } + break; + } + break; + } +} + +void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { LocationSummary* locations = select->GetLocations(); - MipsLabel false_target; - GenerateTestAndBranch(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, - &false_target); - codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); - __ Bind(&false_target); + Location dst = locations->Out(); + Location false_src = locations->InAt(0); + Location true_src = locations->InAt(1); + HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + Register cond_reg = TMP; + FRegister fcond_reg = FTMP; + Primitive::Type cond_type = Primitive::kPrimInt; + bool cond_inverted = false; + Primitive::Type dst_type = select->GetType(); + + if (IsBooleanValueOrMaterializedCondition(cond)) { + cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>(); + } else { + HCondition* condition = cond->AsCondition(); + LocationSummary* cond_locations = cond->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + cond_type = condition->InputAt(0)->GetType(); + switch (cond_type) { + default: + DCHECK_NE(cond_type, Primitive::kPrimLong); + cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + cond_inverted = MaterializeFpCompareR6(if_cond, + condition->IsGtBias(), + cond_type, + cond_locations, + fcond_reg); + break; + } + } + + if (true_src.IsConstant()) { + DCHECK(true_src.GetConstant()->IsZeroBitPattern()); + } + if (false_src.IsConstant()) { + DCHECK(false_src.GetConstant()->IsZeroBitPattern()); + } + + switch (dst_type) { + default: + if (Primitive::IsFloatingPointType(cond_type)) { + __ Mfc1(cond_reg, fcond_reg); + } + if (true_src.IsConstant()) { + if (cond_inverted) { + __ Selnez(dst.AsRegister<Register>(), false_src.AsRegister<Register>(), cond_reg); + } else { + __ Seleqz(dst.AsRegister<Register>(), false_src.AsRegister<Register>(), cond_reg); + } + } else if (false_src.IsConstant()) { + if (cond_inverted) { + __ Seleqz(dst.AsRegister<Register>(), true_src.AsRegister<Register>(), cond_reg); + } else { + __ Selnez(dst.AsRegister<Register>(), true_src.AsRegister<Register>(), cond_reg); + } + } else { + DCHECK_NE(cond_reg, AT); + if (cond_inverted) { + __ Seleqz(AT, true_src.AsRegister<Register>(), cond_reg); + __ Selnez(TMP, false_src.AsRegister<Register>(), cond_reg); + } else { + __ Selnez(AT, true_src.AsRegister<Register>(), cond_reg); + __ Seleqz(TMP, false_src.AsRegister<Register>(), cond_reg); + } + __ Or(dst.AsRegister<Register>(), AT, TMP); + } + break; + case Primitive::kPrimLong: { + if (Primitive::IsFloatingPointType(cond_type)) { + __ Mfc1(cond_reg, fcond_reg); + } + Register dst_lo = dst.AsRegisterPairLow<Register>(); + Register dst_hi = dst.AsRegisterPairHigh<Register>(); + if (true_src.IsConstant()) { + Register src_lo = false_src.AsRegisterPairLow<Register>(); + Register src_hi = false_src.AsRegisterPairHigh<Register>(); + if 
(cond_inverted) { + __ Selnez(dst_lo, src_lo, cond_reg); + __ Selnez(dst_hi, src_hi, cond_reg); + } else { + __ Seleqz(dst_lo, src_lo, cond_reg); + __ Seleqz(dst_hi, src_hi, cond_reg); + } + } else { + DCHECK(false_src.IsConstant()); + Register src_lo = true_src.AsRegisterPairLow<Register>(); + Register src_hi = true_src.AsRegisterPairHigh<Register>(); + if (cond_inverted) { + __ Seleqz(dst_lo, src_lo, cond_reg); + __ Seleqz(dst_hi, src_hi, cond_reg); + } else { + __ Selnez(dst_lo, src_lo, cond_reg); + __ Selnez(dst_hi, src_hi, cond_reg); + } + } + break; + } + case Primitive::kPrimFloat: { + if (!Primitive::IsFloatingPointType(cond_type)) { + // sel*.fmt tests bit 0 of the condition register, account for that. + __ Sltu(TMP, ZERO, cond_reg); + __ Mtc1(TMP, fcond_reg); + } + FRegister dst_reg = dst.AsFpuRegister<FRegister>(); + if (true_src.IsConstant()) { + FRegister src_reg = false_src.AsFpuRegister<FRegister>(); + if (cond_inverted) { + __ SelnezS(dst_reg, src_reg, fcond_reg); + } else { + __ SeleqzS(dst_reg, src_reg, fcond_reg); + } + } else if (false_src.IsConstant()) { + FRegister src_reg = true_src.AsFpuRegister<FRegister>(); + if (cond_inverted) { + __ SeleqzS(dst_reg, src_reg, fcond_reg); + } else { + __ SelnezS(dst_reg, src_reg, fcond_reg); + } + } else { + if (cond_inverted) { + __ SelS(fcond_reg, + true_src.AsFpuRegister<FRegister>(), + false_src.AsFpuRegister<FRegister>()); + } else { + __ SelS(fcond_reg, + false_src.AsFpuRegister<FRegister>(), + true_src.AsFpuRegister<FRegister>()); + } + __ MovS(dst_reg, fcond_reg); + } + break; + } + case Primitive::kPrimDouble: { + if (!Primitive::IsFloatingPointType(cond_type)) { + // sel*.fmt tests bit 0 of the condition register, account for that. + __ Sltu(TMP, ZERO, cond_reg); + __ Mtc1(TMP, fcond_reg); + } + FRegister dst_reg = dst.AsFpuRegister<FRegister>(); + if (true_src.IsConstant()) { + FRegister src_reg = false_src.AsFpuRegister<FRegister>(); + if (cond_inverted) { + __ SelnezD(dst_reg, src_reg, fcond_reg); + } else { + __ SeleqzD(dst_reg, src_reg, fcond_reg); + } + } else if (false_src.IsConstant()) { + FRegister src_reg = true_src.AsFpuRegister<FRegister>(); + if (cond_inverted) { + __ SeleqzD(dst_reg, src_reg, fcond_reg); + } else { + __ SelnezD(dst_reg, src_reg, fcond_reg); + } + } else { + if (cond_inverted) { + __ SelD(fcond_reg, + true_src.AsFpuRegister<FRegister>(), + false_src.AsFpuRegister<FRegister>()); + } else { + __ SelD(fcond_reg, + false_src.AsFpuRegister<FRegister>(), + true_src.AsFpuRegister<FRegister>()); + } + __ MovD(dst_reg, fcond_reg); + } + break; + } + } +} + +void LocationsBuilderMIPS::VisitSelect(HSelect* select) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); + CanMoveConditionally(select, codegen_->GetInstructionSetFeatures().IsR6(), locations); +} + +void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) { + bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); + if (CanMoveConditionally(select, is_r6, /* locations_to_set */ nullptr)) { + if (is_r6) { + GenConditionalMoveR6(select); + } else { + GenConditionalMoveR2(select); + } + } else { + LocationSummary* locations = select->GetLocations(); + MipsLabel false_target; + GenerateTestAndBranch(select, + /* condition_input_index */ 2, + /* true_target */ nullptr, + &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); + } } void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { @@ -3591,6 +4758,7 @@ void 
InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + auto null_checker = GetImplicitNullChecker(instruction); switch (type) { case Primitive::kPrimBoolean: @@ -3625,11 +4793,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, // Do implicit Null check __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Load), - instruction, - dex_pc, - nullptr, - IsDirectEntrypoint(kQuickA64Load)); + codegen_->InvokeRuntime(kQuickA64Load, instruction, dex_pc); CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>(); if (type == Primitive::kPrimDouble) { // FP results are returned in core registers. Need to move them. @@ -3656,34 +4820,20 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->Out().IsRegisterPair()); dst = locations->Out().AsRegisterPairLow<Register>(); - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - if (obj == dst) { - __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ LoadFromOffset(kLoadWord, dst, obj, offset); - } else { - __ LoadFromOffset(kLoadWord, dst, obj, offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); - } } else { DCHECK(locations->Out().IsRegister()); dst = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(load_type, dst, obj, offset); } + __ LoadFromOffset(load_type, dst, obj, offset, null_checker); } else { DCHECK(locations->Out().IsFpuRegister()); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ LoadSFromOffset(dst, obj, offset); + __ LoadSFromOffset(dst, obj, offset, null_checker); } else { - __ LoadDFromOffset(dst, obj, offset); + __ LoadDFromOffset(dst, obj, offset, null_checker); } } - // Longs are handled earlier. 
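A note on the GenConditionalMoveR6() hunk above before continuing with the field-access changes: R6 lowers HSelect without branches by masking each source with the condition register and OR-ing the results, and on the FPU side sel.fmt only tests bit 0 of the condition, hence the Sltu/Mtc1 normalization. The following is a standalone C++ model of the integer case, not ART code; the helper names mirror the instruction mnemonics only.

    #include <cstdint>

    // MIPS32r6 semantics: seleqz rd, rs, rt  =>  rd = (rt == 0) ? rs : 0
    //                     selnez rd, rs, rt  =>  rd = (rt != 0) ? rs : 0
    static uint32_t Seleqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }
    static uint32_t Selnez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }

    // dst = cond ? true_src : false_src, as emitted for the general integer case
    // (cond_inverted == false). The non-selected source is masked to zero, so the
    // final Or() keeps only the selected value.
    uint32_t SelectInt(uint32_t cond, uint32_t true_src, uint32_t false_src) {
      uint32_t at  = Selnez(true_src, cond);   // AT  = true_src  if cond != 0, else 0
      uint32_t tmp = Seleqz(false_src, cond);  // TMP = false_src if cond == 0, else 0
      return at | tmp;                         // Or(dst, AT, TMP)
    }

When one input is the zero constant (the IsZeroBitPattern() DCHECKs in the hunk), a single Seleqz/Selnez already produces the right result, which is why the locations may hold a zero constant instead of a register.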
- if (type != Primitive::kPrimLong) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } if (is_volatile) { @@ -3715,9 +4865,9 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field } } else { if (Primitive::IsFloatingPointType(field_type)) { - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1))); } else { - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1))); } } } @@ -3728,9 +4878,11 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, Primitive::Type type = field_info.GetFieldType(); LocationSummary* locations = instruction->GetLocations(); Register obj = locations->InAt(0).AsRegister<Register>(); + Location value_location = locations->InAt(1); StoreOperandType store_type = kStoreByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + auto null_checker = GetImplicitNullChecker(instruction); switch (type) { case Primitive::kPrimBoolean: @@ -3767,69 +4919,56 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); if (type == Primitive::kPrimDouble) { // Pass FP parameters in core registers. - Location in = locations->InAt(1); - if (in.IsFpuRegister()) { - __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), in.AsFpuRegister<FRegister>()); + if (value_location.IsFpuRegister()) { + __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), + value_location.AsFpuRegister<FRegister>()); __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(), - in.AsFpuRegister<FRegister>()); - } else if (in.IsDoubleStackSlot()) { + value_location.AsFpuRegister<FRegister>()); + } else if (value_location.IsDoubleStackSlot()) { __ LoadFromOffset(kLoadWord, locations->GetTemp(1).AsRegister<Register>(), SP, - in.GetStackIndex()); + value_location.GetStackIndex()); __ LoadFromOffset(kLoadWord, locations->GetTemp(2).AsRegister<Register>(), SP, - in.GetStackIndex() + 4); + value_location.GetStackIndex() + 4); } else { - DCHECK(in.IsConstant()); - DCHECK(in.GetConstant()->IsDoubleConstant()); - int64_t value = bit_cast<int64_t, double>(in.GetConstant()->AsDoubleConstant()->GetValue()); + DCHECK(value_location.IsConstant()); + DCHECK(value_location.GetConstant()->IsDoubleConstant()); + int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); __ LoadConst64(locations->GetTemp(2).AsRegister<Register>(), locations->GetTemp(1).AsRegister<Register>(), value); } } - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store), - instruction, - dex_pc, - nullptr, - IsDirectEntrypoint(kQuickA64Store)); + codegen_->InvokeRuntime(kQuickA64Store, instruction, dex_pc); CheckEntrypointTypes<kQuickA64Store, void, volatile int64_t *, int64_t>(); } else { - if (!Primitive::IsFloatingPointType(type)) { + if (value_location.IsConstant()) { + int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker); + } else if (!Primitive::IsFloatingPointType(type)) { Register src; if (type == Primitive::kPrimLong) { - DCHECK(locations->InAt(1).IsRegisterPair()); - src = locations->InAt(1).AsRegisterPairLow<Register>(); - Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>(); - __ StoreToOffset(kStoreWord, src, obj, offset); - 
codegen_->MaybeRecordImplicitNullCheck(instruction); - __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize); + src = value_location.AsRegisterPairLow<Register>(); } else { - DCHECK(locations->InAt(1).IsRegister()); - src = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(store_type, src, obj, offset); + src = value_location.AsRegister<Register>(); } + __ StoreToOffset(store_type, src, obj, offset, null_checker); } else { - DCHECK(locations->InAt(1).IsFpuRegister()); - FRegister src = locations->InAt(1).AsFpuRegister<FRegister>(); + FRegister src = value_location.AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ StoreSToOffset(src, obj, offset); + __ StoreSToOffset(src, obj, offset, null_checker); } else { - __ StoreDToOffset(src, obj, offset); + __ StoreDToOffset(src, obj, offset, null_checker); } } - // Longs are handled earlier. - if (type != Primitive::kPrimLong) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } // TODO: memory barriers? if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) { - DCHECK(locations->InAt(1).IsRegister()); - Register src = locations->InAt(1).AsRegister<Register>(); + Register src = value_location.AsRegister<Register>(); codegen_->MarkGCCard(obj, src); } @@ -3973,7 +5112,7 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -4042,6 +5181,8 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( } // We disable PC-relative load when there is an irreducible loop, as the optimization // is incompatible with it. + // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods + // with irreducible loops. bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool fallback_load = has_irreducible_loops; switch (desired_string_load_kind) { @@ -4057,10 +5198,8 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); fallback_load = false; break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); - // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods - // with irreducible loops. break; case HLoadString::LoadKind::kDexCacheViaMethod: fallback_load = false; @@ -4140,7 +5279,7 @@ Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticO HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method ATTRIBUTE_UNUSED) { + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; // We disable PC-relative load when there is an irreducible loop, as the optimization // is incompatible with it. 
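The HandleFieldGet()/HandleFieldSet() hunks above replace the hand-interleaved MaybeRecordImplicitNullCheck() calls (including the special-cased 64-bit load/store pairs) with a null_checker obtained from GetImplicitNullChecker() and forwarded into LoadFromOffset()/StoreToOffset(). A plausible shape, sketched here with a toy emitter rather than the real MipsAssembler API, is a callable that the assembler invokes exactly once, right after the first memory instruction of the access, i.e. the one whose PC the fault handler sees for a null object.

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <vector>

    // Toy emitter: Emit() appends one encoded instruction and returns its index ("pc").
    struct ToyAssembler {
      std::vector<uint32_t> code;
      size_t Emit(uint32_t insn) { code.push_back(insn); return code.size() - 1; }
    };

    // Model of a 64-bit field load taking a null_checker callback. The caller no longer
    // needs to interleave the record-PC call by hand: the callback fires once, after the
    // first word of the access.
    void LoadLongFromOffset(ToyAssembler* assembler,
                            const std::function<void()>& null_checker) {
      assembler->Emit(/* lw dst_lo, offset(obj)     */ 0x8C000000);
      null_checker();  // in ART this is where MaybeRecordImplicitNullCheck() would run
      assembler->Emit(/* lw dst_hi, offset + 4(obj) */ 0x8C000004);
    }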
@@ -4209,13 +5348,16 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke } switch (method_load_kind) { - case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { // temp = thread->string_init_entrypoint + uint32_t offset = + GetThreadOffset<kMipsPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, - invoke->GetStringInitOffset()); + offset); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; @@ -4274,7 +5416,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke // T9 prepared above for better instruction scheduling. // T9() __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: // TODO: Implement this type. @@ -4290,7 +5432,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke kMipsPointerSize).Int32Value()); // T9() __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); break; } DCHECK(!IsLeafMethod()); @@ -4314,8 +5456,13 @@ void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDire } void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. 
+ InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); + Register temp = temp_location.AsRegister<Register>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kMipsPointerSize).SizeValue(); @@ -4323,8 +5470,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize); // temp = object->GetClass(); - DCHECK(receiver.IsRegister()); - __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); + __ LoadFromOffset(kLoadWord, temp, receiver, class_offset); MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); @@ -4332,7 +5478,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); } void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -4386,11 +5532,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { LocationSummary* locations = cls->GetLocations(); if (cls->NeedsAccessCheck()) { codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), - cls, - cls->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess)); + codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -4441,21 +5583,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { DCHECK(!kEmitCompilerReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); - if (isR6) { - __ Bind(&info->high_label); - __ Bind(&info->pc_rel_label); - // Add a 32-bit offset to PC. - __ Auipc(out, /* placeholder */ 0x1234); - __ Addiu(out, out, /* placeholder */ 0x5678); - } else { - __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); - // We do not bind info->pc_rel_label here, we'll use the assembler's label - // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. - __ Ori(out, out, /* placeholder */ 0x5678); - // Add a 32-bit offset to PC. - __ Addu(out, out, base_or_current_method_reg); - } + codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -4544,7 +5672,9 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); @@ -4553,12 +5683,12 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: if (codegen_->GetInstructionSetFeatures().IsR6()) { break; } FALLTHROUGH_INTENDED; // We need an extra register for PC-relative dex cache accesses. - case HLoadString::LoadKind::kDexCachePcRelative: case HLoadString::LoadKind::kDexCacheViaMethod: locations->SetInAt(0, Location::RequiresRegister()); break; @@ -4580,13 +5710,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); break; - // We need an extra register for PC-relative dex cache accesses. - case HLoadString::LoadKind::kDexCachePcRelative: - case HLoadString::LoadKind::kDexCacheViaMethod: - base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); - break; default: base_or_current_method_reg = ZERO; break; @@ -4602,23 +5728,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(!kEmitCompilerReadBarrier); + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); - if (isR6) { - __ Bind(&info->high_label); - __ Bind(&info->pc_rel_label); - // Add a 32-bit offset to PC. - __ Auipc(out, /* placeholder */ 0x1234); - __ Addiu(out, out, /* placeholder */ 0x5678); - } else { - __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); - // We do not bind info->pc_rel_label here, we'll use the assembler's label - // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. - __ Ori(out, out, /* placeholder */ 0x5678); - // Add a 32-bit offset to PC. - __ Addu(out, out, base_or_current_method_reg); - } + codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { @@ -4630,52 +5743,28 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); - static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); - DCHECK_ALIGNED(load->GetAddress(), 4u); - int16_t offset = Low16Bits(address); - uint32_t base_address = address - offset; // This accounts for offset sign extension. 
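An aside on the removed kDexCacheAddress path that follows: the line `base_address = address - offset` compensates for the fact that lw/addiu sign-extend their 16-bit immediate, so the high half loaded by lui has to be biased when the low half looks negative. A standalone check of that arithmetic (illustrative code, not ART's, though Low16Bits()/High16Bits() exist as helpers in the tree):

    #include <cassert>
    #include <cstdint>

    int16_t Low16Bits(uint32_t value) { return static_cast<int16_t>(value); }
    uint16_t High16Bits(uint32_t value) { return static_cast<uint16_t>(value >> 16); }

    void SplitForLuiPlusLoad(uint32_t address) {
      int16_t offset = Low16Bits(address);        // reinterpreted as signed, may be negative
      uint32_t base_address = address - offset;   // biases the high half when offset < 0
      // lui out, High16Bits(base_address); lw out, offset(out)
      // reconstructs base_address + sign_extend(offset) == address.
      assert(base_address + static_cast<int32_t>(offset) == address);
      assert((base_address & 0xFFFFu) == 0u);     // low half is clear, so lui alone materializes it
      (void)High16Bits(base_address);
    }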
- __ Lui(out, High16Bits(base_address)); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(load, out_loc, out, offset); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - HMipsDexCacheArraysBase* base = load->InputAt(0)->AsMipsDexCacheArraysBase(); - int32_t offset = - load->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; - // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset) - GenerateGcRootFieldLoad(load, out_loc, base_or_current_method_reg, offset); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad(load, - out_loc, - base_or_current_method_reg, - ArtMethod::DeclaringClassOffset().Int32Value()); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad(load, - out_loc, - out, - CodeGenerator::GetCacheOffset(load->GetStringIndex())); - break; + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg); + __ LoadFromOffset(kLoadWord, out, out, 0); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); + codegen_->AddSlowPath(slow_path); + __ Beqz(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); - codegen_->AddSlowPath(slow_path); - __ Beqz(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. 
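The new kBssEntry case above caches the resolved String in a .bss slot addressed PC-relatively: the generated code loads the slot, and a zero value diverts to LoadStringSlowPathMIPS so later executions stay on the fast path. A minimal model of that contract follows; the names are hypothetical and the slot update is simplified relative to how the runtime actually publishes the entry.

    #include <cstdint>

    // Stand-in for the resolution work reached through the slow path (kQuickResolveString).
    static void* ResolveStringForSlot(uint32_t string_index) {
      static char dummy[1];
      (void)string_index;
      return dummy;  // placeholder for the resolved String object
    }

    // bss_slot plays the role of the .bss entry whose address the
    // EmitPcRelativeAddressPlaceholder() sequence materializes into `out`.
    void* LoadStringBssEntry(void** bss_slot, uint32_t string_index) {
      void* str = *bss_slot;                       // lw out, 0(out)
      if (str == nullptr) {                        // beqz out, slow_path
        str = ResolveStringForSlot(string_index);  // slow path resolves the string
        *bss_slot = str;                           // simplified: the runtime fills the slot
      }
      return str;
    }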
+ DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex()); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) { @@ -4696,18 +5785,10 @@ void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) void InstructionCodeGeneratorMIPS::VisitMonitorOperation(HMonitorOperation* instruction) { if (instruction->IsEnter()) { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLockObject), - instruction, - instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickLockObject)); + codegen_->InvokeRuntime(kQuickLockObject, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); } else { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pUnlockObject), - instruction, - instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickUnlockObject)); + codegen_->InvokeRuntime(kQuickUnlockObject, instruction, instruction->GetDexPc()); } CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); } @@ -4882,12 +5963,7 @@ void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) { __ Lw(current_method_register, SP, kCurrentMethodStackOffset); // Move an uint16_t value to a register. __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); - codegen_->InvokeRuntime( - GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(), - instruction, - instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickAllocArrayWithAccessCheck)); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } @@ -4913,15 +5989,10 @@ void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString)); __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value()); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { - codegen_->InvokeRuntime( - GetThreadOffset<kMipsPointerSize>(instruction->GetEntrypoint()).Int32Value(), - instruction, - instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickAllocObjectWithAccessCheck)); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } } @@ -4973,14 +6044,8 @@ void InstructionCodeGeneratorMIPS::VisitBooleanNot(HBooleanNot* instruction) { } void LocationsBuilderMIPS::VisitNullCheck(HNullCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::RequiresRegister()); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void CodeGeneratorMIPS::GenerateImplicitNullCheck(HNullCheck* instruction) { @@ -5106,27 +6171,17 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { GenerateDivRemIntegral(instruction); break; case Primitive::kPrimLong: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), - instruction, - instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickLmod)); + codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); break; } case Primitive::kPrimFloat: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), - instruction, instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickFmodf)); + codegen_->InvokeRuntime(kQuickFmodf, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), - instruction, instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickFmod)); + codegen_->InvokeRuntime(kQuickFmod, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } @@ -5282,7 +6337,9 @@ void InstructionCodeGeneratorMIPS::VisitUnresolvedStaticFieldSet( } void LocationsBuilderMIPS::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -5307,11 +6364,7 @@ void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) { } void InstructionCodeGeneratorMIPS::VisitThrow(HThrow* instruction) { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException), - instruction, - instruction->GetDexPc(), - nullptr, - IsDirectEntrypoint(kQuickDeliverException)); + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } @@ -5432,15 +6485,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi __ Cvtdl(dst, FTMP); } } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) - : IsDirectEntrypoint(kQuickL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); + QuickEntrypointEnum entrypoint = (result_type == Primitive::kPrimFloat) ? kQuickL2f + : kQuickL2d; + codegen_->InvokeRuntime(entrypoint, conversion, conversion->GetDexPc()); if (result_type == Primitive::kPrimFloat) { CheckEntrypointTypes<kQuickL2f, float, int64_t>(); } else { @@ -5533,11 +6580,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi __ Bind(&done); } else { - int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); - bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) - : IsDirectEntrypoint(kQuickD2l); - codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct); + QuickEntrypointEnum entrypoint = (input_type == Primitive::kPrimFloat) ? kQuickF2l + : kQuickD2l; + codegen_->InvokeRuntime(entrypoint, conversion, conversion->GetDexPc()); if (input_type == Primitive::kPrimFloat) { CheckEntrypointTypes<kQuickF2l, int64_t, float>(); } else { @@ -5742,13 +6787,11 @@ void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { locations->SetInAt(0, Location::RequiresRegister()); } -void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - Register value_reg = locations->InAt(0).AsRegister<Register>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - +void InstructionCodeGeneratorMIPS::GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { // Create a set of compare/jumps. Register temp_reg = TMP; __ Addiu32(temp_reg, value_reg, -lower_bound); @@ -5757,7 +6800,7 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr // this case, index >= num_entries must be true. So that we can save one branch instruction. __ Bltz(temp_reg, codegen_->GetLabelOf(default_block)); - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); // Jump to successors[0] if value == lower_bound. __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0])); int32_t last_index = 0; @@ -5775,11 +6818,107 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr } // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + if (!codegen_->GoesToNextBlock(switch_block, default_block)) { __ B(codegen_->GetLabelOf(default_block)); } } +void InstructionCodeGeneratorMIPS::GenTableBasedPackedSwitch(Register value_reg, + Register constant_area, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { + // Create a jump table. + std::vector<MipsLabel*> labels(num_entries); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + labels[i] = codegen_->GetLabelOf(successors[i]); + } + JumpTable* table = __ CreateJumpTable(std::move(labels)); + + // Is the value in range? + __ Addiu32(TMP, value_reg, -lower_bound); + if (IsInt<16>(static_cast<int32_t>(num_entries))) { + __ Sltiu(AT, TMP, num_entries); + __ Beqz(AT, codegen_->GetLabelOf(default_block)); + } else { + __ LoadConst32(AT, num_entries); + __ Bgeu(TMP, AT, codegen_->GetLabelOf(default_block)); + } + + // We are in the range of the table. + // Load the target address from the jump table, indexing by the value. + __ LoadLabelAddress(AT, constant_area, table->GetLabel()); + __ Sll(TMP, TMP, 2); + __ Addu(TMP, TMP, AT); + __ Lw(TMP, TMP, 0); + // Compute the absolute target address by adding the table start address + // (the table contains offsets to targets relative to its start). 
+ __ Addu(TMP, TMP, AT); + // And jump. + __ Jr(TMP); + __ NopIfNoReordering(); +} + +void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + HBasicBlock* switch_block = switch_instr->GetBlock(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + if (codegen_->GetInstructionSetFeatures().IsR6() && + num_entries > kPackedSwitchJumpTableThreshold) { + // R6 uses PC-relative addressing to access the jump table. + // R2, OTOH, requires an HMipsComputeBaseMethodAddress input to access + // the jump table and it is implemented by changing HPackedSwitch to + // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress. + // See VisitMipsPackedSwitch() for the table-based implementation on R2. + GenTableBasedPackedSwitch(value_reg, + ZERO, + lower_bound, + num_entries, + switch_block, + default_block); + } else { + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_block, + default_block); + } +} + +void LocationsBuilderMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + // Constant area pointer (HMipsComputeBaseMethodAddress). + locations->SetInAt(1, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + Register constant_area = locations->InAt(1).AsRegister<Register>(); + HBasicBlock* switch_block = switch_instr->GetBlock(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // This is an R2-only path. HPackedSwitch has been changed to + // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress + // required to address the jump table relative to PC. + GenTableBasedPackedSwitch(value_reg, + constant_area, + lower_bound, + num_entries, + switch_block, + default_block); +} + void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress( HMipsComputeBaseMethodAddress* insn) { LocationSummary* locations = @@ -5813,24 +6952,8 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra Register reg = base->GetLocations()->Out().AsRegister<Register>(); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); - - if (codegen_->GetInstructionSetFeatures().IsR6()) { - __ Bind(&info->high_label); - __ Bind(&info->pc_rel_label); - // Add a 32-bit offset to PC. - __ Auipc(reg, /* placeholder */ 0x1234); - __ Addiu(reg, reg, /* placeholder */ 0x5678); - } else { - // Generate a dummy PC-relative call to obtain PC. - __ Nal(); - __ Bind(&info->high_label); - __ Lui(reg, /* placeholder */ 0x1234); - __ Bind(&info->pc_rel_label); - __ Ori(reg, reg, /* placeholder */ 0x5678); - // Add a 32-bit offset to PC. - __ Addu(reg, reg, RA); - // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()? 
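The R6-versus-threshold decision in VisitPackedSwitch() above follows the cost model spelled out for kPackedSwitchJumpTableThreshold in the code_generator_mips.h hunk further down: roughly 3 + 2.5*N words for the compare-and-jump form versus 11 + N words for the table form. A standalone check of that break-even point, using only the figures quoted in the comment:

    #include <cstddef>

    // Approximate code-size model quoted in the header comment.
    constexpr double CompareJumpWords(size_t n) { return 3 + 2.5 * n; }
    constexpr size_t TableWords(size_t n) { return 11 + n; }

    // At N = 6 the two forms are nearly the same size (18 vs 17 words); the table
    // form is only chosen from 7 cases on (num_entries > 6), where the gap widens.
    static_assert(CompareJumpWords(6) == 18.0, "matches the 18 words in the comment");
    static_assert(TableWords(6) == 17, "matches the 17 words in the comment");
    static_assert(CompareJumpWords(7) > TableWords(7), "table form is smaller from 7 cases on");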
- } + // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL. + codegen_->EmitPcRelativeAddressPlaceholder(info, reg, ZERO); } void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 63a0345c1c..e132819c24 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -191,6 +191,8 @@ class LocationsBuilderMIPS : public HGraphVisitor { void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + Location RegisterOrZeroConstant(HInstruction* instruction); + Location FpuRegisterOrConstantForStore(HInstruction* instruction); InvokeDexCallingConventionVisitorMIPS parameter_visitor_; @@ -218,6 +220,14 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { MipsAssembler* GetAssembler() const { return assembler_; } + // Compare-and-jump packed switch generates approx. 3 + 2.5 * N 32-bit + // instructions for N cases. + // Table-based packed switch generates approx. 11 32-bit instructions + // and N 32-bit data words for N cases. + // At N = 6 they come out as 18 and 17 32-bit words respectively. + // We switch to the table-based method starting with 7 cases. + static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; + private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); void GenerateMemoryBarrier(MemBarrierKind kind); @@ -237,12 +247,38 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { Register obj, uint32_t offset); void GenerateIntCompare(IfCondition cond, LocationSummary* locations); + // When the function returns `false` it means that the condition holds if `dst` is non-zero + // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero + // `dst` are exchanged. + bool MaterializeIntCompare(IfCondition cond, + LocationSummary* input_locations, + Register dst); void GenerateIntCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label); void GenerateLongCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label); + void GenerateFpCompare(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations); + // When the function returns `false` it means that the condition holds if the condition + // code flag `cc` is non-zero and doesn't hold if `cc` is zero. If it returns `true`, + // the roles of zero and non-zero values of the `cc` flag are exchanged. + bool MaterializeFpCompareR2(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* input_locations, + int cc); + // When the function returns `false` it means that the condition holds if `dst` is non-zero + // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero + // `dst` are exchanged. 
+ bool MaterializeFpCompareR6(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* input_locations, + FRegister dst); void GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, Primitive::Type type, @@ -257,6 +293,20 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + auto GetImplicitNullChecker(HInstruction* instruction); + void GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); + void GenTableBasedPackedSwitch(Register value_reg, + Register constant_area, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); + void GenConditionalMoveR2(HSelect* select); + void GenConditionalMoveR6(HSelect* select); MipsAssembler* const assembler_; CodeGeneratorMIPS* const codegen_; @@ -273,6 +323,7 @@ class CodeGeneratorMIPS : public CodeGenerator { virtual ~CodeGeneratorMIPS() {} void ComputeSpillMask() OVERRIDE; + bool HasAllocatedCalleeSaveRegisters() const OVERRIDE; void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; @@ -304,10 +355,10 @@ class CodeGeneratorMIPS : public CodeGenerator { void SetupBlockedRegisters() const OVERRIDE; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id); - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id); - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id); - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id); + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; void ClobberRA() { clobbered_ra_ = true; } @@ -315,9 +366,6 @@ class CodeGeneratorMIPS : public CodeGenerator { void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; - // Blocks all register pairs made out of blocked core registers. 
- void UpdateBlockedPairRegisters() const; - InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips; } const MipsInstructionSetFeatures& GetInstructionSetFeatures() const { @@ -338,7 +386,7 @@ class CodeGeneratorMIPS : public CodeGenerator { void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; - void MoveConstant(Location destination, int32_t value); + void MoveConstant(Location destination, int32_t value) OVERRIDE; void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; @@ -346,17 +394,11 @@ class CodeGeneratorMIPS : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path) OVERRIDE; - - void InvokeRuntime(int32_t offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path, - bool is_direct_entrypoint); + SlowPathCode* slow_path = nullptr) OVERRIDE; ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } - bool NeedsTwoRegisters(Primitive::Type type) const { + bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { return type == Primitive::kPrimLong; } @@ -374,7 +416,7 @@ class CodeGeneratorMIPS : public CodeGenerator { // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method) OVERRIDE; + HInvokeStaticOrDirect* invoke) OVERRIDE; void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; @@ -384,9 +426,9 @@ class CodeGeneratorMIPS : public CodeGenerator { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS"; } - void GenerateNop(); - void GenerateImplicitNullCheck(HNullCheck* instruction); - void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateNop() OVERRIDE; + void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays // and boot image strings. The only difference is the interpretation of the offset_or_index. @@ -414,6 +456,8 @@ class CodeGeneratorMIPS : public CodeGenerator { Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); + void EmitPcRelativeAddressPlaceholder(PcRelativePatchInfo* info, Register out, Register base); + private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); @@ -434,6 +478,10 @@ class CodeGeneratorMIPS : public CodeGenerator { uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. MipsLabel* block_labels_; MipsLabel frame_entry_label_; @@ -452,7 +500,7 @@ class CodeGeneratorMIPS : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info. 
+ // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Deduplication map for boot type literals for kBootImageLinkTimeAddress. BootTypeToLiteralMap boot_image_type_patches_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 3472830379..010bf24232 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -102,8 +102,8 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) return Mips64ReturnLocation(type); } -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value() class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { @@ -127,13 +127,10 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() - ? QUICK_ENTRY_POINT(pThrowStringBounds) - : QUICK_ENTRY_POINT(pThrowArrayBounds); - mips64_codegen->InvokeRuntime(entry_point_offset, - instruction_, - instruction_->GetDexPc(), - this); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() + ? kQuickThrowStringBounds + : kQuickThrowArrayBounds; + mips64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -153,14 +150,7 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. - SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), - instruction_, - instruction_->GetDexPc(), - this); + mips64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } @@ -191,9 +181,9 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { InvokeRuntimeCallingConvention calling_convention; __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex()); - int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) - : QUICK_ENTRY_POINT(pInitializeType); - mips64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); + QuickEntrypointEnum entrypoint = do_clinit_ ? 
kQuickInitializeStaticStorage + : kQuickInitializeType; + mips64_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this); if (do_clinit_) { CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); } else { @@ -246,7 +236,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { InvokeRuntimeCallingConvention calling_convention; const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), + mips64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); @@ -277,7 +267,7 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + mips64_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this); @@ -300,13 +290,8 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), - instruction_, - instruction_->GetDexPc(), - this); + mips64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ Bc(GetReturnLabel()); } else { @@ -357,10 +342,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction_, - dex_pc, - this); + mips64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes< kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); @@ -368,7 +350,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { DCHECK(instruction_->IsCheckCast()); - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); + mips64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } @@ -390,11 +372,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - instruction_, - instruction_->GetDexPc(), - this); + mips64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -429,8 +407,8 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, } #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. 
-#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value() void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { @@ -578,9 +556,14 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() { __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize()); - static_assert(IsInt<16>(kCurrentMethodStackOffset), - "kCurrentMethodStackOffset must fit into int16_t"); - __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset); + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + static_assert(IsInt<16>(kCurrentMethodStackOffset), + "kCurrentMethodStackOffset must fit into int16_t"); + __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset); + } } void CodeGeneratorMIPS64::GenerateFrameExit() { @@ -961,25 +944,20 @@ void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int re } void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path) { - InvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value(), - instruction, - dex_pc, - slow_path); -} - -void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { - ValidateInvokeRuntime(instruction, slow_path); + ValidateInvokeRuntime(entrypoint, instruction, slow_path); // TODO: anything related to T9/GP/GOT/PIC/.so's? - __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + __ LoadFromOffset(kLoadDoubleword, + T9, + TR, + GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value()); __ Jalr(T9); __ Nop(); - RecordPcInfo(instruction, dex_pc, slow_path); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } } void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, @@ -1516,10 +1494,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { } } else { DCHECK_EQ(value_type, Primitive::kPrimNot); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } break; @@ -1584,15 +1559,13 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { } void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -2136,14 +2109,8 @@ void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) { } void LocationsBuilderMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) { @@ -2656,6 +2623,7 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3009,7 +2977,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method ATTRIBUTE_UNUSED) { + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { switch (desired_dispatch_info.method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: @@ -3043,13 +3011,16 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
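Both the InvokeRuntime() consolidation in the hunks above and the kStringInit change just below follow the same shape: an entrypoint enum is translated to a Thread-local offset at code-generation time, the call goes through T9, and PC info is recorded only when the entrypoint needs a stack map. A rough standalone model with hypothetical helper names, not the ART API:

    #include <cstdint>

    // Stand-ins for QuickEntrypointEnum, GetThreadOffset<>() and EntrypointRequiresStackMap();
    // the real tables live in the runtime.
    enum class Entrypoint { kLockObject, kFmod, kResolveString, kDeoptimize };

    int32_t ThreadOffsetOf(Entrypoint e) {
      return 0x200 + static_cast<int32_t>(e) * 8;  // illustrative layout only
    }
    // Assumption: purely arithmetic helpers such as fmod cannot GC or throw,
    // so no stack map is needed for them.
    bool RequiresStackMap(Entrypoint e) { return e != Entrypoint::kFmod; }

    // Returns whether PC info would be recorded for this call.
    bool InvokeRuntimeModel(Entrypoint e, uint32_t dex_pc) {
      int32_t offset = ThreadOffsetOf(e);   // lw/ld t9, offset(s1); jalr t9; nop
      (void)offset;
      (void)dex_pc;
      return RequiresStackMap(e);           // gate for RecordPcInfo(instruction, dex_pc, ...)
    }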
switch (invoke->GetMethodLoadKind()) { - case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { // temp = thread->string_init_entrypoint + uint32_t offset = + GetThreadOffset<kMips64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, - invoke->GetStringInitOffset()); + offset); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; @@ -3189,10 +3160,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { LocationSummary* locations = cls->GetLocations(); if (cls->NeedsAccessCheck()) { codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), - cls, - cls->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3263,22 +3231,11 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { } void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) { - LocationSummary* locations = load->GetLocations(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>(); - __ LoadFromOffset(kLoadUnsignedWord, out, current_method, - ArtMethod::DeclaringClassOffset().Int32Value()); - __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset( - kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - // TODO: We will need a read barrier here. - - if (!load->IsInDexCache()) { - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); - codegen_->AddSlowPath(slow_path); - __ Beqzc(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); + codegen_->AddSlowPath(slow_path); + __ Bc(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) { @@ -3298,12 +3255,9 @@ void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instructio } void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) { - codegen_->InvokeRuntime(instruction->IsEnter() - ? QUICK_ENTRY_POINT(pLockObject) - : QUICK_ENTRY_POINT(pUnlockObject), + codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject, instruction, - instruction->GetDexPc(), - nullptr); + instruction->GetDexPc()); if (instruction->IsEnter()) { CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); } else { @@ -3431,10 +3385,7 @@ void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) { LocationSummary* locations = instruction->GetLocations(); // Move an uint16_t value to a register. 
__ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex()); - codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } @@ -3463,10 +3414,7 @@ void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) __ Nop(); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } } @@ -3509,14 +3457,8 @@ void InstructionCodeGeneratorMIPS64::VisitBooleanNot(HBooleanNot* instruction) { } void LocationsBuilderMIPS64::VisitNullCheck(HNullCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::RequiresRegister()); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void CodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) { @@ -3637,9 +3579,8 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf) - : QUICK_ENTRY_POINT(pFmod); - codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr); + QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod; + codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); if (type == Primitive::kPrimFloat) { CheckEntrypointTypes<kQuickFmodf, float, float, float>(); } else { @@ -3795,7 +3736,9 @@ void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet( } void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -3820,10 +3763,7 @@ void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) { } void InstructionCodeGeneratorMIPS64::VisitThrow(HThrow* instruction) { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 197f86b22b..690eccb7d8 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -285,10 +285,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void SetupBlockedRegisters() const OVERRIDE; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id); - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id); - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id); - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id); + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; @@ -323,16 +323,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path) OVERRIDE; - - void InvokeRuntime(int32_t offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path); + SlowPathCode* slow_path = nullptr) OVERRIDE; ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } - bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const { return false; } + bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. @@ -348,7 +343,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method) OVERRIDE; + HInvokeStaticOrDirect* invoke) OVERRIDE; void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; @@ -358,9 +353,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64"; } - void GenerateNop(); - void GenerateImplicitNullCheck(HNullCheck* instruction); - void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateNop() OVERRIDE; + void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; private: // Labels for each block that will be compiled. 
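The MIPS64 hunks above (and the x86 ones below) keep replacing the same boilerplate: hand-picking LocationSummary::kCallOnSlowPath vs. LocationSummary::kNoCall via CanThrowIntoCatchBlock(), plus the now-removed `if (instruction->HasUses()) locations->SetOut(Location::SameAsFirstInput());` clause, with a single codegen_->CreateThrowingSlowPathLocations() call. The helper's definition is not part of the hunks shown here, so the following is only a rough sketch of what such a helper would plausibly do, reconstructed from the removed code and from the two call forms used in this diff (with and without an explicit caller-saves set); treat the body and the default argument as assumptions, not as the actual ART implementation.

// Hypothetical sketch only -- inferred from the pattern removed above, not the real helper.
// (Assumed to be declared with a default `caller_saves = RegisterSet::Empty()` argument.)
LocationSummary* CodeGenerator::CreateThrowingSlowPathLocations(HInstruction* instruction,
                                                                RegisterSet caller_saves) {
  // A throwing check needs a runtime call only when the exception can be caught
  // in the same method; otherwise the slow path never returns to compiled code.
  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
      ? LocationSummary::kCallOnSlowPath
      : LocationSummary::kNoCall;
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
  if (call_kind == LocationSummary::kCallOnSlowPath) {
    // Only the registers the throw entrypoint actually needs (if any) are
    // treated as caller-saves around the slow path.
    locations->SetCustomSlowPathCallerSaves(caller_saves);
  }
  return locations;
}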
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index a2fa24542c..efd33c7025 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -47,8 +47,8 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value() class NullCheckSlowPathX86 : public SlowPathCode { @@ -62,7 +62,7 @@ class NullCheckSlowPathX86 : public SlowPathCode { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + x86_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this); @@ -84,14 +84,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. - SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), - instruction_, - instruction_->GetDexPc(), - this); + x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } @@ -157,6 +150,9 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); } __ movl(length_loc.AsRegister<Register>(), array_len); + if (mirror::kUseStringCompression) { + __ andl(length_loc.AsRegister<Register>(), Immediate(INT32_MAX)); + } } x86_codegen->EmitParallelMoves( locations->InAt(0), @@ -165,13 +161,10 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { length_loc, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() - ? QUICK_ENTRY_POINT(pThrowStringBounds) - : QUICK_ENTRY_POINT(pThrowArrayBounds); - x86_codegen->InvokeRuntime(entry_point_offset, - instruction_, - instruction_->GetDexPc(), - this); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() + ? 
kQuickThrowStringBounds + : kQuickThrowArrayBounds; + x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -192,13 +185,8 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), - instruction_, - instruction_->GetDexPc(), - this); + x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -239,14 +227,18 @@ class LoadStringSlowPathX86 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index)); - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), - instruction_, - instruction_->GetDexPc(), - this); + x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); RestoreLiveRegisters(codegen, locations); + // Store the resolved String to the BSS entry. + Register method_address = locations->InAt(0).AsRegister<Register>(); + __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset), + locations->Out().AsRegister<Register>()); + Label* fixup_label = x86_codegen->NewStringBssEntryPatch(instruction_->AsLoadString()); + __ Bind(fixup_label); + __ jmp(GetExitLabel()); } @@ -274,8 +266,8 @@ class LoadClassSlowPathX86 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex())); - x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) - : QUICK_ENTRY_POINT(pInitializeType), + x86_codegen->InvokeRuntime(do_clinit_ ? 
kQuickInitializeStaticStorage + : kQuickInitializeType, at_, dex_pc_, this); if (do_clinit_) { CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); @@ -344,7 +336,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), + x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, instruction_->GetDexPc(), this); @@ -352,10 +344,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode { kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction_, - instruction_->GetDexPc(), - this); + x86_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } @@ -386,11 +375,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - instruction_, - instruction_->GetDexPc(), - this); + x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -429,10 +414,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction_, - instruction_->GetDexPc(), - this); + x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -444,11 +426,25 @@ class ArraySetSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathX86 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj) - : SlowPathCode(instruction), obj_(obj) { + ReadBarrierMarkSlowPathX86(HInstruction* instruction, + Location ref, + bool unpoison_ref_before_marking) + : SlowPathCode(instruction), + ref_(ref), + unpoison_ref_before_marking_(unpoison_ref_before_marking) { DCHECK(kEmitCompilerReadBarrier); } @@ -456,54 +452,228 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Register reg = obj_.AsRegister<Register>(); + Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || + instruction_->IsArraySet() || instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); - DCHECK_NE(reg, ESP); - DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; + DCHECK_NE(ref_reg, ESP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in EAX): // - // EAX <- obj + // EAX <- ref // EAX <- ReadBarrierMark(EAX) - // obj <- EAX + // ref <- EAX // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathX86 above, which never tries to update +// `obj.field`). 
+// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). +class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { + public: + ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction, + Location ref, + Register obj, + const Address& field_addr, + bool unpoison_ref_before_marking, + Register temp) + : SlowPathCode(instruction), + ref_(ref), + obj_(obj), + field_addr_(field_addr), + unpoison_ref_before_marking_(unpoison_ref_before_marking), + temp_(temp) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + + __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + } + + // Save the old (unpoisoned) reference. + __ movl(temp_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + DCHECK_NE(ref_reg, ESP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in EAX): + // + // EAX <- ref + // EAX <- ReadBarrierMark(EAX) + // ref <- EAX + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); + // This runtime call does not require a stack map. + x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*field_addr`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LOCK CMPXCHGL instruction in the compare-and-set (CAS) + // operation below would abort the CAS, leaving the field as-is. + NearLabel done; + __ cmpl(temp_, ref_reg); + __ j(kEqual, &done); + + // Update the holder's field atomically. This may fail if + // the mutator updates before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. 
+ // This operation is implemented with a 32-bit LOCK CMPXCHGL + // instruction, which requires the expected value (the old + // reference) to be in EAX. Save EAX beforehand, and move the + // expected value (stored in `temp_`) into EAX. + __ pushl(EAX); + __ movl(EAX, temp_); + + // Convenience aliases. + Register base = obj_; + Register expected = EAX; + Register value = ref_reg; + + bool base_equals_value = (base == value); + if (kPoisonHeapReferences) { + if (base_equals_value) { + // If `base` and `value` are the same register location, move + // `value` to a temporary register. This way, poisoning + // `value` won't invalidate `base`. + value = temp_; + __ movl(value, base); + } + + // Check that the register allocator did not assign the location + // of `expected` (EAX) to `value` nor to `base`, so that heap + // poisoning (when enabled) works as intended below. + // - If `value` were equal to `expected`, both references would + // be poisoned twice, meaning they would not be poisoned at + // all, as heap poisoning uses address negation. + // - If `base` were equal to `expected`, poisoning `expected` + // would invalidate `base`. + DCHECK_NE(value, expected); + DCHECK_NE(base, expected); + + __ PoisonHeapReference(expected); + __ PoisonHeapReference(value); + } + + __ LockCmpxchgl(field_addr_, value); + + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. + if (kPoisonHeapReferences) { + if (base_equals_value) { + // `value` has been moved to a temporary register, no need + // to unpoison it. + } else { + __ UnpoisonHeapReference(value); + } + // No need to unpoison `expected` (EAX), as it is overwritten below. + } + + // Restore EAX. + __ popl(EAX); + + __ Bind(&done); + __ jmp(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The address of the marked reference field. The base of this address must be `obj_`. + const Address field_addr_; + + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; + + const Register temp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86); +}; + // Slow path generating a read barrier for a heap reference. 
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { public: @@ -643,10 +813,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_)); } - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), - instruction_, - instruction_->GetDexPc(), - this); + x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes< kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); x86_codegen->Move32(out_, Location::RegisterLocation(EAX)); @@ -710,7 +877,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, instruction_->GetDexPc(), this); @@ -731,8 +898,8 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { }; #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<X86Assembler*>(GetAssembler())-> /* NOLINT */ +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT inline Condition X86Condition(IfCondition cond) { switch (cond) { @@ -803,25 +970,21 @@ void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { - InvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value(), - instruction, - dex_pc, - slow_path); -} - -void CodeGeneratorX86::InvokeRuntime(int32_t entry_point_offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path) { - ValidateInvokeRuntime(instruction, slow_path); - __ fs()->call(Address::Absolute(entry_point_offset)); - RecordPcInfo(instruction, dex_pc, slow_path); + ValidateInvokeRuntime(entrypoint, instruction, slow_path); + GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value()); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } } void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, HInstruction* instruction, SlowPathCode* slow_path) { ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + GenerateInvokeRuntime(entry_point_offset); +} + +void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { __ fs()->call(Address::Absolute(entry_point_offset)); } @@ -859,24 +1022,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, } void CodeGeneratorX86::SetupBlockedRegisters() const { - // Don't allocate the dalvik style register pair passing. - blocked_register_pairs_[ECX_EDX] = true; - // Stack register is always reserved. 
blocked_core_registers_[ESP] = true; - - UpdateBlockedPairRegisters(); -} - -void CodeGeneratorX86::UpdateBlockedPairRegisters() const { - for (int i = 0; i < kNumberOfRegisterPairs; i++) { - X86ManagedRegister current = - X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); - if (blocked_core_registers_[current.AsRegisterPairLow()] - || blocked_core_registers_[current.AsRegisterPairHigh()]) { - blocked_register_pairs_[i] = true; - } - } } InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen) @@ -916,7 +1063,12 @@ void CodeGeneratorX86::GenerateFrameEntry() { int adjust = GetFrameSize() - FrameEntrySpillSize(); __ subl(ESP, Immediate(adjust)); __ cfi().AdjustCFAOffset(adjust); - __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); + } } void CodeGeneratorX86::GenerateFrameExit() { @@ -1122,15 +1274,11 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int64_t value; - if (constant->IsLongConstant()) { - value = constant->AsLongConstant()->GetValue(); - } else { - DCHECK(constant->IsDoubleConstant()); - value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); - } + DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant()); + int64_t value = GetInt64ValueOf(constant); __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value))); - __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value))); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), + Immediate(High32Bits(value))); } else { DCHECK(source.IsDoubleStackSlot()) << source; EmitParallelMoves( @@ -1480,14 +1628,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio Location lhs = condition->GetLocations()->InAt(0); Location rhs = condition->GetLocations()->InAt(1); // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition). - if (rhs.IsRegister()) { - __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); - } else if (rhs.IsConstant()) { - int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant); - } else { - __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); - } + codegen_->GenerateIntCompare(lhs, rhs); if (true_target == nullptr) { __ j(X86Condition(condition->GetOppositeCondition()), false_target); } else { @@ -1522,6 +1663,7 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } @@ -1580,18 +1722,6 @@ void LocationsBuilderX86::VisitSelect(HSelect* select) { locations->SetOut(Location::SameAsFirstInput()); } -void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) { - Register lhs_reg = lhs.AsRegister<Register>(); - if (rhs.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - codegen_->Compare32BitValue(lhs_reg, value); - } else if (rhs.IsStackSlot()) { - __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex())); - } else { - __ cmpl(lhs_reg, rhs.AsRegister<Register>()); - } -} - void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { LocationSummary* locations = select->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); @@ -1621,7 +1751,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong); DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())); LocationSummary* cond_locations = condition->GetLocations(); - GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1)); + codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1)); cond = X86Condition(condition->GetCondition()); } } else { @@ -1730,7 +1860,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { // Clear output register: setb only sets the low byte. __ xorl(reg, reg); - GenerateIntCompare(lhs, rhs); + codegen_->GenerateIntCompare(lhs, rhs); __ setb(X86Condition(cond->GetCondition()), reg); return; } @@ -2592,19 +2722,13 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimFloat: // Processing a Dex `float-to-long' instruction. - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l), - conversion, - conversion->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: // Processing a Dex `double-to-long' instruction. - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pD2l), - conversion, - conversion->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; @@ -3450,16 +3574,10 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>()); if (is_div) { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); } else { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); } break; @@ -3635,10 +3753,7 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { } void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); switch (instruction->GetType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -3658,9 +3773,6 @@ void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { default: LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType(); } - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { @@ -3685,7 +3797,7 @@ void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) } else { DCHECK(value.IsConstant()) << value; if (value.GetConstant()->AsIntConstant()->GetValue() == 0) { - __ jmp(slow_path->GetEntryLabel()); + __ jmp(slow_path->GetEntryLabel()); } } break; @@ -4039,10 +4151,7 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { __ call(Address(temp, code_offset.Int32Value())); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -4063,10 +4172,7 @@ void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. - codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -4212,7 +4318,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { case Primitive::kPrimShort: case Primitive::kPrimChar: case Primitive::kPrimInt: { - GenerateIntCompare(left, right); + codegen_->GenerateIntCompare(left, right); break; } case Primitive::kPrimLong: { @@ -4320,7 +4426,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method ATTRIBUTE_UNUSED) { + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; // We disable pc-relative load when there is an irreducible loop, as the optimization @@ -4379,10 +4485,13 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO Location temp) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
switch (invoke->GetMethodLoadKind()) { - case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { // temp = thread->string_init_entrypoint - __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset())); + uint32_t offset = + GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); + __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset)); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; @@ -4391,7 +4500,8 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: __ movl(temp.AsRegister<Register>(), Immediate(/* placeholder */ 0)); - method_patches_.emplace_back(invoke->GetTargetMethod()); + method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { @@ -4400,7 +4510,7 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); // Bind a new fixup label at the end of the "movl" insn. uint32_t offset = invoke->GetDexCacheArrayOffset(); - __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset)); + __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFile(), offset)); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { @@ -4436,7 +4546,8 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ call(GetFrameEntryLabel()); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); Label* label = &relative_call_patches_.back().label; __ call(label); // Bind to the patch label, override at link time. __ Bind(label); // Bind the label at the end of the "call" insn. @@ -4495,7 +4606,8 @@ void CodeGeneratorX86::RecordSimplePatch() { } } -void CodeGeneratorX86::RecordStringPatch(HLoadString* load_string) { +void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); __ Bind(&string_patches_.back().label); } @@ -4505,6 +4617,12 @@ void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) { __ Bind(&type_patches_.back().label); } +Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { + DCHECK(!GetCompilerOptions().IsBootImage()); + string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); + return &string_patches_.back().label; +} + Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset) { // Add the patch entry and bind its label at the end of the instruction. 
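The rewritten patch bookkeeping in the next hunk iterates over ArenaDeque<PatchInfo<Label>> containers and reads info.dex_file, info.index and info.label, replacing the old MethodPatchInfo/StringPatchInfo/TypePatchInfo records. The PatchInfo definition itself is not shown in this diff; the following is only a guessed sketch of its shape, inferred from those uses, and the field comments are likewise assumptions.

// Hypothetical sketch only -- the real definition lives outside the hunks shown here.
template <typename LabelType>
struct PatchInfo {
  PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
      : dex_file(target_dex_file), index(target_index) { }

  const DexFile& dex_file;  // Dex file of the referenced method, string or type.
  uint32_t index;           // Method, string or type index within that dex file.
  LabelType label;          // Bound at the end of the instruction to be patched.
};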
@@ -4512,6 +4630,21 @@ Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file return &pc_relative_dex_cache_patches_.back().label; } +// The label points to the end of the "movl" or another instruction but the literal offset +// for method patch needs to point to the embedded constant which occupies the last 4 bytes. +constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; + +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( + const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PatchInfo<Label>& info : infos) { + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + linker_patches->push_back( + Factory(literal_offset, &info.dex_file, GetMethodAddressOffset(), info.index)); + } +} + void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4522,59 +4655,38 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche string_patches_.size() + type_patches_.size(); linker_patches->reserve(size); - // The label points to the end of the "movl" insn but the literal offset for method - // patch needs to point to the embedded constant which occupies the last 4 bytes. - constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; - for (const MethodPatchInfo<Label>& info : method_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); - } - for (const MethodPatchInfo<Label>& info : relative_call_patches_) { + for (const PatchInfo<Label>& info : method_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); + linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index)); } - for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) { + for (const PatchInfo<Label>& info : relative_call_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset, - &info.target_dex_file, - GetMethodAddressOffset(), - info.element_offset)); + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); } + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); for (const Label& label : simple_patches_) { uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); } - if (GetCompilerOptions().GetCompilePic()) { - for (const StringPatchInfo<Label>& info : string_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset, - &info.dex_file, - GetMethodAddressOffset(), - info.string_index)); - } - for (const TypePatchInfo<Label>& info : type_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - 
linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset, - &info.dex_file, - GetMethodAddressOffset(), - info.type_index)); - } + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); + } else if (GetCompilerOptions().GetCompilePic()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { - for (const StringPatchInfo<Label>& info : string_patches_) { + for (const PatchInfo<Label>& info : string_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, - &info.dex_file, - info.string_index)); + linker_patches->push_back( + LinkerPatch::StringPatch(literal_offset, &info.dex_file, info.index)); } - for (const TypePatchInfo<Label>& info : type_patches_) { + } + if (GetCompilerOptions().GetCompilePic()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches); + } else { + for (const PatchInfo<Label>& info : type_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, - &info.dex_file, - info.type_index)); + linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index)); } } } @@ -4609,6 +4721,9 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -4632,10 +4747,6 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI // load the temp into the XMM and then copy the XMM into the // output, 32 bits at a time). locations->AddTemp(Location::RequiresFpuRegister()); - } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); } } @@ -4679,11 +4790,10 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimNot: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -5022,17 +5132,11 @@ void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet( } void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - Location loc = codegen_->IsImplicitNullCheckAllowed(instruction) + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); + Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() ? Location::RequiresRegister() : Location::Any(); locations->SetInAt(0, loc); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) { @@ -5078,6 +5182,9 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -5094,11 +5201,6 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier. - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); - } } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { @@ -5113,56 +5215,47 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { switch (type) { case Primitive::kPrimBoolean: { Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - __ movzxb(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); - } else { - __ movzxb(out, Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset)); - } + __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); break; } case Primitive::kPrimByte: { Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - __ movsxb(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); - } else { - __ movsxb(out, Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset)); - } + __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); break; } case Primitive::kPrimShort: { Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - __ movsxw(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); - } else { - __ movsxw(out, Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset)); - } + __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); break; } case Primitive::kPrimChar: { Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - __ movzxw(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Branch cases into compressed and uncompressed for each index's type. 
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + NearLabel done, not_compressed; + __ cmpl(Address(obj, count_offset), Immediate(0)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ j(kGreaterEqual, &not_compressed); + __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); + __ jmp(&done); + __ Bind(&not_compressed); + __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); + __ Bind(&done); } else { - __ movzxw(out, Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset)); + // Common case for charAt of array of char or when string compression's + // feature is turned off. + __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); } break; } case Primitive::kPrimInt: { Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - __ movl(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); - } else { - __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset)); - } + __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); break; } @@ -5173,28 +5266,22 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); } else { Register out = out_loc.AsRegister<Register>(); + __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). if (index.IsConstant()) { uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ movl(out, Address(obj, offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); } else { - __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). 
codegen_->MaybeGenerateReadBarrierSlow( instruction, out_loc, out_loc, obj_loc, data_offset, index); } @@ -5204,40 +5291,23 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimLong: { DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>()); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize)); - } else { - __ movl(out_loc.AsRegisterPairLow<Register>(), - Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(out_loc.AsRegisterPairHigh<Register>(), - Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize)); - } + __ movl(out_loc.AsRegisterPairLow<Register>(), + CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(out_loc.AsRegisterPairHigh<Register>(), + CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize)); break; } case Primitive::kPrimFloat: { XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - if (index.IsConstant()) { - __ movss(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); - } else { - __ movss(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset)); - } + __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); break; } case Primitive::kPrimDouble: { XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - if (index.IsConstant()) { - __ movsd(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); - } else { - __ movsd(out, Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset)); - } + __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset)); break; } @@ -5260,12 +5330,10 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - bool object_array_set_with_read_barrier = - kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + may_need_runtime_call_for_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); @@ -5310,9 +5378,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Address address = index.IsConstant() - ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset) - : Address(array, index.AsRegister<Register>(), TIMES_1, offset); + Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset); if (value.IsRegister()) { __ movb(address, value.AsRegister<ByteRegister>()); } else { @@ -5325,9 +5391,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimShort: case Primitive::kPrimChar: { uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Address address = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset) - : Address(array, index.AsRegister<Register>(), TIMES_2, offset); + Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset); if (value.IsRegister()) { __ movw(address, value.AsRegister<Register>()); } else { @@ -5339,9 +5403,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Address address = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) - : Address(array, index.AsRegister<Register>(), TIMES_4, offset); + Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset); if (!value.IsRegister()) { // Just setting null. @@ -5356,9 +5418,13 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { DCHECK(needs_write_barrier); Register register_value = value.AsRegister<Register>(); - NearLabel done, not_null, do_put; + // We cannot use a NearLabel for `done`, as its range may be too + // short when Baker read barriers are enabled. + Label done; + NearLabel not_null, do_put; SlowPathCode* slow_path = nullptr; - Register temp = locations->GetTemp(0).AsRegister<Register>(); + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); @@ -5371,62 +5437,40 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { __ Bind(&not_null); } - if (kEmitCompilerReadBarrier) { - // When read barriers are enabled, the type checking - // instrumentation requires two read barriers: - // - // __ movl(temp2, temp); - // // /* HeapReference<Class> */ temp = temp->component_type_ - // __ movl(temp, Address(temp, component_offset)); - // codegen_->GenerateReadBarrierSlow( - // instruction, temp_loc, temp_loc, temp2_loc, component_offset); - // - // // /* HeapReference<Class> */ temp2 = register_value->klass_ - // __ movl(temp2, Address(register_value, class_offset)); - // codegen_->GenerateReadBarrierSlow( - // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); - // - // __ cmpl(temp, temp2); - // - // However, the second read barrier may trash `temp`, as it - // is a temporary register, and as such would not be saved - // along with live registers before calling the runtime (nor - // restored afterwards). So in this case, we bail out and - // delegate the work to the array set slow path. - // - // TODO: Extend the register allocator to support a new - // "(locally) live temp" location so as to avoid always - // going into the slow path when read barriers are enabled. 
- __ jmp(slow_path->GetEntryLabel()); - } else { - // /* HeapReference<Class> */ temp = array->klass_ - __ movl(temp, Address(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Note that when Baker read barriers are enabled, the type + // checks are performed without read barriers. This is fine, + // even in the case where a class object is in the from-space + // after the flip, as a comparison involving such a type would + // not produce a false positive; it may of course produce a + // false negative, in which case we would take the ArraySet + // slow path. + + // /* HeapReference<Class> */ temp = array->klass_ + __ movl(temp, Address(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ movl(temp, Address(temp, component_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor the object reference in `register_value->klass`, as + // we are comparing two poisoned references. + __ cmpl(temp, Address(register_value, class_offset)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. __ MaybeUnpoisonHeapReference(temp); - // /* HeapReference<Class> */ temp = temp->component_type_ - __ movl(temp, Address(temp, component_offset)); - // If heap poisoning is enabled, no need to unpoison `temp` - // nor the object reference in `register_value->klass`, as - // we are comparing two poisoned references. - __ cmpl(temp, Address(register_value, class_offset)); - - if (instruction->StaticTypeOfArrayIsObjectArray()) { - __ j(kEqual, &do_put); - // If heap poisoning is enabled, the `temp` reference has - // not been unpoisoned yet; unpoison it now. - __ MaybeUnpoisonHeapReference(temp); - - // /* HeapReference<Class> */ temp = temp->super_class_ - __ movl(temp, Address(temp, super_offset)); - // If heap poisoning is enabled, no need to unpoison - // `temp`, as we are comparing against null below. - __ testl(temp, temp); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_put); - } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); - } + // If heap poisoning is enabled, no need to unpoison the + // heap reference loaded below, as it is only used for a + // comparison with null. + __ cmpl(Address(temp, super_offset), Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); } } @@ -5455,9 +5499,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimInt: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Address address = index.IsConstant() - ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) - : Address(array, index.AsRegister<Register>(), TIMES_4, offset); + Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset); if (value.IsRegister()) { __ movl(address, value.AsRegister<Register>()); } else { @@ -5471,44 +5513,27 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - if (value.IsRegisterPair()) { - __ movl(Address(array, offset), value.AsRegisterPairLow<Register>()); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(Address(array, offset + kX86WordSize), value.AsRegisterPairHigh<Register>()); - } else { - DCHECK(value.IsConstant()); - int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); - __ movl(Address(array, offset), Immediate(Low32Bits(val))); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(Address(array, offset + kX86WordSize), Immediate(High32Bits(val))); - } + if (value.IsRegisterPair()) { + __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset), + value.AsRegisterPairLow<Register>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize), + value.AsRegisterPairHigh<Register>()); } else { - if (value.IsRegisterPair()) { - __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset), - value.AsRegisterPairLow<Register>()); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), - value.AsRegisterPairHigh<Register>()); - } else { - DCHECK(value.IsConstant()); - int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); - __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset), - Immediate(Low32Bits(val))); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), - Immediate(High32Bits(val))); - } + DCHECK(value.IsConstant()); + int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); + __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset), + Immediate(Low32Bits(val))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize), + Immediate(High32Bits(val))); } break; } case Primitive::kPrimFloat: { uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - Address address = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) - : Address(array, index.AsRegister<Register>(), TIMES_4, offset); + Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset); if (value.IsFpuRegister()) { __ movss(address, value.AsFpuRegister<XmmRegister>()); } else { @@ -5522,17 +5547,13 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimDouble: { uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - Address address = index.IsConstant() - ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) - : Address(array, index.AsRegister<Register>(), TIMES_8, offset); + Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset); if (value.IsFpuRegister()) { __ movsd(address, value.AsFpuRegister<XmmRegister>()); } else { DCHECK(value.IsConstant()); - Address address_hi = index.IsConstant() ? - Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + - offset + kX86WordSize) : - Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize); + Address address_hi = + CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize); int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); __ movl(address, Immediate(Low32Bits(v))); codegen_->MaybeRecordImplicitNullCheck(instruction); @@ -5566,24 +5587,32 @@ void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) { Register out = locations->Out().AsRegister<Register>(); __ movl(out, Address(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out most significant bit in case the array is String's array of char. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ andl(out, Immediate(INT32_MAX)); + } } void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); HInstruction* length = instruction->InputAt(1); if (!length->IsEmittedAtUseSite()) { locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); } - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); + // Need register to see array's length. 
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { + const bool is_string_compressed_char_at = + mirror::kUseStringCompression && instruction->IsStringCharAt(); LocationSummary* locations = instruction->GetLocations(); Location index_loc = locations->InAt(0); Location length_loc = locations->InAt(1); @@ -5618,21 +5647,25 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<Register>(), len_offset); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(array_len, Immediate(value)); + if (is_string_compressed_char_at) { + Register length_reg = locations->GetTemp(0).AsRegister<Register>(); + __ movl(length_reg, array_len); + codegen_->MaybeRecordImplicitNullCheck(array_length); + __ andl(length_reg, Immediate(INT32_MAX)); + codegen_->GenerateIntCompare(length_reg, index_loc); } else { - __ cmpl(array_len, index_loc.AsRegister<Register>()); + // Checking bounds for general case: + // Array of char or string's array with feature compression off. + if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(array_len, Immediate(value)); + } else { + __ cmpl(array_len, index_loc.AsRegister<Register>()); + } + codegen_->MaybeRecordImplicitNullCheck(array_length); } - codegen_->MaybeRecordImplicitNullCheck(array_length); } else { - Register length = length_loc.AsRegister<Register>(); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(length, Immediate(value)); - } else { - __ cmpl(length, index_loc.AsRegister<Register>()); - } + codegen_->GenerateIntCompare(length_loc, index_loc); } codegen_->AddSlowPath(slow_path); __ j(kBelowEqual, slow_path->GetEntryLabel()); @@ -5648,7 +5681,9 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) } void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -5959,17 +5994,6 @@ void ParallelMoveResolverX86::RestoreScratch(int reg) { HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - if (kEmitCompilerReadBarrier) { - switch (desired_class_load_kind) { - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - // TODO: Implement for read barrier. 
- return HLoadClass::LoadKind::kDexCacheViaMethod; - default: - break; - } - } switch (desired_class_load_kind) { case HLoadClass::LoadKind::kReferrersClass: break; @@ -6011,10 +6035,15 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { return; } - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kDexCacheViaMethod || @@ -6029,10 +6058,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { LocationSummary* locations = cls->GetLocations(); if (cls->NeedsAccessCheck()) { codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), - cls, - cls->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -6041,6 +6067,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { Register out = out_loc.AsRegister<Register>(); bool generate_null_check = false; + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); switch (cls->GetLoadKind()) { case HLoadClass::LoadKind::kReferrersClass: { DCHECK(!cls->CanCallRuntime()); @@ -6048,24 +6075,28 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ Register current_method = locations->InAt(0).AsRegister<Register>(); GenerateGcRootFieldLoad( - cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + cls, + out_loc, + Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), + /* fixup_label */ nullptr, + requires_read_barrier); break; } case HLoadClass::LoadKind::kBootImageLinkTimeAddress: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); __ movl(out, Immediate(/* placeholder */ 0)); codegen_->RecordTypePatch(cls); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); codegen_->RecordTypePatch(cls); break; } case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); DCHECK_NE(cls->GetAddress(), 0u); uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); __ movl(out, Immediate(address)); @@ -6076,7 +6107,11 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { DCHECK_NE(cls->GetAddress(), 0u); uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); // /* GcRoot<mirror::Class> */ out = *address - GenerateGcRootFieldLoad(cls, out_loc, Address::Absolute(address)); + 
GenerateGcRootFieldLoad(cls, + out_loc, + Address::Absolute(address), + /* fixup_label */ nullptr, + requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -6085,8 +6120,11 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { uint32_t offset = cls->GetDexCacheElementOffset(); Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset); // /* GcRoot<mirror::Class> */ out = *(base + offset) /* PC-relative */ - GenerateGcRootFieldLoad( - cls, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label); + GenerateGcRootFieldLoad(cls, + out_loc, + Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), + fixup_label, + requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -6097,8 +6135,11 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); // /* GcRoot<mirror::Class> */ out = out[type_index] - GenerateGcRootFieldLoad( - cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); + GenerateGcRootFieldLoad(cls, + out_loc, + Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())), + /* fixup_label */ nullptr, + requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -6152,17 +6193,6 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { - if (kEmitCompilerReadBarrier) { - switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageAddress: - // TODO: Implement for read barrier. - return HLoadString::LoadKind::kDexCacheViaMethod; - default: - break; - } - } switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: DCHECK(!GetCompilerOptions().GetCompilePic()); @@ -6170,7 +6200,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( case HLoadString::LoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().GetCompilePic()); FALLTHROUGH_INTENDED; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT. // We disable pc-relative load when there is an irreducible loop, as the optimization // is incompatible with it. @@ -6193,16 +6223,32 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod || - load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kDexCachePcRelative) { + if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } - locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + locations->SetOut(Location::RegisterLocation(EAX)); + } else { + locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } + } } void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { @@ -6212,68 +6258,46 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: { - DCHECK(!kEmitCompilerReadBarrier); __ movl(out, Immediate(/* placeholder */ 0)); - codegen_->RecordStringPatch(load); + codegen_->RecordBootStringPatch(load); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(!kEmitCompilerReadBarrier); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordStringPatch(load); + codegen_->RecordBootStringPatch(load); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); DCHECK_NE(load->GetAddress(), 0u); uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); __ movl(out, Immediate(address)); codegen_->RecordSimplePatch(); return; // No dex cache slow path. 
} - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); - // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address)); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - Register base_reg = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = load->GetDexCacheElementOffset(); - Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset); - // /* GcRoot<mirror::String> */ out = *(base + offset) /* PC-relative */ - GenerateGcRootFieldLoad( - load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - Register current_method = locations->InAt(0).AsRegister<Register>(); - - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - break; + case HLoadString::LoadKind::kBssEntry: { + Register method_address = locations->InAt(0).AsRegister<Register>(); + Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset); + Label* fixup_label = codegen_->NewStringBssEntryPatch(load); + // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); + codegen_->AddSlowPath(slow_path); + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. 
+ InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); + __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex())); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } static Address GetExceptionTlsAddress() { @@ -6306,17 +6330,14 @@ void LocationsBuilderX86::VisitThrow(HThrow* instruction) { } void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { return kEmitCompilerReadBarrier && - (kUseBakerReadBarrier || - type_check_kind == TypeCheckKind::kAbstractClassCheck || + !kUseBakerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || type_check_kind == TypeCheckKind::kArrayObjectCheck); } @@ -6324,6 +6345,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -6331,6 +6353,7 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -6340,6 +6363,9 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); // Note that TypeCheckSlowPathX86 uses this "out" register too. @@ -6377,7 +6403,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset); switch (type_check_kind) { case TypeCheckKind::kExactCheck: { @@ -6599,7 +6625,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -6635,8 +6661,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -6675,8 +6700,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -6708,8 +6732,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -6717,8 +6740,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -6758,11 +6780,10 @@ void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) } void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) { - codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject) - : QUICK_ENTRY_POINT(pUnlockObject), + codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject + : kQuickUnlockObject, instruction, - instruction->GetDexPc(), - nullptr); + instruction->GetDexPc()); if (instruction->IsEnter()) { CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); } else { @@ -6909,17 +6930,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* Location maybe_temp) { Register out_reg = out.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it // in the following move operation, as we will need it for the // read barrier below. 
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp; __ movl(maybe_temp.AsRegister<Register>(), out_reg); // /* HeapReference<Object> */ out = *(out + offset) __ movl(out_reg, Address(out_reg, offset)); @@ -6936,17 +6957,15 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp) { + uint32_t offset) { Register out_reg = out.AsRegister<Register>(); Register obj_reg = obj.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6964,9 +6983,11 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction, Location root, const Address& address, - Label* fixup_label) { + Label* fixup_label, + bool requires_read_barrier) { Register root_reg = root.AsRegister<Register>(); - if (kEmitCompilerReadBarrier) { + if (requires_read_barrier) { + DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -6989,9 +7010,9 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root); + // Slow path marking the GC root `root`. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()), @@ -7025,14 +7046,13 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr Location ref, Register obj, uint32_t offset, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) Address src(obj, offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -7040,7 +7060,6 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr Register obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -7050,18 +7069,17 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - Address src = index.IsConstant() ? 
- Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) : - Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, const Address& src, - Location temp, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + Register* temp) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -7090,23 +7108,23 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // performance reasons. Register ref_reg = ref.AsRegister<Register>(); - Register temp_reg = temp.AsRegister<Register>(); uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - // /* int32_t */ monitor = obj->monitor_ - __ movl(temp_reg, Address(obj, monitor_offset)); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - // /* uint32_t */ rb_state = lock_word.ReadBarrierState() - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); - __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); - static_assert( - LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, - "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86 memory model. @@ -7114,20 +7132,27 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The actual reference load. // /* HeapReference<Object> */ ref = *src - __ movl(ref_reg, src); + __ movl(ref_reg, src); // Flags are unaffected. + + // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. + // Slow path marking the object `ref` when it is gray. 
+ SlowPathCode* slow_path; + if (always_update_field) { + DCHECK(temp != nullptr); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86( + instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + instruction, ref, /* unpoison_ref_before_marking */ true); + } + AddSlowPath(slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, slow_path->GetEntryLabel()); // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref); - AddSlowPath(slow_path); - - // if (rb_state == ReadBarrier::gray_ptr_) - // ref = ReadBarrier::Mark(ref); - __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); - __ j(kEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -7534,6 +7559,31 @@ void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) { } } +void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) { + Register lhs_reg = lhs.AsRegister<Register>(); + GenerateIntCompare(lhs_reg, rhs); +} + +void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) { + if (rhs.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); + Compare32BitValue(lhs, value); + } else if (rhs.IsStackSlot()) { + __ cmpl(lhs, Address(ESP, rhs.GetStackIndex())); + } else { + __ cmpl(lhs, rhs.AsRegister<Register>()); + } +} + +Address CodeGeneratorX86::ArrayAddress(Register obj, + Location index, + ScaleFactor scale, + uint32_t data_offset) { + return index.IsConstant() ? + Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : + Address(obj, index.AsRegister<Register>(), scale, data_offset); +} + Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f306b33247..1b51999546 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -254,17 +254,17 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp); + uint32_t offset); // Generate a GC root reference load: // // root <- *address // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, const Address& address, - Label* fixup_label = nullptr); + Label* fixup_label, + bool requires_read_barrier); // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not. // `is_wide` specifies whether it is long/double or not. 
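Annotation: the Baker read-barrier fast path above replaces the old shift-and-mask sequence with a single `testb` of the byte of the lock word that holds the read-barrier state, branching to the mark slow path only when the gray bit is set. The stand-alone sketch below models that check in plain C++ for illustration; the parameter `rb_state_shift` stands in for LockWord::kReadBarrierStateShift, and the helper name is made up, not part of ART.

#include <cstdint>

// Conceptual model of the gray-bit test emitted above: read the byte of the
// lock word containing the read-barrier state and test its low bit
// (white == 0, gray == 1, so testing a single bit is sufficient).
inline bool IsGrayFastPath(uint32_t lock_word, uint32_t rb_state_shift) {
  const uint32_t gray_byte_position = rb_state_shift / 8;  // byte read by testb
  const uint32_t gray_bit_position = rb_state_shift % 8;   // bit within that byte
  const uint8_t state_byte = static_cast<uint8_t>(lock_word >> (gray_byte_position * 8));
  return (state_byte & (1u << gray_bit_position)) != 0;
}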
@@ -295,7 +295,6 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { HBasicBlock* default_block); void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double); - void GenerateIntCompare(Location lhs, Location rhs); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -329,12 +328,7 @@ class CodeGeneratorX86 : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path) OVERRIDE; - - void InvokeRuntime(int32_t entry_point_offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path); + SlowPathCode* slow_path = nullptr) OVERRIDE; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -342,6 +336,8 @@ class CodeGeneratorX86 : public CodeGenerator { HInstruction* instruction, SlowPathCode* slow_path); + void GenerateInvokeRuntime(int32_t entry_point_offset); + size_t GetWordSize() const OVERRIDE { return kX86WordSize; } @@ -376,9 +372,6 @@ class CodeGeneratorX86 : public CodeGenerator { void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; - // Blocks all register pairs made out of blocked core registers. - void UpdateBlockedPairRegisters() const; - ParallelMoveResolverX86* GetMoveResolver() OVERRIDE { return &move_resolver_; } @@ -406,7 +399,7 @@ class CodeGeneratorX86 : public CodeGenerator { // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method) OVERRIDE; + HInvokeStaticOrDirect* invoke) OVERRIDE; // Generate a call to a static or direct method. Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); @@ -415,8 +408,9 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; void RecordSimplePatch(); - void RecordStringPatch(HLoadString* load_string); + void RecordBootStringPatch(HLoadString* load_string); void RecordTypePatch(HLoadClass* load_class); + Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -476,6 +470,16 @@ class CodeGeneratorX86 : public CodeGenerator { // Compare a register with a 32-bit value in the most efficient manner. void Compare32BitValue(Register dest, int32_t value); + // Compare int values. Supports only register locations for `lhs`. + void GenerateIntCompare(Location lhs, Location rhs); + void GenerateIntCompare(Register lhs, Location rhs); + + // Construct address for array access. + static Address ArrayAddress(Register obj, + Location index, + ScaleFactor scale, + uint32_t data_offset); + Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value); void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -486,7 +490,6 @@ class CodeGeneratorX86 : public CodeGenerator { Location ref, Register obj, uint32_t offset, - Location temp, bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. 
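Annotation: the new CodeGeneratorX86::ArrayAddress helper declared above (and defined in the .cc hunk) centralizes the two addressing forms that every array access previously open-coded. A rough model of the effective address it stands for, written as plain integer arithmetic rather than the real Address class; the function name and parameters are illustrative only.

#include <cstdint>

// Illustrative model of the address produced by
// ArrayAddress(obj, index, scale, data_offset).
uint32_t ModelArrayAddress(uint32_t array_base,      // value held in `obj`
                           bool index_is_constant,
                           int32_t constant_index,   // used when the index is a constant
                           uint32_t index_reg_value, // used when the index is in a register
                           uint32_t scale_log2,      // TIMES_1/2/4/8 == 0/1/2/3
                           uint32_t data_offset) {
  if (index_is_constant) {
    // Constant index: fold (index << scale) into the displacement.
    return array_base + (static_cast<uint32_t>(constant_index) << scale_log2) + data_offset;
  }
  // Register index: base + index * 2^scale + displacement.
  return array_base + (index_reg_value << scale_log2) + data_offset;
}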
@@ -495,16 +498,25 @@ class CodeGeneratorX86 : public CodeGenerator { Register obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check); - // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at address `src`, held by + // object `obj`, into `ref`, and mark it if needed. The base of + // address `src` must be `obj`. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires a temporary register, which must be provided as a + // non-null pointer (`temp`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, const Address& src, - Location temp, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + Register* temp = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -566,9 +578,9 @@ class CodeGeneratorX86 : public CodeGenerator { } } - void GenerateNop(); - void GenerateImplicitNullCheck(HNullCheck* instruction); - void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateNop() OVERRIDE; + void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; // When we don't know the proper offset for the value, we use kDummy32BitOffset. // The correct value will be inserted when processing Assembler fixups. @@ -577,15 +589,9 @@ class CodeGeneratorX86 : public CodeGenerator { private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); - struct PcRelativeDexCacheAccessInfo { - PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) - : target_dex_file(dex_file), element_offset(element_off), label() { } - - const DexFile& target_dex_file; - uint32_t element_offset; - // NOTE: Label is bound to the end of the instruction that has an embedded 32-bit offset. - Label label; - }; + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches); // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. @@ -597,16 +603,16 @@ class CodeGeneratorX86 : public CodeGenerator { const X86InstructionSetFeatures& isa_features_; // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<Label>> method_patches_; - ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + ArenaDeque<PatchInfo<Label>> method_patches_; + ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative DexCache access info. - ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; + ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; // Patch locations for patchoat where the linker doesn't do any other work. ArenaDeque<Label> simple_patches_; - // String patch locations. - ArenaDeque<StringPatchInfo<Label>> string_patches_; + // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC). + ArenaDeque<PatchInfo<Label>> string_patches_; // Type patch locations. 
- ArenaDeque<TypePatchInfo<Label>> type_patches_; + ArenaDeque<PatchInfo<Label>> type_patches_; // Offset to the start of the constant area in the assembled code. // Used for fixups to the constant area. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5d5fa8504a..fcabeeae5d 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -51,8 +51,8 @@ static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 static constexpr int kC2ConditionMask = 0x400; -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value() class NullCheckSlowPathX86_64 : public SlowPathCode { @@ -66,7 +66,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this); @@ -88,14 +88,7 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. - SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } @@ -149,13 +142,8 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -210,6 +198,9 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); } __ movl(length_loc.AsRegister<CpuRegister>(), array_len); + if (mirror::kUseStringCompression) { + __ andl(length_loc.AsRegister<CpuRegister>(), Immediate(INT32_MAX)); + } } // We're moving two locations to locations that could overlap, so we need a parallel @@ -221,13 +212,10 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { length_loc, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() - ? 
QUICK_ENTRY_POINT(pThrowStringBounds) - : QUICK_ENTRY_POINT(pThrowArrayBounds); - x86_64_codegen->InvokeRuntime(entry_point_offset, - instruction_, - instruction_->GetDexPc(), - this); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() + ? kQuickThrowStringBounds + : kQuickThrowArrayBounds; + x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -259,9 +247,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex())); - x86_64_codegen->InvokeRuntime(do_clinit_ ? - QUICK_ENTRY_POINT(pInitializeStaticStorage) : - QUICK_ENTRY_POINT(pInitializeType), + x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType, at_, dex_pc_, this); @@ -313,16 +299,23 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); - __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index)); - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), + // Custom calling convention: RAX serves as both input and output. + __ movl(CpuRegister(RAX), Immediate(string_index)); + x86_64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. 
+ __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false), + locations->Out().AsRegister<CpuRegister>()); + Label* fixup_label = x86_64_codegen->NewStringBssEntryPatch(instruction_->AsLoadString()); + __ Bind(fixup_label); + __ jmp(GetExitLabel()); } @@ -364,18 +357,12 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction_, - dex_pc, - this); + x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes< kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction_, - dex_pc, - this); + x86_64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } @@ -407,11 +394,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -450,10 +433,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -465,11 +445,25 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj) - : SlowPathCode(instruction), obj_(obj) { + ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, + Location ref, + bool unpoison_ref_before_marking) + : SlowPathCode(instruction), + ref_(ref), + unpoison_ref_before_marking_(unpoison_ref_before_marking) { DCHECK(kEmitCompilerReadBarrier); } @@ -477,54 +471,236 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Register reg = obj_.AsRegister<Register>(); + CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); + Register ref_reg = ref_cpu_reg.AsRegister(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || + instruction_->IsArraySet() || instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_cpu_reg); + } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); - DCHECK_NE(reg, RSP); - DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; + DCHECK_NE(ref_reg, RSP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in R0): // - // RDI <- obj + // RDI <- ref // RAX <- ReadBarrierMark(RDI) - // obj <- RAX + // ref <- RAX // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; + // Should the reference in `ref_` be unpoisoned prior to marking it? 
+ const bool unpoison_ref_before_marking_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathX86_64 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). +class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& field_addr, + bool unpoison_ref_before_marking, + CpuRegister temp1, + CpuRegister temp2) + : SlowPathCode(instruction), + ref_(ref), + obj_(obj), + field_addr_(field_addr), + unpoison_ref_before_marking_(unpoison_ref_before_marking), + temp1_(temp1), + temp2_(temp2) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); + Register ref_reg = ref_cpu_reg.AsRegister(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + + __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_cpu_reg); + } + + // Save the old (unpoisoned) reference. + __ movl(temp1_, ref_cpu_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + DCHECK_NE(ref_reg, RSP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // RDI <- ref + // RAX <- ReadBarrierMark(RDI) + // ref <- RAX + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); + // This runtime call does not require a stack map. + x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*field_addr`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. 
In that case, the + // LOCK CMPXCHGL instruction in the compare-and-set (CAS) + // operation below would abort the CAS, leaving the field as-is. + NearLabel done; + __ cmpl(temp1_, ref_cpu_reg); + __ j(kEqual, &done); + + // Update the holder's field atomically. This may fail if + // the mutator updates the field before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + // This operation is implemented with a 32-bit LOCK CMPXCHGL + // instruction, which requires the expected value (the old + // reference) to be in EAX. Save RAX beforehand, and move the + // expected value (stored in `temp1_`) into EAX. + __ movq(temp2_, CpuRegister(RAX)); + __ movl(CpuRegister(RAX), temp1_); + + // Convenience aliases. + CpuRegister base = obj_; + CpuRegister expected = CpuRegister(RAX); + CpuRegister value = ref_cpu_reg; + + bool base_equals_value = (base.AsRegister() == value.AsRegister()); + Register value_reg = ref_reg; + if (kPoisonHeapReferences) { + if (base_equals_value) { + // If `base` and `value` are the same register location, move + // `value_reg` to a temporary register. This way, poisoning + // `value_reg` won't invalidate `base`. + value_reg = temp1_.AsRegister(); + __ movl(CpuRegister(value_reg), base); + } + + // Check that the register allocator did not assign the location + // of `expected` (RAX) to `value` nor to `base`, so that heap + // poisoning (when enabled) works as intended below. + // - If `value` were equal to `expected`, both references would + // be poisoned twice, meaning they would not be poisoned at + // all, as heap poisoning uses address negation. + // - If `base` were equal to `expected`, poisoning `expected` + // would invalidate `base`. + DCHECK_NE(value_reg, expected.AsRegister()); + DCHECK_NE(base.AsRegister(), expected.AsRegister()); + + __ PoisonHeapReference(expected); + __ PoisonHeapReference(CpuRegister(value_reg)); + } + + __ LockCmpxchgl(field_addr_, CpuRegister(value_reg)); + + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. + if (kPoisonHeapReferences) { + if (base_equals_value) { + // `value_reg` has been moved to a temporary register, no need + // to unpoison it. + } else { + __ UnpoisonHeapReference(CpuRegister(value_reg)); + } + // No need to unpoison `expected` (RAX), as it will be overwritten below. + } + + // Restore RAX. + __ movq(CpuRegister(RAX), temp2_); + + __ Bind(&done); + __ jmp(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const CpuRegister obj_; + // The address of the marked reference field. The base of this address must be `obj_`. + const Address field_addr_; + + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; + + const CpuRegister temp1_; + const CpuRegister temp2_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64); +}; + // Slow path generating a read barrier for a heap reference.
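Two facts the slow path above leans on are easy to miss: heap poisoning is plain two's-complement negation (so poisoning the same register twice is a no-op, which is why `value` must not alias `expected`), and the field update is a single strong CAS that simply gives up if another thread got there first. A minimal standalone sketch of both, using a plain uint32_t as the reference representation (an assumption for illustration, not the ART types), before the heap-reference read barrier slow path that the comment above introduces:

#include <atomic>
#include <cstdint>

// Heap poisoning encodes a reference as its two's-complement negation,
// so applying it twice yields the original value: Poison(Poison(x)) == x.
uint32_t Poison(uint32_t ref)   { return 0u - ref; }
uint32_t Unpoison(uint32_t ref) { return 0u - ref; }

// Equivalent of the LOCK CMPXCHGL step: install `new_ref` only if the field
// still holds `old_ref`; if another thread won the race, failing is fine.
// Relaxed ordering mirrors the comment above; the CAS itself stays atomic.
void UpdateFieldIfUnchanged(std::atomic<uint32_t>* field,
                            uint32_t old_ref,
                            uint32_t new_ref) {
  uint32_t expected = old_ref;  // the underlying cmpxchg keeps the expected value in EAX
  field->compare_exchange_strong(expected, new_ref,
                                 std::memory_order_relaxed, std::memory_order_relaxed);
}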
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { public: @@ -661,7 +837,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_)); } - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this); @@ -729,7 +905,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, instruction_->GetDexPc(), this); @@ -750,8 +926,8 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { }; #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT inline Condition X86_64IntegerCondition(IfCondition cond) { switch (cond) { @@ -787,7 +963,7 @@ inline Condition X86_64FPCondition(IfCondition cond) { HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method ATTRIBUTE_UNUSED) { + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { switch (desired_dispatch_info.code_ptr_location) { case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: @@ -808,11 +984,13 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat // All registers are assumed to be correctly set up. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { - case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { // temp = thread->string_init_entrypoint - __ gs()->movq(temp.AsRegister<CpuRegister>(), - Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true)); + uint32_t offset = + GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); + __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true)); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; @@ -821,7 +999,8 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: __ movl(temp.AsRegister<CpuRegister>(), Immediate(0)); // Placeholder. - method_patches_.emplace_back(invoke->GetTargetMethod()); + method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. 
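// Editorial aside (not part of the patch): the label bound here marks the *end* of the
// movl, and the instruction's last four bytes are the embedded imm32. That is why the
// linker patches emitted later compute
//   literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;  // 4u
// in EmitLinkerPatches() / EmitPcRelativeLinkerPatches() further down in this file.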
break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { @@ -829,7 +1008,7 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); // Bind a new fixup label at the end of the "movl" insn. uint32_t offset = invoke->GetDexCacheArrayOffset(); - __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset)); + __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFile(), offset)); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { @@ -868,7 +1047,8 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ call(&frame_entry_label_); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); Label* label = &relative_call_patches_.back().label; __ call(label); // Bind to the patch label, override at link time. __ Bind(label); // Bind the label at the end of the "call" insn. @@ -928,7 +1108,8 @@ void CodeGeneratorX86_64::RecordSimplePatch() { } } -void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) { +void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); __ Bind(&string_patches_.back().label); } @@ -938,6 +1119,12 @@ void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) { __ Bind(&type_patches_.back().label); } +Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { + DCHECK(!GetCompilerOptions().IsBootImage()); + string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); + return &string_patches_.back().label; +} + Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset) { // Add a patch entry and return the label. @@ -945,6 +1132,21 @@ Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_f return &pc_relative_dex_cache_patches_.back().label; } +// The label points to the end of the "movl" or another instruction but the literal offset +// for method patch needs to point to the embedded constant which occupies the last 4 bytes. +constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; + +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( + const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PatchInfo<Label>& info : infos) { + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + linker_patches->push_back( + Factory(literal_offset, &info.dex_file, info.label.Position(), info.index)); + } +} + void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -955,48 +1157,29 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat string_patches_.size() + type_patches_.size(); linker_patches->reserve(size); - // The label points to the end of the "movl" insn but the literal offset for method - // patch needs to point to the embedded constant which occupies the last 4 bytes. 
- constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; - for (const MethodPatchInfo<Label>& info : method_patches_) { + for (const PatchInfo<Label>& info : method_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); + linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index)); } - for (const MethodPatchInfo<Label>& info : relative_call_patches_) { + for (const PatchInfo<Label>& info : relative_call_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); - } - for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset, - &info.target_dex_file, - info.label.Position(), - info.element_offset)); + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); } + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); for (const Label& label : simple_patches_) { uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); } - for (const StringPatchInfo<Label>& info : string_patches_) { + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); + } else { // These are always PC-relative, see GetSupportedLoadStringKind(). - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset, - &info.dex_file, - info.label.Position(), - info.string_index)); - } - for (const TypePatchInfo<Label>& info : type_patches_) { - // These are always PC-relative, see GetSupportedLoadClassKind(). - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset, - &info.dex_file, - info.label.Position(), - info.type_index)); + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } + // These are always PC-relative, see GetSupportedLoadClassKind(). 
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches); } void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { @@ -1031,25 +1214,21 @@ void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { - InvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value(), - instruction, - dex_pc, - slow_path); -} - -void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path) { - ValidateInvokeRuntime(instruction, slow_path); - __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true)); - RecordPcInfo(instruction, dex_pc, slow_path); + ValidateInvokeRuntime(entrypoint, instruction, slow_path); + GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value()); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } } void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, HInstruction* instruction, SlowPathCode* slow_path) { ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + GenerateInvokeRuntime(entry_point_offset); +} + +void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true)); } @@ -1150,8 +1329,13 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { } } - __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), - CpuRegister(kMethodRegisterArgument)); + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. 
+ if (RequiresCurrentMethod()) { + __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), + CpuRegister(kMethodRegisterArgument)); + } } void CodeGeneratorX86_64::GenerateFrameExit() { @@ -1258,13 +1442,8 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int64_t value; - if (constant->IsDoubleConstant()) { - value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); - } else { - DCHECK(constant->IsLongConstant()); - value = constant->AsLongConstant()->GetValue(); - } + DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant()); + int64_t value = GetInt64ValueOf(constant); Store64BitValueToStack(destination, value); } else { DCHECK(source.IsDoubleStackSlot()); @@ -1363,31 +1542,11 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister left_reg = left.AsRegister<CpuRegister>(); - if (right.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant()); - if (value == 0) { - __ testl(left_reg, left_reg); - } else { - __ cmpl(left_reg, Immediate(value)); - } - } else if (right.IsStackSlot()) { - __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); - } else { - __ cmpl(left_reg, right.AsRegister<CpuRegister>()); - } + codegen_->GenerateIntCompare(left, right); break; } case Primitive::kPrimLong: { - CpuRegister left_reg = left.AsRegister<CpuRegister>(); - if (right.IsConstant()) { - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - codegen_->Compare64BitValue(left_reg, value); - } else if (right.IsDoubleStackSlot()) { - __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); - } else { - __ cmpq(left_reg, right.AsRegister<CpuRegister>()); - } + codegen_->GenerateLongCompare(left, right); break; } case Primitive::kPrimFloat: { @@ -1542,15 +1701,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc Location lhs = condition->GetLocations()->InAt(0); Location rhs = condition->GetLocations()->InAt(1); - if (rhs.IsRegister()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); - } else if (rhs.IsConstant()) { - int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant); - } else { - __ cmpl(lhs.AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), rhs.GetStackIndex())); - } + codegen_->GenerateIntCompare(lhs, rhs); if (true_target == nullptr) { __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); } else { @@ -1585,6 +1736,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } @@ -1749,28 +1901,14 @@ void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { // Clear output register: setcc only sets the low byte. 
__ xorl(reg, reg); - if (rhs.IsRegister()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); - } else if (rhs.IsConstant()) { - int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant); - } else { - __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); - } + codegen_->GenerateIntCompare(lhs, rhs); __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); return; case Primitive::kPrimLong: // Clear output register: setcc only sets the low byte. __ xorl(reg, reg); - if (rhs.IsRegister()) { - __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); - } else if (rhs.IsConstant()) { - int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue(); - codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value); - } else { - __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); - } + codegen_->GenerateLongCompare(lhs, rhs); __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); return; case Primitive::kPrimFloat: { @@ -1938,27 +2076,11 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { case Primitive::kPrimShort: case Primitive::kPrimChar: case Primitive::kPrimInt: { - CpuRegister left_reg = left.AsRegister<CpuRegister>(); - if (right.IsConstant()) { - int32_t value = right.GetConstant()->AsIntConstant()->GetValue(); - codegen_->Compare32BitValue(left_reg, value); - } else if (right.IsStackSlot()) { - __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); - } else { - __ cmpl(left_reg, right.AsRegister<CpuRegister>()); - } + codegen_->GenerateIntCompare(left, right); break; } case Primitive::kPrimLong: { - CpuRegister left_reg = left.AsRegister<CpuRegister>(); - if (right.IsConstant()) { - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - codegen_->Compare64BitValue(left_reg, value); - } else if (right.IsDoubleStackSlot()) { - __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); - } else { - __ cmpq(left_reg, right.AsRegister<CpuRegister>()); - } + codegen_->GenerateLongCompare(left, right); break; } case Primitive::kPrimFloat: { @@ -3734,14 +3856,8 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { } void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::Any()); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { @@ -3767,7 +3883,7 @@ void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instructio } else { DCHECK(value.IsConstant()) << value; if (value.GetConstant()->AsIntConstant()->GetValue() == 0) { - __ jmp(slow_path->GetEntryLabel()); + __ jmp(slow_path->GetEntryLabel()); } } break; @@ -3782,7 +3898,7 @@ void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instructio } else { DCHECK(value.IsConstant()) << value; if (value.GetConstant()->AsLongConstant()->GetValue() == 0) { - __ jmp(slow_path->GetEntryLabel()); + __ jmp(slow_path->GetEntryLabel()); } } break; @@ -3967,10 +4083,7 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) __ call(Address(temp, code_offset.SizeValue())); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3992,10 +4105,7 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. - codegen_->InvokeRuntime(instruction->GetEntrypoint(), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); @@ -4142,6 +4252,9 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { object_field_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -4153,11 +4266,6 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { Location::RequiresRegister(), object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier. 
- locations->AddTemp(Location::RequiresRegister()); - } } void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, @@ -4201,11 +4309,10 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimNot: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); // Note that a potential implicit null check is handled in this - // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. + // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -4521,17 +4628,11 @@ void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet( } void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - Location loc = codegen_->IsImplicitNullCheckAllowed(instruction) + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); + Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() ? Location::RequiresRegister() : Location::Any(); locations->SetInAt(0, loc); - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) { @@ -4577,6 +4678,9 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -4589,11 +4693,6 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. 
- if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); - } } void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { @@ -4608,56 +4707,45 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { switch (type) { case Primitive::kPrimBoolean: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); - if (index.IsConstant()) { - __ movzxb(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); - } else { - __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset)); - } + __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); break; } case Primitive::kPrimByte: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); - if (index.IsConstant()) { - __ movsxb(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); - } else { - __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset)); - } + __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); break; } case Primitive::kPrimShort: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); - if (index.IsConstant()) { - __ movsxw(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); - } else { - __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset)); - } + __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); break; } case Primitive::kPrimChar: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); - if (index.IsConstant()) { - __ movzxw(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Branch cases into compressed and uncompressed for each index's type. + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + NearLabel done, not_compressed; + __ cmpl(Address(obj, count_offset), Immediate(0)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ j(kGreaterEqual, ¬_compressed); + __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); + __ jmp(&done); + __ Bind(¬_compressed); + __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); + __ Bind(&done); } else { - __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset)); + __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); } break; } case Primitive::kPrimInt: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); - if (index.IsConstant()) { - __ movl(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); - } else { - __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset)); - } + __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); break; } @@ -4668,28 +4756,22 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this - // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. + // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. 
codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); } else { CpuRegister out = out_loc.AsRegister<CpuRegister>(); + __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). if (index.IsConstant()) { uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ movl(out, Address(obj, offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); } else { - __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). codegen_->MaybeGenerateReadBarrierSlow( instruction, out_loc, out_loc, obj_loc, data_offset, index); } @@ -4699,34 +4781,19 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimLong: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); - if (index.IsConstant()) { - __ movq(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); - } else { - __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset)); - } + __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); break; } case Primitive::kPrimFloat: { XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - if (index.IsConstant()) { - __ movss(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); - } else { - __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset)); - } + __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); break; } case Primitive::kPrimDouble: { XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - if (index.IsConstant()) { - __ movsd(out, Address(obj, - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); - } else { - __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset)); - } + __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); break; } @@ -4749,12 +4816,10 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - bool object_array_set_with_read_barrier = - kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + may_need_runtime_call_for_type_check ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); @@ -4768,10 +4833,7 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { if (needs_write_barrier) { // Temporary registers for the write barrier. - - // This first temporary register is possibly used for heap - // reference poisoning and/or read barrier emission too. - locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. locations->AddTemp(Location::RequiresRegister()); } } @@ -4794,9 +4856,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Address address = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset) - : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset); + Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset); if (value.IsRegister()) { __ movb(address, value.AsRegister<CpuRegister>()); } else { @@ -4809,9 +4869,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimShort: case Primitive::kPrimChar: { uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Address address = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset) - : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset); + Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset); if (value.IsRegister()) { __ movw(address, value.AsRegister<CpuRegister>()); } else { @@ -4824,9 +4882,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Address address = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) - : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); + Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); if (!value.IsRegister()) { // Just setting null. @@ -4841,9 +4897,13 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { DCHECK(needs_write_barrier); CpuRegister register_value = value.AsRegister<CpuRegister>(); - NearLabel done, not_null, do_put; + // We cannot use a NearLabel for `done`, as its range may be too + // short when Baker read barriers are enabled. 
+ Label done; + NearLabel not_null, do_put; SlowPathCode* slow_path = nullptr; - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + Location temp_loc = locations->GetTemp(0); + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); @@ -4856,62 +4916,40 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ Bind(¬_null); } - if (kEmitCompilerReadBarrier) { - // When read barriers are enabled, the type checking - // instrumentation requires two read barriers: - // - // __ movl(temp2, temp); - // // /* HeapReference<Class> */ temp = temp->component_type_ - // __ movl(temp, Address(temp, component_offset)); - // codegen_->GenerateReadBarrierSlow( - // instruction, temp_loc, temp_loc, temp2_loc, component_offset); - // - // // /* HeapReference<Class> */ temp2 = register_value->klass_ - // __ movl(temp2, Address(register_value, class_offset)); - // codegen_->GenerateReadBarrierSlow( - // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); - // - // __ cmpl(temp, temp2); - // - // However, the second read barrier may trash `temp`, as it - // is a temporary register, and as such would not be saved - // along with live registers before calling the runtime (nor - // restored afterwards). So in this case, we bail out and - // delegate the work to the array set slow path. - // - // TODO: Extend the register allocator to support a new - // "(locally) live temp" location so as to avoid always - // going into the slow path when read barriers are enabled. - __ jmp(slow_path->GetEntryLabel()); - } else { - // /* HeapReference<Class> */ temp = array->klass_ - __ movl(temp, Address(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Note that when Baker read barriers are enabled, the type + // checks are performed without read barriers. This is fine, + // even in the case where a class object is in the from-space + // after the flip, as a comparison involving such a type would + // not produce a false positive; it may of course produce a + // false negative, in which case we would take the ArraySet + // slow path. + + // /* HeapReference<Class> */ temp = array->klass_ + __ movl(temp, Address(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ movl(temp, Address(temp, component_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor the object reference in `register_value->klass`, as + // we are comparing two poisoned references. + __ cmpl(temp, Address(register_value, class_offset)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. __ MaybeUnpoisonHeapReference(temp); - // /* HeapReference<Class> */ temp = temp->component_type_ - __ movl(temp, Address(temp, component_offset)); - // If heap poisoning is enabled, no need to unpoison `temp` - // nor the object reference in `register_value->klass`, as - // we are comparing two poisoned references. 
- __ cmpl(temp, Address(register_value, class_offset)); - - if (instruction->StaticTypeOfArrayIsObjectArray()) { - __ j(kEqual, &do_put); - // If heap poisoning is enabled, the `temp` reference has - // not been unpoisoned yet; unpoison it now. - __ MaybeUnpoisonHeapReference(temp); - - // /* HeapReference<Class> */ temp = temp->super_class_ - __ movl(temp, Address(temp, super_offset)); - // If heap poisoning is enabled, no need to unpoison - // `temp`, as we are comparing against null below. - __ testl(temp, temp); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_put); - } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); - } + // If heap poisoning is enabled, no need to unpoison the + // heap reference loaded below, as it is only used for a + // comparison with null. + __ cmpl(Address(temp, super_offset), Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); } } @@ -4940,9 +4978,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimInt: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Address address = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) - : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); + Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); if (value.IsRegister()) { __ movl(address, value.AsRegister<CpuRegister>()); } else { @@ -4956,18 +4992,14 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimLong: { uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - Address address = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) - : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); + Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset); if (value.IsRegister()) { __ movq(address, value.AsRegister<CpuRegister>()); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); - Address address_high = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + - offset + sizeof(int32_t)) - : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t)); + Address address_high = + CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); codegen_->MoveInt64ToAddress(address, address_high, v, instruction); } break; @@ -4975,15 +5007,12 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimFloat: { uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - Address address = index.IsConstant() - ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) - : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); + Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); if (value.IsFpuRegister()) { __ movss(address, value.AsFpuRegister<XmmRegister>()); } else { DCHECK(value.IsConstant()); - int32_t v = - bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); __ movl(address, Immediate(v)); } codegen_->MaybeRecordImplicitNullCheck(instruction); @@ -4992,19 +5021,15 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimDouble: { uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - Address address = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) - : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); + Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset); if (value.IsFpuRegister()) { __ movsd(address, value.AsFpuRegister<XmmRegister>()); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); - Address address_high = index.IsConstant() - ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + - offset + sizeof(int32_t)) - : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t)); + Address address_high = + CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); codegen_->MoveInt64ToAddress(address, address_high, v, instruction); } break; @@ -5036,21 +5061,23 @@ void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movl(out, Address(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out most significant bit in case the array is String's array of char. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ andl(out, Immediate(INT32_MAX)); + } } void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { - LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); HInstruction* length = instruction->InputAt(1); if (!length->IsEmittedAtUseSite()) { locations->SetInAt(1, Location::RegisterOrConstant(length)); } - if (instruction->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } } void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -5087,21 +5114,25 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(array_len, Immediate(value)); + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + CpuRegister length_reg = CpuRegister(TMP); + __ movl(length_reg, array_len); + codegen_->MaybeRecordImplicitNullCheck(array_length); + __ andl(length_reg, Immediate(INT32_MAX)); + codegen_->GenerateIntCompare(length_reg, index_loc); } else { - __ cmpl(array_len, index_loc.AsRegister<CpuRegister>()); + // Checking the bound for general case: + // Array of char or String's array when the compression feature off. + if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(array_len, Immediate(value)); + } else { + __ cmpl(array_len, index_loc.AsRegister<CpuRegister>()); + } + codegen_->MaybeRecordImplicitNullCheck(array_length); } - codegen_->MaybeRecordImplicitNullCheck(array_length); } else { - CpuRegister length = length_loc.AsRegister<CpuRegister>(); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(length, Immediate(value)); - } else { - __ cmpl(length, index_loc.AsRegister<CpuRegister>()); - } + codegen_->GenerateIntCompare(length_loc, index_loc); } codegen_->AddSlowPath(slow_path); __ j(kBelowEqual, slow_path->GetEntryLabel()); @@ -5137,7 +5168,9 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio } void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
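// Editorial note (not part of the patch): an empty custom caller-save set tells the
// register allocator that this slow path clobbers no caller-save registers, so values
// live in them need not be spilled around the suspend check; the marking and
// pResolveString paths elsewhere in this file rely on the same "the entrypoint saves
// everything" reasoning.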
} void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -5397,17 +5430,6 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - if (kEmitCompilerReadBarrier) { - switch (desired_class_load_kind) { - case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - // TODO: Implement for read barrier. - return HLoadClass::LoadKind::kDexCacheViaMethod; - default: - break; - } - } switch (desired_class_load_kind) { case HLoadClass::LoadKind::kReferrersClass: break; @@ -5443,10 +5465,15 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { return; } - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { @@ -5459,10 +5486,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { LocationSummary* locations = cls->GetLocations(); if (cls->NeedsAccessCheck()) { codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), - cls, - cls->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5470,6 +5494,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); bool generate_null_check = false; switch (cls->GetLoadKind()) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5478,16 +5503,20 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); GenerateGcRootFieldLoad( - cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + cls, + out_loc, + Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), + /* fixup_label */ nullptr, + requires_read_barrier); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); codegen_->RecordTypePatch(cls); break; case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!requires_read_barrier); DCHECK_NE(cls->GetAddress(), 0u); uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); __ movl(out, Immediate(address)); 
// Zero-extended. @@ -5499,11 +5528,19 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { // /* GcRoot<mirror::Class> */ out = *address if (IsUint<32>(cls->GetAddress())) { Address address = Address::Absolute(cls->GetAddress(), /* no_rip */ true); - GenerateGcRootFieldLoad(cls, out_loc, address); + GenerateGcRootFieldLoad(cls, + out_loc, + address, + /* fixup_label */ nullptr, + requires_read_barrier); } else { // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address). __ movq(out, Immediate(cls->GetAddress())); - GenerateGcRootFieldLoad(cls, out_loc, Address(out, 0)); + GenerateGcRootFieldLoad(cls, + out_loc, + Address(out, 0), + /* fixup_label */ nullptr, + requires_read_barrier); } generate_null_check = !cls->IsInDexCache(); break; @@ -5514,7 +5551,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label); + GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -5527,7 +5564,11 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); // /* GcRoot<mirror::Class> */ out = out[type_index] GenerateGcRootFieldLoad( - cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); + cls, + out_loc, + Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())), + /* fixup_label */ nullptr, + requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -5573,17 +5614,6 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { - if (kEmitCompilerReadBarrier) { - switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimeAddress: - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageAddress: - // TODO: Implement for read barrier. - return HLoadString::LoadKind::kDexCacheViaMethod; - default: - break; - } - } switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: DCHECK(!GetCompilerOptions().GetCompilePic()); @@ -5597,7 +5627,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( case HLoadString::LoadKind::kDexCacheAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kDexCacheViaMethod: @@ -5607,14 +5637,28 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( } void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath + LocationSummary::CallKind call_kind = load->NeedsEnvironment() + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RegisterLocation(RAX)); + } else { + locations->SetOut(Location::RequiresRegister()); + if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything. + // Custom calling convention: RAX serves as both input and output. + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(RAX)); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } - locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { @@ -5624,66 +5668,41 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(!kEmitCompilerReadBarrier); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordStringPatch(load); + codegen_->RecordBootStringPatch(load); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); DCHECK_NE(load->GetAddress(), 0u); uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); __ movl(out, Immediate(address)); // Zero-extended. codegen_->RecordSimplePatch(); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - // /* GcRoot<mirror::String> */ out = *address - if (IsUint<32>(load->GetAddress())) { - Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true); - GenerateGcRootFieldLoad(load, out_loc, address); - } else { - // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address). 
- __ movq(out, Immediate(load->GetAddress())); - GenerateGcRootFieldLoad(load, out_loc, Address(out, 0)); - } - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - uint32_t offset = load->GetDexCacheElementOffset(); - Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset); + case HLoadString::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false); - // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - break; + Label* fixup_label = codegen_->NewStringBssEntryPatch(load); + // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); + codegen_->AddSlowPath(slow_path); + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + // Custom calling convention: RAX serves as both input and output. 
+ __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex())); + codegen_->InvokeRuntime(kQuickResolveString, + load, + load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } static Address GetExceptionTlsAddress() { @@ -5717,17 +5736,14 @@ void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) { } void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException), - instruction, - instruction->GetDexPc(), - nullptr); + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { return kEmitCompilerReadBarrier && - (kUseBakerReadBarrier || - type_check_kind == TypeCheckKind::kAbstractClassCheck || + !kUseBakerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || type_check_kind == TypeCheckKind::kArrayObjectCheck); } @@ -5735,6 +5751,7 @@ static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -5742,6 +5759,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -5751,6 +5769,9 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); // Note that TypeCheckSlowPathX86_64 uses this "out" register too. 
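The kBssEntry string-load path added above reduces to a lazily filled, linker-patched slot plus a null-checked slow path that calls pResolveString with RAX serving as both input and output. Below is a minimal stand-alone sketch of that flow; LoadStringBssEntry, ResolveString and the slot pointer are invented stand-ins, not ART APIs, and in the real runtime the entrypoint fills the .bss slot itself.

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    // Stand-in for the pResolveString runtime entrypoint; not ART code.
    static const std::string* ResolveString(uint32_t string_index) {
      static std::unordered_map<uint32_t, std::string> interned;
      return &interned.try_emplace(string_index, "str#" + std::to_string(string_index)).first->second;
    }

    // Models the emitted code: a PC-relative load of the .bss slot, with a
    // null-checked slow path in which one register (RAX) carries the string
    // index in and the resolved String out.
    static const std::string* LoadStringBssEntry(const std::string** bss_slot,
                                                 uint32_t string_index) {
      const std::string* out = *bss_slot;   // GcRoot load from the patched slot
      if (out == nullptr) {                 // testl out, out; j(kEqual, slow_path)
        out = ResolveString(string_index);  // custom calling convention: RAX in/out
        *bss_slot = out;                    // later loads stay on the fast path
      }
      return out;
    }

    int main() {
      const std::string* slot = nullptr;
      return LoadStringBssEntry(&slot, 7u) == slot ? 0 : 1;
    }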
@@ -5788,7 +5809,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset); switch (type_check_kind) { case TypeCheckKind::kExactCheck: { @@ -6018,8 +6039,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); @@ -6043,8 +6063,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. @@ -6064,8 +6083,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -6089,8 +6107,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // Walk over the class hierarchy to find a match. NearLabel loop; @@ -6116,8 +6133,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6136,8 +6152,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // Do an exact check. NearLabel check_non_primitive_component_type; @@ -6165,8 +6180,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -6174,8 +6188,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6191,8 +6204,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // We always go into the type check slow path for the unresolved // and interface check cases. @@ -6227,11 +6239,9 @@ void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instructio } void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { - codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject) - : QUICK_ENTRY_POINT(pUnlockObject), + codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject, instruction, - instruction->GetDexPc(), - nullptr); + instruction->GetDexPc()); if (instruction->IsEnter()) { CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); } else { @@ -6360,17 +6370,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi Location maybe_temp) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); if (kEmitCompilerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it // in the following move operation, as we will need it for the // read barrier below. + DCHECK(maybe_temp.IsRegister()) << maybe_temp; __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg); // /* HeapReference<Object> */ out = *(out + offset) __ movl(out_reg, Address(out_reg, offset)); @@ -6387,17 +6397,15 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp) { + uint32_t offset) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; // Load with fast path based Baker's read barrier. 
// /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6415,9 +6423,11 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruct void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction, Location root, const Address& address, - Label* fixup_label) { + Label* fixup_label, + bool requires_read_barrier) { CpuRegister root_reg = root.AsRegister<CpuRegister>(); - if (kEmitCompilerReadBarrier) { + if (requires_read_barrier) { + DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -6440,9 +6450,9 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root); + // Slow path marking the GC root `root`. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(), @@ -6477,14 +6487,13 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in Location ref, CpuRegister obj, uint32_t offset, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) Address src(obj, offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6492,7 +6501,6 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in CpuRegister obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6502,18 +6510,18 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - Address src = index.IsConstant() ? 
- Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) : - Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, CpuRegister obj, const Address& src, - Location temp, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + CpuRegister* temp1, + CpuRegister* temp2) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6542,23 +6550,23 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // performance reasons. CpuRegister ref_reg = ref.AsRegister<CpuRegister>(); - CpuRegister temp_reg = temp.AsRegister<CpuRegister>(); uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - // /* int32_t */ monitor = obj->monitor_ - __ movl(temp_reg, Address(obj, monitor_offset)); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - // /* uint32_t */ rb_state = lock_word.ReadBarrierState() - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); - __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); - static_assert( - LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, - "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86-64 memory model. @@ -6566,20 +6574,28 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // The actual reference load. // /* HeapReference<Object> */ ref = *src - __ movl(ref_reg, src); + __ movl(ref_reg, src); // Flags are unaffected. + + // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. + // Slow path marking the object `ref` when it is gray. 
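The rewritten Baker read-barrier fast path above replaces the old load/shift/and of the whole lock word with a single testb of the byte that holds the read-barrier state, so the branch can consume the flags directly and the reference register is left untouched. A minimal sketch of that bit test, assuming an illustrative kReadBarrierStateShift of 28; the real constants come from art::LockWord and art::ReadBarrier.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Assumed layout, for illustration only: a 1-bit read-barrier state inside
    // the 32-bit lock word at bit kReadBarrierStateShift.
    constexpr uint32_t kReadBarrierStateShift = 28;
    constexpr uint32_t kBitsPerByte = 8;

    // What the emitted `testb` computes: test one bit inside the one byte that
    // holds it, instead of loading, shifting and masking the whole lock word.
    static bool RbStateIsGray(uint32_t lock_word) {
      uint8_t bytes[sizeof(lock_word)];
      std::memcpy(bytes, &lock_word, sizeof(lock_word));  // x86-64 is little-endian
      const uint32_t byte_pos = kReadBarrierStateShift / kBitsPerByte;
      const uint32_t bit_pos = kReadBarrierStateShift % kBitsPerByte;
      return (bytes[byte_pos] & (1u << bit_pos)) != 0;
    }

    int main() {
      assert(RbStateIsGray(1u << kReadBarrierStateShift));  // gray (1) -> slow path
      assert(!RbStateIsGray(0u));                           // white (0) -> fast path
      return 0;
    }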
+ SlowPathCode* slow_path; + if (always_update_field) { + DCHECK(temp1 != nullptr); + DCHECK(temp2 != nullptr); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( + instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + instruction, ref, /* unpoison_ref_before_marking */ true); + } + AddSlowPath(slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, slow_path->GetEntryLabel()); // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref); - AddSlowPath(slow_path); - - // if (rb_state == ReadBarrier::gray_ptr_) - // ref = ReadBarrier::Mark(ref); - __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); - __ j(kEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -6810,6 +6826,43 @@ void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) { } } +void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) { + CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>(); + GenerateIntCompare(lhs_reg, rhs); +} + +void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) { + if (rhs.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); + Compare32BitValue(lhs, value); + } else if (rhs.IsStackSlot()) { + __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex())); + } else { + __ cmpl(lhs, rhs.AsRegister<CpuRegister>()); + } +} + +void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) { + CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>(); + if (rhs.IsConstant()) { + int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue(); + Compare64BitValue(lhs_reg, value); + } else if (rhs.IsDoubleStackSlot()) { + __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); + } else { + __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>()); + } +} + +Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj, + Location index, + ScaleFactor scale, + uint32_t data_offset) { + return index.IsConstant() ? + Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : + Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); +} + void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { DCHECK(dest.IsDoubleStackSlot()); if (IsInt<32>(value)) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 4e0e34ce38..8b19dad0d0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -248,17 +248,17 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp); + uint32_t offset); // Generate a GC root reference load: // // root <- *address // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. 
void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, const Address& address, - Label* fixup_label = nullptr); + Label* fixup_label, + bool requires_read_barrier); void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); @@ -310,12 +310,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path) OVERRIDE; - - void InvokeRuntime(int32_t entry_point_offset, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path); + SlowPathCode* slow_path = nullptr) OVERRIDE; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -323,6 +318,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { HInstruction* instruction, SlowPathCode* slow_path); + void GenerateInvokeRuntime(int32_t entry_point_offset); + size_t GetWordSize() const OVERRIDE { return kX86_64WordSize; } @@ -402,15 +399,16 @@ class CodeGeneratorX86_64 : public CodeGenerator { // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - MethodReference target_method) OVERRIDE; + HInvokeStaticOrDirect* invoke) OVERRIDE; Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; void RecordSimplePatch(); - void RecordStringPatch(HLoadString* load_string); + void RecordBootStringPatch(HLoadString* load_string); void RecordTypePatch(HLoadClass* load_class); + Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -427,7 +425,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location ref, CpuRegister obj, uint32_t offset, - Location temp, bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. @@ -436,16 +433,26 @@ class CodeGeneratorX86_64 : public CodeGenerator { CpuRegister obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check); - // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at address `src`, held by + // object `obj`, into `ref`, and mark it if needed. The base of + // address `src` must be `obj`. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires two temporary registers, which must be provided as + // non-null pointers (`temp1` and `temp2`). 
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, CpuRegister obj, const Address& src, - Location temp, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + CpuRegister* temp1 = nullptr, + CpuRegister* temp2 = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -516,6 +523,19 @@ class CodeGeneratorX86_64 : public CodeGenerator { void Compare32BitValue(CpuRegister dest, int32_t value); void Compare64BitValue(CpuRegister dest, int64_t value); + // Compare int values. Supports register locations for `lhs`. + void GenerateIntCompare(Location lhs, Location rhs); + void GenerateIntCompare(CpuRegister lhs, Location rhs); + + // Compare long values. Supports only register locations for `lhs`. + void GenerateLongCompare(Location lhs, Location rhs); + + // Construct address for array access. + static Address ArrayAddress(CpuRegister obj, + Location index, + ScaleFactor scale, + uint32_t data_offset); + Address LiteralCaseTable(HPackedSwitch* switch_instr); // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. @@ -539,23 +559,18 @@ class CodeGeneratorX86_64 : public CodeGenerator { } } - void GenerateNop(); - void GenerateImplicitNullCheck(HNullCheck* instruction); - void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateNop() OVERRIDE; + void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; // When we don't know the proper offset for the value, we use kDummy32BitOffset. // We will fix this up in the linker later to have the right value. static constexpr int32_t kDummy32BitOffset = 256; private: - struct PcRelativeDexCacheAccessInfo { - PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) - : target_dex_file(dex_file), element_offset(element_off), label() { } - - const DexFile& target_dex_file; - uint32_t element_offset; - Label label; - }; + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches); // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. @@ -571,16 +586,16 @@ class CodeGeneratorX86_64 : public CodeGenerator { int constant_area_start_; // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<Label>> method_patches_; - ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + ArenaDeque<PatchInfo<Label>> method_patches_; + ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative DexCache access info. - ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; + ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; // Patch locations for patchoat where the linker doesn't do any other work. ArenaDeque<Label> simple_patches_; - // String patch locations. - ArenaDeque<StringPatchInfo<Label>> string_patches_; + // String patch locations; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PatchInfo<Label>> string_patches_; // Type patch locations. - ArenaDeque<TypePatchInfo<Label>> type_patches_; + ArenaDeque<PatchInfo<Label>> type_patches_; // Fixups for jump tables need to be handled specially. 
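The header changes above also fold the separate MethodPatchInfo/StringPatchInfo/TypePatchInfo records into a single PatchInfo<Label> keyed by dex file and index, emitted through one EmitPcRelativeLinkerPatches template. A rough stand-alone model of that record shape; FakeDexFile and FakeLabel are placeholders, and std::deque stands in for ArenaDeque's stable element addresses.

    #include <cstdint>
    #include <deque>
    #include <string>

    // Placeholders for art::DexFile and the assembler Label; invented for this sketch.
    struct FakeDexFile { std::string location; };
    struct FakeLabel { uint32_t position = 0; };

    // One generic record replaces the per-kind patch structs: which dex file,
    // which index within it, and where in the code stream to patch.
    template <typename LabelType>
    struct PatchInfo {
      PatchInfo(const FakeDexFile& dex_file_in, uint32_t index_in)
          : dex_file(dex_file_in), index(index_in) {}
      const FakeDexFile& dex_file;
      uint32_t index;
      LabelType label;
    };

    int main() {
      FakeDexFile dex{"core-oj.dex"};
      std::deque<PatchInfo<FakeLabel>> string_patches;  // models ArenaDeque<PatchInfo<Label>>
      string_patches.emplace_back(dex, /* string_index= */ 42u);
      string_patches.back().label.position = 0x1234u;
      return string_patches.size() == 1 ? 0 : 1;
    }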
ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index fe9a7af250..9ec32df578 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -29,12 +29,6 @@ #include "arch/x86_64/instruction_set_features_x86_64.h" #include "base/macros.h" #include "builder.h" -#include "code_generator_arm.h" -#include "code_generator_arm64.h" -#include "code_generator_mips.h" -#include "code_generator_mips64.h" -#include "code_generator_x86.h" -#include "code_generator_x86_64.h" #include "code_simulator_container.h" #include "common_compiler_test.h" #include "dex_file.h" @@ -47,15 +41,60 @@ #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" #include "utils.h" +#include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" #include "utils/mips/managed_register_mips.h" #include "utils/mips64/managed_register_mips64.h" #include "utils/x86/managed_register_x86.h" +#ifdef ART_ENABLE_CODEGEN_arm +#include "code_generator_arm.h" +#include "code_generator_arm_vixl.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_arm64 +#include "code_generator_arm64.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_x86 +#include "code_generator_x86.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_x86_64 +#include "code_generator_x86_64.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_mips +#include "code_generator_mips.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_mips64 +#include "code_generator_mips64.h" +#endif + #include "gtest/gtest.h" namespace art { +typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&); + +class CodegenTargetConfig { + public: + CodegenTargetConfig(InstructionSet isa, CreateCodegenFn create_codegen) + : isa_(isa), create_codegen_(create_codegen) { + } + InstructionSet GetInstructionSet() const { return isa_; } + CodeGenerator* CreateCodeGenerator(HGraph* graph, const CompilerOptions& compiler_options) { + return create_codegen_(graph, compiler_options); + } + + private: + CodegenTargetConfig() {} + InstructionSet isa_; + CreateCodegenFn create_codegen_; +}; + +#ifdef ART_ENABLE_CODEGEN_arm // Provide our own codegen, that ensures the C calling conventions // are preserved. Currently, ART and C do not match as R4 is caller-save // in ART, and callee-save in C. Alternatively, we could use or write @@ -76,11 +115,31 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM { blocked_core_registers_[arm::R4] = true; blocked_core_registers_[arm::R6] = false; blocked_core_registers_[arm::R7] = false; - // Makes pair R6-R7 available. - blocked_register_pairs_[arm::R6_R7] = false; } }; +// A way to test the VIXL32-based code generator on ARM. This will replace +// TestCodeGeneratorARM when the VIXL32-based backend replaces the existing one. 
+class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { + public: + TestCodeGeneratorARMVIXL(HGraph* graph, + const ArmInstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) + : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) { + AddAllocatedRegister(Location::RegisterLocation(arm::R6)); + AddAllocatedRegister(Location::RegisterLocation(arm::R7)); + } + + void SetupBlockedRegisters() const OVERRIDE { + arm::CodeGeneratorARMVIXL::SetupBlockedRegisters(); + blocked_core_registers_[arm::R4] = true; + blocked_core_registers_[arm::R6] = false; + blocked_core_registers_[arm::R7] = false; + } +}; +#endif + +#ifdef ART_ENABLE_CODEGEN_x86 class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { public: TestCodeGeneratorX86(HGraph* graph, @@ -95,16 +154,12 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { x86::CodeGeneratorX86::SetupBlockedRegisters(); // ebx is a callee-save register in C, but caller-save for ART. blocked_core_registers_[x86::EBX] = true; - blocked_register_pairs_[x86::EAX_EBX] = true; - blocked_register_pairs_[x86::EDX_EBX] = true; - blocked_register_pairs_[x86::ECX_EBX] = true; - blocked_register_pairs_[x86::EBX_EDI] = true; // Make edi available. blocked_core_registers_[x86::EDI] = false; - blocked_register_pairs_[x86::ECX_EDI] = false; } }; +#endif class InternalCodeAllocator : public CodeAllocator { public: @@ -200,12 +255,7 @@ static void Run(const InternalCodeAllocator& allocator, VerifyGeneratedCode(target_isa, f, has_result, expected); } -template <typename Expected> -static void RunCode(CodeGenerator* codegen, - HGraph* graph, - std::function<void(HGraph*)> hook_before_codegen, - bool has_result, - Expected expected) { +static void ValidateGraph(HGraph* graph) { GraphChecker graph_checker(graph); graph_checker.Run(); if (!graph_checker.IsValid()) { @@ -214,75 +264,137 @@ static void RunCode(CodeGenerator* codegen, } } ASSERT_TRUE(graph_checker.IsValid()); +} +template <typename Expected> +static void RunCodeNoCheck(CodeGenerator* codegen, + HGraph* graph, + std::function<void(HGraph*)> hook_before_codegen, + bool has_result, + Expected expected) { SsaLivenessAnalysis liveness(graph, codegen); - PrepareForRegisterAllocation(graph).Run(); liveness.Analyze(); RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters(); hook_before_codegen(graph); - InternalCodeAllocator allocator; codegen->Compile(&allocator); Run(allocator, *codegen, has_result, expected); } template <typename Expected> -static void RunCode(InstructionSet target_isa, +static void RunCode(CodeGenerator* codegen, + HGraph* graph, + std::function<void(HGraph*)> hook_before_codegen, + bool has_result, + Expected expected) { + ValidateGraph(graph); + RunCodeNoCheck(codegen, graph, hook_before_codegen, has_result, expected); +} + +template <typename Expected> +static void RunCode(CodegenTargetConfig target_config, HGraph* graph, std::function<void(HGraph*)> hook_before_codegen, bool has_result, Expected expected) { CompilerOptions compiler_options; - if (target_isa == kArm || target_isa == kThumb2) { - std::unique_ptr<const ArmInstructionSetFeatures> features_arm( - ArmInstructionSetFeatures::FromCppDefines()); - TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); - RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kArm64) { - std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( - Arm64InstructionSetFeatures::FromCppDefines()); - 
arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); - RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kX86) { - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); - RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kX86_64) { - std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( - X86_64InstructionSetFeatures::FromCppDefines()); - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); - RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kMips) { - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); - RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kMips64) { - std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( - Mips64InstructionSetFeatures::FromCppDefines()); - mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); - RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); - } + CodeGenerator* codegen = target_config.CreateCodeGenerator(graph, compiler_options); + RunCode(codegen, graph, hook_before_codegen, has_result, expected); } -static ::std::vector<InstructionSet> GetTargetISAs() { - ::std::vector<InstructionSet> v; - // Add all ISAs that are executable on hardware or on simulator. - const ::std::vector<InstructionSet> executable_isa_candidates = { - kArm, - kArm64, - kThumb2, - kX86, - kX86_64, - kMips, - kMips64 +#ifdef ART_ENABLE_CODEGEN_arm +CodeGenerator* create_codegen_arm(HGraph* graph, const CompilerOptions& compiler_options) { + std::unique_ptr<const ArmInstructionSetFeatures> features_arm( + ArmInstructionSetFeatures::FromCppDefines()); + return new (graph->GetArena()) TestCodeGeneratorARM(graph, + *features_arm.get(), + compiler_options); +} + +CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) { + std::unique_ptr<const ArmInstructionSetFeatures> features_arm( + ArmInstructionSetFeatures::FromCppDefines()); + return new (graph->GetArena()) + TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_arm64 +CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) { + std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( + Arm64InstructionSetFeatures::FromCppDefines()); + return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph, + *features_arm64.get(), + compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_x86 +CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) { + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + return new (graph->GetArena()) TestCodeGeneratorX86(graph, *features_x86.get(), compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_x86_64 +CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) { + std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( + X86_64InstructionSetFeatures::FromCppDefines()); 
+ return new (graph->GetArena()) + x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_mips +CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) { + std::unique_ptr<const MipsInstructionSetFeatures> features_mips( + MipsInstructionSetFeatures::FromCppDefines()); + return new (graph->GetArena()) + mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_mips64 +CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) { + std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( + Mips64InstructionSetFeatures::FromCppDefines()); + return new (graph->GetArena()) + mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options); +} +#endif + +// Return all combinations of ISA and code generator that are executable on +// hardware, or on simulator, and that we'd like to test. +static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { + ::std::vector<CodegenTargetConfig> v; + ::std::vector<CodegenTargetConfig> test_config_candidates = { +#ifdef ART_ENABLE_CODEGEN_arm + CodegenTargetConfig(kArm, create_codegen_arm), + CodegenTargetConfig(kThumb2, create_codegen_arm), + CodegenTargetConfig(kArm, create_codegen_arm_vixl32), +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 + CodegenTargetConfig(kArm64, create_codegen_arm64), +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + CodegenTargetConfig(kX86, create_codegen_x86), +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + CodegenTargetConfig(kX86_64, create_codegen_x86_64), +#endif +#ifdef ART_ENABLE_CODEGEN_mips + CodegenTargetConfig(kMips, create_codegen_mips), +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 + CodegenTargetConfig(kMips64, create_codegen_mips64) +#endif }; - for (auto target_isa : executable_isa_candidates) { - if (CanExecute(target_isa)) { - v.push_back(target_isa); + for (auto test_config : test_config_candidates) { + if (CanExecute(test_config.GetInstructionSet())) { + v.push_back(test_config); } } @@ -292,26 +404,26 @@ static ::std::vector<InstructionSet> GetTargetISAs() { static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { - for (InstructionSet target_isa : GetTargetISAs()) { + for (CodegenTargetConfig target_config : GetTargetConfigs()) { ArenaPool pool; ArenaAllocator arena(&pool); HGraph* graph = CreateCFG(&arena, data); // Remove suspend checks, they cannot be executed in this context. RemoveSuspendChecks(graph); - RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected); + RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); } } static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { - for (InstructionSet target_isa : GetTargetISAs()) { + for (CodegenTargetConfig target_config : GetTargetConfigs()) { ArenaPool pool; ArenaAllocator arena(&pool); HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong); // Remove suspend checks, they cannot be executed in this context. 
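The codegen_test.cc refactoring above replaces a long if/else chain over instruction sets with a table of (ISA, factory) pairs that is filtered by CanExecute before the tests run. A generic stand-alone sketch of that pattern; Isa, FakeCodegen and GetExecutableTargets are invented names, not the test's API.

    #include <functional>
    #include <vector>

    // Stand-ins for InstructionSet and CodeGenerator; the point is the table shape.
    enum class Isa { kX86_64, kArm64 };
    struct FakeCodegen { Isa isa; };

    struct TargetConfig {
      Isa isa;
      std::function<FakeCodegen()> create;  // plays the role of CreateCodegenFn
    };

    // Mirrors GetTargetConfigs(): enumerate candidates, keep only executable ones.
    static std::vector<TargetConfig> GetExecutableTargets(bool (*can_execute)(Isa)) {
      const std::vector<TargetConfig> candidates = {
          {Isa::kX86_64, [] { return FakeCodegen{Isa::kX86_64}; }},
          {Isa::kArm64, [] { return FakeCodegen{Isa::kArm64}; }},
      };
      std::vector<TargetConfig> result;
      for (const TargetConfig& config : candidates) {
        if (can_execute(config.isa)) {
          result.push_back(config);
        }
      }
      return result;
    }

    int main() {
      auto targets = GetExecutableTargets([](Isa isa) { return isa == Isa::kX86_64; });
      return targets.size() == 1 ? 0 : 1;
    }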
RemoveSuspendChecks(graph); - RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected); + RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); } } @@ -628,7 +740,7 @@ TEST_F(CodegenTest, ReturnMulIntLit16) { } TEST_F(CodegenTest, NonMaterializedCondition) { - for (InstructionSet target_isa : GetTargetISAs()) { + for (CodegenTargetConfig target_config : GetTargetConfigs()) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -676,12 +788,12 @@ TEST_F(CodegenTest, NonMaterializedCondition) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_isa, graph, hook_before_codegen, true, 0); + RunCode(target_config, graph, hook_before_codegen, true, 0); } } TEST_F(CodegenTest, MaterializedCondition1) { - for (InstructionSet target_isa : GetTargetISAs()) { + for (CodegenTargetConfig target_config : GetTargetConfigs()) { // Check that condition are materialized correctly. A materialized condition // should yield `1` if it evaluated to true, and `0` otherwise. // We force the materialization of comparisons for different combinations of @@ -723,13 +835,13 @@ TEST_F(CodegenTest, MaterializedCondition1) { HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } } TEST_F(CodegenTest, MaterializedCondition2) { - for (InstructionSet target_isa : GetTargetISAs()) { + for (CodegenTargetConfig target_config : GetTargetConfigs()) { // Check that HIf correctly interprets a materialized condition. // We force the materialization of comparisons for different combinations of // inputs. 
An HIf takes the materialized combination as input and returns a @@ -791,7 +903,7 @@ TEST_F(CodegenTest, MaterializedCondition2) { HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } } @@ -820,7 +932,7 @@ static void TestComparison(IfCondition condition, int64_t i, int64_t j, Primitive::Type type, - const InstructionSet target_isa) { + const CodegenTargetConfig target_config) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = CreateGraph(&allocator); @@ -902,54 +1014,94 @@ static void TestComparison(IfCondition condition, block->AddInstruction(new (&allocator) HReturn(comparison)); graph->BuildDominatorTree(); - RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result); + RunCode(target_config, graph, [](HGraph*) {}, true, expected_result); } TEST_F(CodegenTest, ComparisonsInt) { - for (InstructionSet target_isa : GetTargetISAs()) { + for (CodegenTargetConfig target_config : GetTargetConfigs()) { for (int64_t i = -1; i <= 1; i++) { for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa); + for (int cond = kCondFirst; cond <= kCondLast; cond++) { + TestComparison(static_cast<IfCondition>(cond), i, j, Primitive::kPrimInt, target_config); + } } } } } TEST_F(CodegenTest, ComparisonsLong) { - // TODO: make MIPS work for long - if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { - return; - } - - for (InstructionSet target_isa : GetTargetISAs()) { - if (target_isa == kMips || target_isa == kMips64) { - continue; - } - + for (CodegenTargetConfig target_config : GetTargetConfigs()) { for (int64_t i = -1; i <= 1; i++) { for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa); + for (int cond = kCondFirst; cond <= kCondLast; cond++) { + TestComparison(static_cast<IfCondition>(cond), i, j, Primitive::kPrimLong, target_config); + } } } } } +#ifdef ART_ENABLE_CODEGEN_mips +TEST_F(CodegenTest, MipsClobberRA) { + std::unique_ptr<const MipsInstructionSetFeatures> features_mips( + MipsInstructionSetFeatures::FromCppDefines()); + if (!CanExecute(kMips) || 
features_mips->IsR6()) { + // HMipsComputeBaseMethodAddress and the NAL instruction behind it + // should only be generated on non-R6. + return; + } + + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateGraph(&allocator); + + HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry_block); + graph->SetEntryBlock(entry_block); + entry_block->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block); + + HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit_block); + graph->SetExitBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + + entry_block->AddSuccessor(block); + block->AddSuccessor(exit_block); + + // To simplify matters, don't create PC-relative HLoadClass or HLoadString. + // Instead, generate HMipsComputeBaseMethodAddress directly. + HMipsComputeBaseMethodAddress* base = new (&allocator) HMipsComputeBaseMethodAddress(); + block->AddInstruction(base); + // HMipsComputeBaseMethodAddress is defined as int, so just make the + // compiled method return it. + block->AddInstruction(new (&allocator) HReturn(base)); + + graph->BuildDominatorTree(); + + mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), CompilerOptions()); + // Since there isn't HLoadClass or HLoadString, we need to manually indicate + // that RA is clobbered and the method entry code should generate a stack frame + // and preserve RA in it. And this is what we're testing here. + codegenMIPS.ClobberRA(); + // Without ClobberRA() the code would be: + // nal # Sets RA to point to the jr instruction below + // move v0, ra # and the CPU falls into an infinite loop. + // jr ra + // nop + // The expected code is: + // addiu sp, sp, -16 + // sw ra, 12(sp) + // sw a0, 0(sp) + // nal # Sets RA to point to the lw instruction below. + // move v0, ra + // lw ra, 12(sp) + // jr ra + // addiu sp, sp, 16 + RunCode(&codegenMIPS, graph, [](HGraph*) {}, false, 0); +} +#endif + } // namespace art diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h new file mode 100644 index 0000000000..5d92bfd9cc --- /dev/null +++ b/compiler/optimizing/common_arm.h @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ +#define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ + +// TODO(VIXL): Make VIXL compile with -Wshadow. 
+#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch32/macro-assembler-aarch32.h" +#pragma GCC diagnostic pop + +namespace art { +namespace arm { +namespace helpers { + +static_assert(vixl::aarch32::kSpCode == SP, "vixl::aarch32::kSpCode must equal ART's SP"); + +inline dwarf::Reg DWARFReg(vixl::aarch32::Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg.GetCode())); +} + +inline dwarf::Reg DWARFReg(vixl::aarch32::SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode())); +} + +inline vixl::aarch32::DRegister FromLowSToD(vixl::aarch32::SRegister reg) { + DCHECK_EQ(reg.GetCode() % 2, 0u) << reg; + return vixl::aarch32::DRegister(reg.GetCode() / 2); +} + +inline vixl::aarch32::Register HighRegisterFrom(Location location) { + DCHECK(location.IsRegisterPair()) << location; + return vixl::aarch32::Register(location.AsRegisterPairHigh<vixl32::Register>()); +} + +inline vixl::aarch32::DRegister HighDRegisterFrom(Location location) { + DCHECK(location.IsFpuRegisterPair()) << location; + return vixl::aarch32::DRegister(location.AsFpuRegisterPairHigh<vixl32::DRegister>()); +} + +inline vixl::aarch32::Register LowRegisterFrom(Location location) { + DCHECK(location.IsRegisterPair()) << location; + return vixl::aarch32::Register(location.AsRegisterPairLow<vixl32::Register>()); +} + +inline vixl::aarch32::SRegister LowSRegisterFrom(Location location) { + DCHECK(location.IsFpuRegisterPair()) << location; + return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl32::SRegister>()); +} + +inline vixl::aarch32::Register RegisterFrom(Location location) { + DCHECK(location.IsRegister()) << location; + return vixl::aarch32::Register(location.reg()); +} + +inline vixl::aarch32::Register RegisterFrom(Location location, Primitive::Type type) { + DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type; + return RegisterFrom(location); +} + +inline vixl::aarch32::DRegister DRegisterFrom(Location location) { + DCHECK(location.IsFpuRegisterPair()) << location; + int reg_code = location.low(); + DCHECK_EQ(reg_code % 2, 0) << reg_code; + return vixl::aarch32::DRegister(reg_code / 2); +} + +inline vixl::aarch32::SRegister SRegisterFrom(Location location) { + DCHECK(location.IsFpuRegister()) << location; + return vixl::aarch32::SRegister(location.reg()); +} + +inline vixl::aarch32::SRegister OutputSRegister(HInstruction* instr) { + Primitive::Type type = instr->GetType(); + DCHECK_EQ(type, Primitive::kPrimFloat) << type; + return SRegisterFrom(instr->GetLocations()->Out()); +} + +inline vixl::aarch32::DRegister OutputDRegister(HInstruction* instr) { + Primitive::Type type = instr->GetType(); + DCHECK_EQ(type, Primitive::kPrimDouble) << type; + return DRegisterFrom(instr->GetLocations()->Out()); +} + +inline vixl::aarch32::VRegister OutputVRegister(HInstruction* instr) { + Primitive::Type type = instr->GetType(); + if (type == Primitive::kPrimFloat) { + return OutputSRegister(instr); + } else { + return OutputDRegister(instr); + } +} + +inline vixl::aarch32::SRegister InputSRegisterAt(HInstruction* instr, int input_index) { + Primitive::Type type = instr->InputAt(input_index)->GetType(); + DCHECK_EQ(type, Primitive::kPrimFloat) << type; + return SRegisterFrom(instr->GetLocations()->InAt(input_index)); +} + +inline vixl::aarch32::DRegister InputDRegisterAt(HInstruction* instr, int input_index) { + Primitive::Type type = instr->InputAt(input_index)->GetType(); + DCHECK_EQ(type, Primitive::kPrimDouble) << type; + 
return DRegisterFrom(instr->GetLocations()->InAt(input_index)); +} + +inline vixl::aarch32::VRegister InputVRegisterAt(HInstruction* instr, int input_index) { + Primitive::Type type = instr->InputAt(input_index)->GetType(); + if (type == Primitive::kPrimFloat) { + return InputSRegisterAt(instr, input_index); + } else { + return InputDRegisterAt(instr, input_index); + } +} + +inline vixl::aarch32::Register OutputRegister(HInstruction* instr) { + return RegisterFrom(instr->GetLocations()->Out(), instr->GetType()); +} + +inline vixl::aarch32::Register InputRegisterAt(HInstruction* instr, int input_index) { + return RegisterFrom(instr->GetLocations()->InAt(input_index), + instr->InputAt(input_index)->GetType()); +} + +inline int64_t Int64ConstantFrom(Location location) { + HConstant* instr = location.GetConstant(); + if (instr->IsIntConstant()) { + return instr->AsIntConstant()->GetValue(); + } else if (instr->IsNullConstant()) { + return 0; + } else { + DCHECK(instr->IsLongConstant()) << instr->DebugName(); + return instr->AsLongConstant()->GetValue(); + } +} + +inline vixl::aarch32::Operand OperandFrom(Location location, Primitive::Type type) { + if (location.IsRegister()) { + return vixl::aarch32::Operand(RegisterFrom(location, type)); + } else { + return vixl::aarch32::Operand(Int64ConstantFrom(location)); + } +} + +inline vixl::aarch32::Operand InputOperandAt(HInstruction* instr, int input_index) { + return OperandFrom(instr->GetLocations()->InAt(input_index), + instr->InputAt(input_index)->GetType()); +} + +inline Location LocationFrom(const vixl::aarch32::Register& reg) { + return Location::RegisterLocation(reg.GetCode()); +} + +inline Location LocationFrom(const vixl::aarch32::SRegister& reg) { + return Location::FpuRegisterLocation(reg.GetCode()); +} + +inline Location LocationFrom(const vixl::aarch32::Register& low, + const vixl::aarch32::Register& high) { + return Location::RegisterPairLocation(low.GetCode(), high.GetCode()); +} + +inline Location LocationFrom(const vixl::aarch32::SRegister& low, + const vixl::aarch32::SRegister& high) { + return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode()); +} + +} // namespace helpers +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index af0ee4e197..776a483d43 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -22,8 +22,13 @@ #include "nodes.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +// TODO(VIXL): Make VIXL compile with -Wshadow. 
+#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" +#include "aarch64/simulator-aarch64.h" +#pragma GCC diagnostic pop namespace art { namespace arm64 { @@ -33,7 +38,7 @@ namespace helpers { static_assert((SP == 31) && (WSP == 31) && (XZR == 32) && (WZR == 32), "Unexpected values for register codes."); -static inline int VIXLRegCodeFromART(int code) { +inline int VIXLRegCodeFromART(int code) { if (code == SP) { return vixl::aarch64::kSPRegInternalCode; } @@ -43,7 +48,7 @@ static inline int VIXLRegCodeFromART(int code) { return code; } -static inline int ARTRegCodeFromVIXL(int code) { +inline int ARTRegCodeFromVIXL(int code) { if (code == vixl::aarch64::kSPRegInternalCode) { return SP; } @@ -53,73 +58,85 @@ static inline int ARTRegCodeFromVIXL(int code) { return code; } -static inline vixl::aarch64::Register XRegisterFrom(Location location) { +inline vixl::aarch64::Register XRegisterFrom(Location location) { DCHECK(location.IsRegister()) << location; return vixl::aarch64::Register::GetXRegFromCode(VIXLRegCodeFromART(location.reg())); } -static inline vixl::aarch64::Register WRegisterFrom(Location location) { +inline vixl::aarch64::Register WRegisterFrom(Location location) { DCHECK(location.IsRegister()) << location; return vixl::aarch64::Register::GetWRegFromCode(VIXLRegCodeFromART(location.reg())); } -static inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) { +inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) { DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type; return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location); } -static inline vixl::aarch64::Register OutputRegister(HInstruction* instr) { +inline vixl::aarch64::Register OutputRegister(HInstruction* instr) { return RegisterFrom(instr->GetLocations()->Out(), instr->GetType()); } -static inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) { +inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) { return RegisterFrom(instr->GetLocations()->InAt(input_index), instr->InputAt(input_index)->GetType()); } -static inline vixl::aarch64::FPRegister DRegisterFrom(Location location) { +inline vixl::aarch64::FPRegister DRegisterFrom(Location location) { DCHECK(location.IsFpuRegister()) << location; return vixl::aarch64::FPRegister::GetDRegFromCode(location.reg()); } -static inline vixl::aarch64::FPRegister SRegisterFrom(Location location) { +inline vixl::aarch64::FPRegister SRegisterFrom(Location location) { DCHECK(location.IsFpuRegister()) << location; return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg()); } -static inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) { +inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) { DCHECK(Primitive::IsFloatingPointType(type)) << type; return type == Primitive::kPrimDouble ? 
DRegisterFrom(location) : SRegisterFrom(location); } -static inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) { +inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) { return FPRegisterFrom(instr->GetLocations()->Out(), instr->GetType()); } -static inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) { +inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) { return FPRegisterFrom(instr->GetLocations()->InAt(input_index), instr->InputAt(input_index)->GetType()); } -static inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) { +inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) { return Primitive::IsFloatingPointType(type) ? vixl::aarch64::CPURegister(FPRegisterFrom(location, type)) : vixl::aarch64::CPURegister(RegisterFrom(location, type)); } -static inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) { +inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) { return Primitive::IsFloatingPointType(instr->GetType()) ? static_cast<vixl::aarch64::CPURegister>(OutputFPRegister(instr)) : static_cast<vixl::aarch64::CPURegister>(OutputRegister(instr)); } -static inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) { +inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) { return Primitive::IsFloatingPointType(instr->InputAt(index)->GetType()) ? static_cast<vixl::aarch64::CPURegister>(InputFPRegisterAt(instr, index)) : static_cast<vixl::aarch64::CPURegister>(InputRegisterAt(instr, index)); } -static inline int64_t Int64ConstantFrom(Location location) { +inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* instr, + int index) { + HInstruction* input = instr->InputAt(index); + Primitive::Type input_type = input->GetType(); + if (input->IsConstant() && input->AsConstant()->IsZeroBitPattern()) { + return (Primitive::ComponentSize(input_type) >= vixl::aarch64::kXRegSizeInBytes) + ? 
vixl::aarch64::xzr + : vixl::aarch64::wzr; + } + return InputCPURegisterAt(instr, index); +} + +inline int64_t Int64ConstantFrom(Location location) { HConstant* instr = location.GetConstant(); if (instr->IsIntConstant()) { return instr->AsIntConstant()->GetValue(); @@ -131,7 +148,7 @@ static inline int64_t Int64ConstantFrom(Location location) { } } -static inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) { +inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) { if (location.IsRegister()) { return vixl::aarch64::Operand(RegisterFrom(location, type)); } else { @@ -139,23 +156,23 @@ static inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::T } } -static inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) { +inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) { return OperandFrom(instr->GetLocations()->InAt(input_index), instr->InputAt(input_index)->GetType()); } -static inline vixl::aarch64::MemOperand StackOperandFrom(Location location) { +inline vixl::aarch64::MemOperand StackOperandFrom(Location location) { return vixl::aarch64::MemOperand(vixl::aarch64::sp, location.GetStackIndex()); } -static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base, +inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base, size_t offset = 0) { // A heap reference must be 32bit, so fit in a W register. DCHECK(base.IsW()); return vixl::aarch64::MemOperand(base.X(), offset); } -static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base, +inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base, const vixl::aarch64::Register& regoffset, vixl::aarch64::Shift shift = vixl::aarch64::LSL, unsigned shift_amount = 0) { @@ -164,24 +181,24 @@ static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Registe return vixl::aarch64::MemOperand(base.X(), regoffset, shift, shift_amount); } -static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base, +inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base, Offset offset) { return HeapOperand(base, offset.SizeValue()); } -static inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) { +inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) { return HeapOperand(RegisterFrom(location, Primitive::kPrimNot), offset); } -static inline Location LocationFrom(const vixl::aarch64::Register& reg) { +inline Location LocationFrom(const vixl::aarch64::Register& reg) { return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.GetCode())); } -static inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) { +inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) { return Location::FpuRegisterLocation(fpreg.GetCode()); } -static inline vixl::aarch64::Operand OperandFromMemOperand( +inline vixl::aarch64::Operand OperandFromMemOperand( const vixl::aarch64::MemOperand& mem_op) { if (mem_op.IsImmediateOffset()) { return vixl::aarch64::Operand(mem_op.GetOffset()); @@ -202,7 +219,7 @@ static inline vixl::aarch64::Operand OperandFromMemOperand( } } -static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) { +inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) { DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || 
constant->IsNullConstant()) << constant->DebugName(); @@ -241,7 +258,7 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst } } -static inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, +inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { @@ -255,10 +272,10 @@ static inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, // codes are same, we can initialize vixl register list simply by the register masks. Currently, // only SP/WSP and ZXR/WZR codes are different between art and vixl. // Note: This function is only used for debug checks. -static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers, - size_t num_core, - uint32_t art_fpu_registers, - size_t num_fpu) { +inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers, + size_t num_core, + uint32_t art_fpu_registers, + size_t num_fpu) { // The register masks won't work if the number of register is larger than 32. DCHECK_GE(sizeof(art_core_registers) * 8, num_core); DCHECK_GE(sizeof(art_fpu_registers) * 8, num_fpu); @@ -273,7 +290,7 @@ static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers, return true; } -static inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { +inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { switch (op_kind) { case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR; case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL; @@ -285,7 +302,7 @@ static inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp:: } } -static inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { +inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { switch (op_kind) { case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB; case HArm64DataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH; @@ -300,7 +317,7 @@ static inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp } } -static inline bool CanFitInShifterOperand(HInstruction* instruction) { +inline bool CanFitInShifterOperand(HInstruction* instruction) { if (instruction->IsTypeConversion()) { HTypeConversion* conversion = instruction->AsTypeConversion(); Primitive::Type result_type = conversion->GetResultType(); @@ -315,7 +332,7 @@ static inline bool CanFitInShifterOperand(HInstruction* instruction) { } } -static inline bool HasShifterOperand(HInstruction* instr) { +inline bool HasShifterOperand(HInstruction* instr) { // `neg` instructions are an alias of `sub` using the zero register as the // first register input. bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() || @@ -323,7 +340,7 @@ static inline bool HasShifterOperand(HInstruction* instr) { return res; } -static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { +inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { DCHECK(HasShifterOperand(instruction)); // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg` // does *not* support extension. 
This is because the `extended register` form @@ -334,6 +351,10 @@ static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { return instruction->IsAdd() || instruction->IsSub(); } +inline bool IsConstantZeroBitPattern(const HInstruction* instruction) { + return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern(); +} + } // namespace helpers } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 0614945ddc..5f39a49d68 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -47,6 +47,9 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { private: void VisitShift(HBinaryOperation* shift); + void VisitEqual(HEqual* instruction) OVERRIDE; + void VisitNotEqual(HNotEqual* instruction) OVERRIDE; + void VisitAbove(HAbove* instruction) OVERRIDE; void VisitAboveOrEqual(HAboveOrEqual* instruction) OVERRIDE; void VisitBelow(HBelow* instruction) OVERRIDE; @@ -140,6 +143,30 @@ void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instr } } +void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) { + if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || + (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { + // Replace code looking like + // EQUAL lhs, null + // where lhs cannot be null with + // CONSTANT false + instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instruction) { + if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || + (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { + // Replace code looking like + // NOT_EQUAL lhs, null + // where lhs cannot be null with + // CONSTANT true + instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) { if (instruction->GetLeft()->IsConstant() && instruction->GetLeft()->AsConstant()->IsArithmeticZero()) { diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index e1bde7c737..9de521ad8d 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -16,8 +16,9 @@ #include "dead_code_elimination.h" -#include "utils/array_ref.h" +#include "base/array_ref.h" #include "base/bit_vector-inl.h" +#include "base/stl_util.h" #include "ssa_phi_elimination.h" namespace art { @@ -168,8 +169,7 @@ bool HDeadCodeElimination::SimplifyIfs() { bool simplified_one_or_more_ifs = false; bool rerun_dominance_and_loop_analysis = false; - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { HInstruction* last = block->GetLastInstruction(); HInstruction* first = block->GetFirstInstruction(); if (last->IsIf() && @@ -271,20 +271,22 @@ bool HDeadCodeElimination::SimplifyIfs() { } void HDeadCodeElimination::ConnectSuccessiveBlocks() { - // Order does not matter. 
- for (HReversePostOrderIterator it(*graph_); !it.Done();) { - HBasicBlock* block = it.Current(); - if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) { - it.Advance(); - continue; - } - HBasicBlock* successor = block->GetSingleSuccessor(); - if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) { - it.Advance(); - continue; + // Order does not matter. Skip the entry block by starting at index 1 in reverse post order. + for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) { + HBasicBlock* block = graph_->GetReversePostOrder()[i]; + DCHECK(!block->IsEntryBlock()); + while (block->GetLastInstruction()->IsGoto()) { + HBasicBlock* successor = block->GetSingleSuccessor(); + if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) { + break; + } + DCHECK_LT(i, IndexOfElement(graph_->GetReversePostOrder(), successor)); + block->MergeWith(successor); + --size; + DCHECK_EQ(size, graph_->GetReversePostOrder().size()); + DCHECK_EQ(block, graph_->GetReversePostOrder()[i]); + // Reiterate on this block in case it can be merged with its new successor. } - block->MergeWith(successor); - // Reiterate on this block in case it can be merged with its new successor. } } @@ -300,8 +302,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() { // Remove all dead blocks. Iterate in post order because removal needs the // block's chain of dominators and nested loops need to be updated from the // inside out. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { int id = block->GetBlockId(); if (!live_blocks.IsBitSet(id)) { MaybeRecordDeadBlock(block); @@ -332,8 +333,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() { void HDeadCodeElimination::RemoveDeadInstructions() { // Process basic blocks in post-order in the dominator tree, so that // a dead instruction depending on another dead instruction is removed. - for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) { - HBasicBlock* block = b.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { // Traverse this block's instructions in backward order and remove // the unused ones. HBackwardInstructionIterator i(block->GetInstructions()); @@ -343,14 +343,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() { for (i.Advance(); !i.Done(); i.Advance()) { HInstruction* inst = i.Current(); DCHECK(!inst->IsControlFlow()); - if (!inst->HasSideEffects() - && !inst->CanThrow() - && !inst->IsSuspendCheck() - && !inst->IsNativeDebugInfo() - // If we added an explicit barrier then we should keep it. 
- && !inst->IsMemoryBarrier() - && !inst->IsParameterValue() - && !inst->HasUses()) { + if (inst->IsDeadAndRemovable()) { block->RemoveInstruction(inst); MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction); } diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 0ce0ec1402..58e700deba 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -31,13 +31,11 @@ class HDeadCodeElimination : public HOptimization { public: HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats = nullptr, - const char* name = kInitialDeadCodeEliminationPassName) + const char* name = kDeadCodeEliminationPassName) : HOptimization(graph, name, stats) {} void Run() OVERRIDE; - - static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination"; - static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final"; + static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination"; private: void MaybeRecordDeadBlock(HBasicBlock* block); diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc index 14c318e21f..82b81238ab 100644 --- a/compiler/optimizing/dex_cache_array_fixups_arm.cc +++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc @@ -17,6 +17,8 @@ #include "dex_cache_array_fixups_arm.h" #include "base/arena_containers.h" +#include "code_generator_arm.h" +#include "intrinsics_arm.h" #include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -27,8 +29,9 @@ namespace arm { */ class DexCacheArrayFixupsVisitor : public HGraphVisitor { public: - explicit DexCacheArrayFixupsVisitor(HGraph* graph) + DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen) : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorARM*>(codegen)), dex_cache_array_bases_(std::less<const DexFile*>(), // Attribute memory use to code generator. graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} @@ -59,31 +62,15 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { } } - void VisitLoadString(HLoadString* load_string) OVERRIDE { - // If this is a load with PC-relative access to the dex cache strings array, - // we need to add the dex cache arrays base as the special input. - if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) { - // Initialize base for target dex file if needed. - const DexFile& dex_file = load_string->GetDexFile(); - HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file); - // Update the element offset in base. - DexCacheArraysLayout layout(kArmPointerSize, &dex_file); - base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex())); - // Add the special argument base to the load. - load_string->AddSpecialInput(base); - } - } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { // If this is an invoke with PC-relative access to the dex cache methods array, // we need to add the dex cache arrays base as the special input. - if (invoke->HasPcRelativeDexCache()) { - // Initialize base for target method dex file if needed. 
- MethodReference target_method = invoke->GetTargetMethod(); - HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file); + if (invoke->HasPcRelativeDexCache() && + !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARM>(invoke, codegen_)) { + HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(invoke->GetDexFile()); // Update the element offset in base. - DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file); - base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index)); + DexCacheArraysLayout layout(kArmPointerSize, &invoke->GetDexFile()); + base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex())); // Add the special argument base to the method. DCHECK(!invoke->HasCurrentMethodInput()); invoke->AddSpecialInput(base); @@ -107,6 +94,8 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { return base; } + CodeGeneratorARM* codegen_; + using DexCacheArraysBaseMap = ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>; DexCacheArraysBaseMap dex_cache_array_bases_; @@ -118,7 +107,7 @@ void DexCacheArrayFixups::Run() { // that can be live-in at the irreducible loop header. return; } - DexCacheArrayFixupsVisitor visitor(graph_); + DexCacheArrayFixupsVisitor visitor(graph_, codegen_); visitor.VisitInsertionOrder(); visitor.MoveBasesIfNeeded(); } diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h index 015f910328..9d67a319b9 100644 --- a/compiler/optimizing/dex_cache_array_fixups_arm.h +++ b/compiler/optimizing/dex_cache_array_fixups_arm.h @@ -21,14 +21,23 @@ #include "optimization.h" namespace art { + +class CodeGenerator; + namespace arm { class DexCacheArrayFixups : public HOptimization { public: - DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {} + DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, kDexCacheArrayFixupsArmPassName, stats), + codegen_(codegen) {} + + static constexpr const char* kDexCacheArrayFixupsArmPassName = "dex_cache_array_fixups_arm"; void Run() OVERRIDE; + + private: + CodeGenerator* codegen_; }; } // namespace arm diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc index 19bab08eb4..31fff26dd5 100644 --- a/compiler/optimizing/dex_cache_array_fixups_mips.cc +++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc @@ -18,6 +18,7 @@ #include "dex_cache_array_fixups_mips.h" #include "base/arena_containers.h" +#include "intrinsics_mips.h" #include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -67,31 +68,16 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { } } - void VisitLoadString(HLoadString* load_string) OVERRIDE { - // If this is a load with PC-relative access to the dex cache strings array, - // we need to add the dex cache arrays base as the special input. - if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) { - // Initialize base for target dex file if needed. - const DexFile& dex_file = load_string->GetDexFile(); - HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file); - // Update the element offset in base. - DexCacheArraysLayout layout(kMipsPointerSize, &dex_file); - base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex())); - // Add the special argument base to the load. 
- load_string->AddSpecialInput(base); - } - } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { // If this is an invoke with PC-relative access to the dex cache methods array, // we need to add the dex cache arrays base as the special input. - if (invoke->HasPcRelativeDexCache()) { + if (invoke->HasPcRelativeDexCache() && + !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { // Initialize base for target method dex file if needed. - MethodReference target_method = invoke->GetTargetMethod(); - HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file); + HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(invoke->GetDexFile()); // Update the element offset in base. - DexCacheArraysLayout layout(kMipsPointerSize, target_method.dex_file); - base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index)); + DexCacheArraysLayout layout(kMipsPointerSize, &invoke->GetDexFile()); + base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex())); // Add the special argument base to the method. DCHECK(!invoke->HasCurrentMethodInput()); invoke->AddSpecialInput(base); diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h index 21056e130a..861a199d6c 100644 --- a/compiler/optimizing/dex_cache_array_fixups_mips.h +++ b/compiler/optimizing/dex_cache_array_fixups_mips.h @@ -29,9 +29,11 @@ namespace mips { class DexCacheArrayFixups : public HOptimization { public: DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, "dex_cache_array_fixups_mips", stats), + : HOptimization(graph, kDexCacheArrayFixupsMipsPassName, stats), codegen_(codegen) {} + static constexpr const char* kDexCacheArrayFixupsMipsPassName = "dex_cache_array_fixups_mips"; + void Run() OVERRIDE; private: diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 89d80cc281..09dcefa02c 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -122,7 +122,10 @@ class HGraphVisualizerDisassembler { new DisassemblerOptions(/* absolute_addresses */ false, base_address, end_address, - /* can_read_literals */ true))); + /* can_read_literals */ true, + Is64BitInstructionSet(instruction_set) + ? 
&Thread::DumpThreadOffset<PointerSize::k64> + : &Thread::DumpThreadOffset<PointerSize::k32>))); } ~HGraphVisualizerDisassembler() { @@ -438,13 +441,13 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void VisitInvoke(HInvoke* invoke) OVERRIDE { StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex(); - StartAttributeStream("method_name") << PrettyMethod( - invoke->GetDexMethodIndex(), GetGraph()->GetDexFile(), /* with_signature */ false); + StartAttributeStream("method_name") << GetGraph()->GetDexFile().PrettyMethod( + invoke->GetDexMethodIndex(), /* with_signature */ false); } void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { VisitInvoke(invoke); - StartAttributeStream("invoke_type") << invoke->GetOriginalInvokeType(); + StartAttributeStream("invoke_type") << invoke->GetInvokeType(); } void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { @@ -462,15 +465,15 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } void VisitInstanceFieldGet(HInstanceFieldGet* iget) OVERRIDE { - StartAttributeStream("field_name") << PrettyField(iget->GetFieldInfo().GetFieldIndex(), - iget->GetFieldInfo().GetDexFile(), + StartAttributeStream("field_name") << + iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iget->GetFieldType(); } void VisitInstanceFieldSet(HInstanceFieldSet* iset) OVERRIDE { - StartAttributeStream("field_name") << PrettyField(iset->GetFieldInfo().GetFieldIndex(), - iset->GetFieldInfo().GetDexFile(), + StartAttributeStream("field_name") << + iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iset->GetFieldType(); } @@ -601,7 +604,8 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { : instruction->GetReferenceTypeInfo(); ScopedObjectAccess soa(Thread::Current()); if (info.IsValid()) { - StartAttributeStream("klass") << PrettyDescriptor(info.GetTypeHandle().Get()); + StartAttributeStream("klass") + << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get()); StartAttributeStream("can_be_null") << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 1e86b75075..f5931a2f81 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -411,8 +411,8 @@ void GlobalValueNumberer::Run() { // Use the reverse post order to ensure the non back-edge predecessors of a block are // visited before the block itself. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); } } diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 129c2a94b5..f2602fbf8c 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -23,12 +23,12 @@ namespace art { * Since graph traversal may enter a SCC at any position, an initial representation may be rotated, * along dependences, viz. any of (a, b, c, d), (d, a, b, c) (c, d, a, b), (b, c, d, a) assuming * a chain of dependences (mutual independent items may occur in arbitrary order). 
For proper - * classification, the lexicographically first entry-phi is rotated to the front. + * classification, the lexicographically first loop-phi is rotated to the front. */ static void RotateEntryPhiFirst(HLoopInformation* loop, ArenaVector<HInstruction*>* scc, ArenaVector<HInstruction*>* new_scc) { - // Find very first entry-phi. + // Find very first loop-phi. const HInstructionList& phis = loop->GetHeader()->GetPhis(); HInstruction* phi = nullptr; size_t phi_pos = -1; @@ -41,7 +41,7 @@ static void RotateEntryPhiFirst(HLoopInformation* loop, } } - // If found, bring that entry-phi to front. + // If found, bring that loop-phi to front. if (phi != nullptr) { new_scc->clear(); for (size_t i = 0; i < size; i++) { @@ -87,23 +87,24 @@ HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph) : HOptimization(graph, kInductionPassName), global_depth_(0), stack_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), - scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), map_(std::less<HInstruction*>(), graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), cycle_(std::less<HInstruction*>(), graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + type_(Primitive::kPrimVoid), induction_(std::less<HLoopInformation*>(), - graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) { + graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + cycles_(std::less<HPhi*>(), + graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) { } void HInductionVarAnalysis::Run() { // Detects sequence variables (generalized induction variables) during an outer to inner // traversal of all loops using Gerlek's algorithm. The order is important to enable // range analysis on outer loop while visiting inner loops. - for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) { - HBasicBlock* graph_block = it_graph.Current(); + for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) { // Don't analyze irreducible loops. - // TODO(ajcbik): could/should we remove this restriction? if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) { VisitLoop(graph_block->GetLoopInformation()); } @@ -121,7 +122,7 @@ void HInductionVarAnalysis::VisitLoop(HLoopInformation* loop) { HBasicBlock* loop_block = it_loop.Current(); DCHECK(loop_block->IsInLoop()); if (loop_block->GetLoopInformation() != loop) { - continue; // Inner loops already visited. + continue; // Inner loops visited later. } // Visit phi-operations and instructions. for (HInstructionIterator it(loop_block->GetPhis()); !it.Done(); it.Advance()) { @@ -245,13 +246,13 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { const size_t size = scc_.size(); DCHECK_GE(size, 1u); - // Rotate proper entry-phi to front. + // Rotate proper loop-phi to front. if (size > 1) { ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)); RotateEntryPhiFirst(loop, &scc_, &other); } - // Analyze from entry-phi onwards. + // Analyze from loop-phi onwards. HInstruction* phi = scc_[0]; if (!phi->IsLoopHeaderPhi()) { return; @@ -263,6 +264,9 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { return; } + // Store interesting cycle. + AssignCycle(phi->AsPhi()); + // Singleton is wrap-around induction if all internal links have the same meaning. 
if (size == 1) { InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1); @@ -285,6 +289,12 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { } else if (instruction->IsSub()) { update = SolveAddSub( loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1), kSub, true); + } else if (instruction->IsXor()) { + update = SolveXor(loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1)); + } else if (instruction->IsEqual()) { + update = SolveTest(loop, phi, instruction, 0); + } else if (instruction->IsNotEqual()) { + update = SolveTest(loop, phi, instruction, 1); } else if (instruction->IsTypeConversion()) { update = SolveCnv(instruction->AsTypeConversion()); } @@ -360,6 +370,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(Indu // can be combined with an invariant to yield a similar result. Even two linear inputs can // be combined. All other combinations fail, however. if (a != nullptr && b != nullptr) { + type_ = Narrowest(type_, Narrowest(a->type, b->type)); if (a->induction_class == kInvariant && b->induction_class == kInvariant) { return CreateInvariantOp(op, a, b); } else if (a->induction_class == kLinear && b->induction_class == kLinear) { @@ -396,6 +407,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferMul(Inducti // can be multiplied with an invariant to yield a similar but multiplied result. // Two non-invariant inputs cannot be multiplied, however. if (a != nullptr && b != nullptr) { + type_ = Narrowest(type_, Narrowest(a->type, b->type)); if (a->induction_class == kInvariant && b->induction_class == kInvariant) { return CreateInvariantOp(kMul, a, b); } else if (a->induction_class == kInvariant) { @@ -436,6 +448,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferNeg(Inducti // Transfer over a unary negation: an invariant, linear, wrap-around, or periodic input // yields a similar but negated induction as result. if (a != nullptr) { + type_ = Narrowest(type_, a->type); if (a->induction_class == kInvariant) { return CreateInvariantOp(kNeg, nullptr, a); } @@ -553,6 +566,42 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopIn return nullptr; } +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveXor(HLoopInformation* loop, + HInstruction* entry_phi, + HInstruction* instruction, + HInstruction* x, + HInstruction* y) { + // Solve within a tight cycle on x = c ^ x or x = x ^ c. + if (entry_phi->InputCount() == 2 && instruction == entry_phi->InputAt(1)) { + InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0)); + InductionInfo* a = LookupInfo(loop, x); + if (a != nullptr && a->induction_class == kInvariant && entry_phi == y) { + return CreateInduction(kPeriodic, CreateInvariantOp(kXor, a, initial), initial, type_); + } + InductionInfo* b = LookupInfo(loop, y); + if (b != nullptr && b->induction_class == kInvariant && entry_phi == x) { + return CreateInduction(kPeriodic, CreateInvariantOp(kXor, initial, b), initial, type_); + } + } + return nullptr; +} + +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveTest(HLoopInformation* loop, + HInstruction* entry_phi, + HInstruction* instruction, + int64_t opposite_value) { + // Detect hidden XOR construction in tight cycles on x = (x == 0) or x = (x != 1). 
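To make the "hidden XOR" comment above concrete (an editorial illustration, not part of the patch): for a value k that only ever holds 0 or 1, the comparisons k == 0 and k != 1 compute exactly k ^ 1, so SolveTest() rewrites such a test as an XOR with the constant 1 and defers to SolveXor():

  // k = 0;
  // loop { a[k] = 0;  k = (k == 0); }   // behaves exactly like k = k ^ 1
  //
  // The loop-phi k takes the values 0, 1, 0, 1, ... while the comparison
  // itself produces 1, 0, 1, 0, ..., which the tests further below print
  // as "periodic((1), (0)):PrimBoolean".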
+ int64_t value = -1; + HInstruction* x = instruction->InputAt(0); + HInstruction* y = instruction->InputAt(1); + if (IsExact(LookupInfo(loop, x), &value) && value == opposite_value) { + return SolveXor(loop, entry_phi, instruction, graph_->GetIntConstant(1), y); + } else if (IsExact(LookupInfo(loop, y), &value) && value == opposite_value) { + return SolveXor(loop, entry_phi, instruction, x, graph_->GetIntConstant(1)); + } + return nullptr; +} + HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveCnv(HTypeConversion* conversion) { Primitive::Type from = conversion->GetInputType(); Primitive::Type to = conversion->GetResultType(); @@ -714,10 +763,12 @@ void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, case kCondGE: op = kGE; break; default: LOG(FATAL) << "CONDITION UNREACHABLE"; } + // Associate trip count with control instruction, rather than the condition (even + // though it's its use) since former provides a convenient use-free placeholder. + HInstruction* control = loop->GetHeader()->GetLastInstruction(); InductionInfo* taken_test = CreateInvariantOp(op, lower_expr, upper_expr); - AssignInfo(loop, - loop->GetHeader()->GetLastInstruction(), - CreateTripCount(tcKind, trip_count, taken_test, type)); + DCHECK(control->IsIf()); + AssignInfo(loop, control, CreateTripCount(tcKind, trip_count, taken_test, type)); } bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr, @@ -848,8 +899,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv int64_t value = -1; if (IsExact(a, &value)) { if (value == 0) { - // Simplify 0 + b = b, 0 * b = 0. - if (op == kAdd) { + // Simplify 0 + b = b, 0 ^ b = b, 0 * b = 0. + if (op == kAdd || op == kXor) { return b; } else if (op == kMul) { return a; @@ -865,8 +916,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv } if (IsExact(b, &value)) { if (value == 0) { - // Simplify a + 0 = a, a - 0 = a, a * 0 = 0, -0 = 0. - if (op == kAdd || op == kSub) { + // Simplify a + 0 = a, a - 0 = a, a ^ 0 = a, a * 0 = 0, -0 = 0. 
+ if (op == kAdd || op == kSub || op == kXor) { return a; } else if (op == kMul || op == kNeg) { return b; @@ -897,6 +948,23 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, b->type); } + +void HInductionVarAnalysis::AssignCycle(HPhi* phi) { + ArenaSet<HInstruction*>* set = &cycles_.Put(phi, ArenaSet<HInstruction*>( + graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)))->second; + for (HInstruction* i : scc_) { + set->insert(i); + } +} + +ArenaSet<HInstruction*>* HInductionVarAnalysis::LookupCycle(HPhi* phi) { + auto it = cycles_.find(phi); + if (it != cycles_.end()) { + return &it->second; + } + return nullptr; +} + bool HInductionVarAnalysis::IsExact(InductionInfo* info, int64_t* value) { return InductionVarRange(this).IsConstant(info, InductionVarRange::kExact, value); } @@ -937,6 +1005,7 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) { case kNeg: inv += " - "; break; case kMul: inv += " * "; break; case kDiv: inv += " / "; break; + case kXor: inv += " ^ "; break; case kLT: inv += " < "; break; case kLE: inv += " <= "; break; case kGT: inv += " > "; break; diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index 7c74816c26..70271799d2 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -39,9 +39,9 @@ class HInductionVarAnalysis : public HOptimization { void Run() OVERRIDE; - private: static constexpr const char* kInductionPassName = "induction_var_analysis"; + private: struct NodeInfo { explicit NodeInfo(uint32_t d) : depth(d), done(false) {} uint32_t depth; @@ -64,6 +64,7 @@ class HInductionVarAnalysis : public HOptimization { kNeg, kMul, kDiv, + kXor, kFetch, // Trip-counts. kTripCountInLoop, // valid in full loop; loop is finite @@ -171,7 +172,16 @@ class HInductionVarAnalysis : public HOptimization { HInstruction* x, HInstruction* y, InductionOp op, - bool is_first_call); + bool is_first_call); // possibly swaps x and y to try again + InductionInfo* SolveXor(HLoopInformation* loop, + HInstruction* entry_phi, + HInstruction* instruction, + HInstruction* x, + HInstruction* y); + InductionInfo* SolveTest(HLoopInformation* loop, + HInstruction* entry_phi, + HInstruction* instruction, + int64_t oppositive_value); InductionInfo* SolveCnv(HTypeConversion* conversion); // Trip count information. @@ -204,6 +214,8 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* LookupInfo(HLoopInformation* loop, HInstruction* instruction); InductionInfo* CreateConstant(int64_t value, Primitive::Type type); InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b); + void AssignCycle(HPhi* phi); + ArenaSet<HInstruction*>* LookupCycle(HPhi* phi); // Constants. bool IsExact(InductionInfo* info, /*out*/ int64_t* value); @@ -219,8 +231,8 @@ class HInductionVarAnalysis : public HOptimization { // Temporary book-keeping during the analysis. 
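A minimal sketch of how the cycle information recorded by AssignCycle() might be consumed through the new LookupCycle() accessor (hypothetical caller with access to the analysis, such as one of its friend classes; the names analysis, phi and candidate are placeholders, not part of this patch):

  // `analysis` is an HInductionVarAnalysis*, `phi` a loop-header phi and
  // `candidate` an instruction tested for membership in that phi's cycle.
  ArenaSet<HInstruction*>* cycle = analysis->LookupCycle(phi);
  bool in_induction_cycle =
      (cycle != nullptr) && (cycle->find(candidate) != cycle->end());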
uint32_t global_depth_; ArenaVector<HInstruction*> stack_; - ArenaVector<HInstruction*> scc_; ArenaSafeMap<HInstruction*, NodeInfo> map_; + ArenaVector<HInstruction*> scc_; ArenaSafeMap<HInstruction*, InductionInfo*> cycle_; Primitive::Type type_; @@ -230,6 +242,11 @@ class HInductionVarAnalysis : public HOptimization { */ ArenaSafeMap<HLoopInformation*, ArenaSafeMap<HInstruction*, InductionInfo*>> induction_; + /** + * Preserves induction cycle information for each loop-phi. + */ + ArenaSafeMap<HPhi*, ArenaSet<HInstruction*>> cycles_; + friend class InductionVarAnalysisTest; friend class InductionVarRange; friend class InductionVarRangeTest; diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index 580d24b74b..031f1d74a8 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -107,7 +107,7 @@ class InductionVarAnalysisTest : public CommonCompilerTest { } // Builds if-statement at depth d. - HPhi* BuildIf(int d, HBasicBlock** ifT, HBasicBlock **ifF) { + HPhi* BuildIf(int d, HBasicBlock** ifT, HBasicBlock** ifF) { HBasicBlock* cond = new (&allocator_) HBasicBlock(graph_); HBasicBlock* ifTrue = new (&allocator_) HBasicBlock(graph_); HBasicBlock* ifFalse = new (&allocator_) HBasicBlock(graph_); @@ -157,6 +157,13 @@ class InductionVarAnalysisTest : public CommonCompilerTest { iva_->LookupInfo(loop_body_[d]->GetLoopInformation(), instruction)); } + // Returns induction information of the trip-count of loop at depth d. + std::string GetTripCount(int d) { + HInstruction* control = loop_header_[d]->GetLastInstruction(); + DCHECK(control->IsIf()); + return GetInductionInfo(control, d); + } + // Returns true if instructions have identical induction. bool HaveSameInduction(HInstruction* instruction1, HInstruction* instruction2) { return HInductionVarAnalysis::InductionEqual( @@ -239,8 +246,7 @@ TEST_F(InductionVarAnalysisTest, FindBasicInduction) { EXPECT_FALSE(HaveSameInduction(store->InputAt(1), increment_[0])); // Trip-count. 
- EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", - GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(0).c_str()); } TEST_F(InductionVarAnalysisTest, FindDerivedInduction) { @@ -253,15 +259,15 @@ TEST_F(InductionVarAnalysisTest, FindDerivedInduction) { // k = - i; // } BuildLoopNest(1); - HInstruction *add = InsertInstruction( + HInstruction* add = InsertInstruction( new (&allocator_) HAdd(Primitive::kPrimInt, constant100_, basic_[0]), 0); - HInstruction *sub = InsertInstruction( + HInstruction* sub = InsertInstruction( new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0); - HInstruction *mul = InsertInstruction( + HInstruction* mul = InsertInstruction( new (&allocator_) HMul(Primitive::kPrimInt, constant100_, basic_[0]), 0); - HInstruction *shl = InsertInstruction( + HInstruction* shl = InsertInstruction( new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0); - HInstruction *neg = InsertInstruction( + HInstruction* neg = InsertInstruction( new (&allocator_) HNeg(Primitive::kPrimInt, basic_[0]), 0); PerformInductionVarAnalysis(); @@ -285,10 +291,10 @@ TEST_F(InductionVarAnalysisTest, FindChainInduction) { HPhi* k = InsertLoopPhi(0, 0); k->AddInput(constant0_); - HInstruction *add = InsertInstruction( + HInstruction* add = InsertInstruction( new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0); HInstruction* store1 = InsertArrayStore(add, 0); - HInstruction *sub = InsertInstruction( + HInstruction* sub = InsertInstruction( new (&allocator_) HSub(Primitive::kPrimInt, add, constant1_), 0); HInstruction* store2 = InsertArrayStore(sub, 0); k->AddInput(sub); @@ -375,7 +381,7 @@ TEST_F(InductionVarAnalysisTest, FindFirstOrderWrapAroundInduction) { k->AddInput(constant0_); HInstruction* store = InsertArrayStore(k, 0); - HInstruction *sub = InsertInstruction( + HInstruction* sub = InsertInstruction( new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0); k->AddInput(sub); PerformInductionVarAnalysis(); @@ -401,7 +407,7 @@ TEST_F(InductionVarAnalysisTest, FindSecondOrderWrapAroundInduction) { HInstruction* store = InsertArrayStore(k, 0); k->AddInput(t); - HInstruction *sub = InsertInstruction( + HInstruction* sub = InsertInstruction( new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0], 0), 0); t->AddInput(sub); PerformInductionVarAnalysis(); @@ -425,15 +431,15 @@ TEST_F(InductionVarAnalysisTest, FindWrapAroundDerivedInduction) { HPhi* k = InsertLoopPhi(0, 0); k->AddInput(constant0_); - HInstruction *add = InsertInstruction( + HInstruction* add = InsertInstruction( new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0); - HInstruction *sub = InsertInstruction( + HInstruction* sub = InsertInstruction( new (&allocator_) HSub(Primitive::kPrimInt, k, constant100_), 0); - HInstruction *mul = InsertInstruction( + HInstruction* mul = InsertInstruction( new (&allocator_) HMul(Primitive::kPrimInt, k, constant100_), 0); - HInstruction *shl = InsertInstruction( + HInstruction* shl = InsertInstruction( new (&allocator_) HShl(Primitive::kPrimInt, k, constant1_), 0); - HInstruction *neg = InsertInstruction( + HInstruction* neg = InsertInstruction( new (&allocator_) HNeg(Primitive::kPrimInt, k), 0); k->AddInput( InsertInstruction(new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0)); @@ -491,7 +497,7 @@ TEST_F(InductionVarAnalysisTest, FindIdiomaticPeriodicInduction) { k->AddInput(constant0_); HInstruction* store = 
InsertArrayStore(k, 0); - HInstruction *sub = InsertInstruction( + HInstruction* sub = InsertInstruction( new (&allocator_) HSub(Primitive::kPrimInt, constant1_, k), 0); k->AddInput(sub); PerformInductionVarAnalysis(); @@ -500,6 +506,131 @@ TEST_F(InductionVarAnalysisTest, FindIdiomaticPeriodicInduction) { EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(sub, 0).c_str()); } +TEST_F(InductionVarAnalysisTest, FindXorPeriodicInduction) { + // Setup: + // k = 0; + // for (int i = 0; i < 100; i++) { + // a[k] = 0; + // k = k ^ 1; + // } + BuildLoopNest(1); + HPhi* k = InsertLoopPhi(0, 0); + k->AddInput(constant0_); + + HInstruction* store = InsertArrayStore(k, 0); + HInstruction* x = InsertInstruction( + new (&allocator_) HXor(Primitive::kPrimInt, k, constant1_), 0); + k->AddInput(x); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("periodic((0), (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str()); + EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(x, 0).c_str()); +} + +TEST_F(InductionVarAnalysisTest, FindXorConstantLeftPeriodicInduction) { + // Setup: + // k = 1; + // for (int i = 0; i < 100; i++) { + // k = 1 ^ k; + // } + BuildLoopNest(1); + HPhi* k = InsertLoopPhi(0, 0); + k->AddInput(constant1_); + + HInstruction* x = InsertInstruction( + new (&allocator_) HXor(Primitive::kPrimInt, constant1_, k), 0); + k->AddInput(x); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("periodic(((1) ^ (1)), (1)):PrimInt", GetInductionInfo(x, 0).c_str()); +} + +TEST_F(InductionVarAnalysisTest, FindXor100PeriodicInduction) { + // Setup: + // k = 1; + // for (int i = 0; i < 100; i++) { + // k = k ^ 100; + // } + BuildLoopNest(1); + HPhi* k = InsertLoopPhi(0, 0); + k->AddInput(constant1_); + + HInstruction* x = InsertInstruction( + new (&allocator_) HXor(Primitive::kPrimInt, k, constant100_), 0); + k->AddInput(x); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("periodic(((1) ^ (100)), (1)):PrimInt", GetInductionInfo(x, 0).c_str()); +} + +TEST_F(InductionVarAnalysisTest, FindBooleanEqPeriodicInduction) { + // Setup: + // k = 0; + // for (int i = 0; i < 100; i++) { + // k = (k == 0); + // } + BuildLoopNest(1); + HPhi* k = InsertLoopPhi(0, 0); + k->AddInput(constant0_); + + HInstruction* x = InsertInstruction(new (&allocator_) HEqual(k, constant0_), 0); + k->AddInput(x); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str()); +} + +TEST_F(InductionVarAnalysisTest, FindBooleanEqConstantLeftPeriodicInduction) { + // Setup: + // k = 0; + // for (int i = 0; i < 100; i++) { + // k = (0 == k); + // } + BuildLoopNest(1); + HPhi* k = InsertLoopPhi(0, 0); + k->AddInput(constant0_); + + HInstruction* x = InsertInstruction(new (&allocator_) HEqual(constant0_, k), 0); + k->AddInput(x); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str()); +} + +TEST_F(InductionVarAnalysisTest, FindBooleanNePeriodicInduction) { + // Setup: + // k = 0; + // for (int i = 0; i < 100; i++) { + // k = (k != 1); + // } + BuildLoopNest(1); + HPhi* k = InsertLoopPhi(0, 0); + k->AddInput(constant0_); + + HInstruction* x = InsertInstruction(new (&allocator_) HNotEqual(k, constant1_), 0); + k->AddInput(x); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str()); +} + +TEST_F(InductionVarAnalysisTest, FindBooleanNeConstantLeftPeriodicInduction) { + // Setup: + // k = 0; + // for (int i = 0; i < 100; i++) { + // k = (1 != k); + // } + 
BuildLoopNest(1); + HPhi* k = InsertLoopPhi(0, 0); + k->AddInput(constant0_); + + HInstruction* x = InsertInstruction(new (&allocator_) HNotEqual(constant1_, k), 0); + k->AddInput(x); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str()); +} + TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) { // Setup: // k = 0; @@ -520,15 +651,15 @@ TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) { k_header->AddInput(k_body); // Derived expressions. - HInstruction *add = InsertInstruction( + HInstruction* add = InsertInstruction( new (&allocator_) HAdd(Primitive::kPrimInt, k_body, constant100_), 0); - HInstruction *sub = InsertInstruction( + HInstruction* sub = InsertInstruction( new (&allocator_) HSub(Primitive::kPrimInt, k_body, constant100_), 0); - HInstruction *mul = InsertInstruction( + HInstruction* mul = InsertInstruction( new (&allocator_) HMul(Primitive::kPrimInt, k_body, constant100_), 0); - HInstruction *shl = InsertInstruction( + HInstruction* shl = InsertInstruction( new (&allocator_) HShl(Primitive::kPrimInt, k_body, constant1_), 0); - HInstruction *neg = InsertInstruction( + HInstruction* neg = InsertInstruction( new (&allocator_) HNeg(Primitive::kPrimInt, k_body), 0); PerformInductionVarAnalysis(); @@ -557,7 +688,7 @@ TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { k[d] = InsertLoopPhi(0, d); } - HInstruction *inc = InsertInstruction( + HInstruction* inc = InsertInstruction( new (&allocator_) HAdd(Primitive::kPrimInt, constant1_, k[9]), 9); HInstruction* store = InsertArrayStore(inc, 9); @@ -579,8 +710,7 @@ TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { } EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(increment_[d], d).c_str()); // Trip-count. - EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", - GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str()); + EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(d).c_str()); } } @@ -592,7 +722,7 @@ TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) { // a[i] = 0; // } BuildLoopNest(1); - HInstruction *conv = InsertInstruction( + HInstruction* conv = InsertInstruction( new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0); HInstruction* store1 = InsertArrayStore(conv, 0); HInstruction* store2 = InsertArrayStore(basic_[0], 0); @@ -607,8 +737,32 @@ TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) { EXPECT_FALSE(HaveSameInduction(store1->InputAt(1), store2->InputAt(1))); // Trip-count. - EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", - GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(0).c_str()); +} + +TEST_F(InductionVarAnalysisTest, ByteInductionDerivedIntLoopControl) { + // Setup: + // for (int i = 0; i < 100; i++) { + // k = (byte) i; + // a[k] = 0; + // k = k + 1 + // a[k] = 0; + // } + BuildLoopNest(1); + HInstruction* conv = InsertInstruction( + new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0); + HInstruction* store1 = InsertArrayStore(conv, 0); + HInstruction* add = InsertInstruction( + new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0); + HInstruction* store2 = InsertArrayStore(add, 0); + + PerformInductionVarAnalysis(); + + // Byte induction (k) is "transferred" over conversion into addition (k + 1). + // This means only values within byte range can be trusted (even though + // addition can jump out of the range of course). 
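To attach numbers to the comment above (editorial illustration only): with the loop bound of 100 used in this test, k = (byte) i ranges over 0..99 and k + 1 over 1..100, both of which fit in a byte, so the printed :PrimByte formulas hold for the whole trip; the type suffix is what lets later range analysis know these formulas may only be trusted within the byte range [-128, 127].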
+ EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str()); + EXPECT_STREQ("((1) * i + (1)):PrimByte", GetInductionInfo(store2->InputAt(1), 0).c_str()); } TEST_F(InductionVarAnalysisTest, ByteLoopControl1) { @@ -626,8 +780,7 @@ TEST_F(InductionVarAnalysisTest, ByteLoopControl1) { EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str()); // Trip-count. - EXPECT_STREQ("(((127) - (-128)) (TC-loop) ((-128) < (127)))", - GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("(((127) - (-128)) (TC-loop) ((-128) < (127)))", GetTripCount(0).c_str()); } TEST_F(InductionVarAnalysisTest, ByteLoopControl2) { @@ -645,7 +798,7 @@ TEST_F(InductionVarAnalysisTest, ByteLoopControl2) { EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str()); // Trip-count undefined. - EXPECT_STREQ("", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("", GetTripCount(0).c_str()); } TEST_F(InductionVarAnalysisTest, ShortLoopControl1) { @@ -664,8 +817,7 @@ TEST_F(InductionVarAnalysisTest, ShortLoopControl1) { EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort", GetInductionInfo(increment_[0], 0).c_str()); // Trip-count. - EXPECT_STREQ("(((32767) - (-32768)) (TC-loop) ((-32768) < (32767)))", - GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("(((32767) - (-32768)) (TC-loop) ((-32768) < (32767)))", GetTripCount(0).c_str()); } TEST_F(InductionVarAnalysisTest, ShortLoopControl2) { @@ -684,7 +836,7 @@ TEST_F(InductionVarAnalysisTest, ShortLoopControl2) { EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort", GetInductionInfo(increment_[0], 0).c_str()); // Trip-count undefined. - EXPECT_STREQ("", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("", GetTripCount(0).c_str()); } TEST_F(InductionVarAnalysisTest, CharLoopControl1) { @@ -701,8 +853,7 @@ TEST_F(InductionVarAnalysisTest, CharLoopControl1) { EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str()); // Trip-count. - EXPECT_STREQ("((65535) (TC-loop) ((0) < (65535)))", - GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("((65535) (TC-loop) ((0) < (65535)))", GetTripCount(0).c_str()); } TEST_F(InductionVarAnalysisTest, CharLoopControl2) { @@ -719,7 +870,7 @@ TEST_F(InductionVarAnalysisTest, CharLoopControl2) { EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str()); // Trip-count undefined. - EXPECT_STREQ("", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("", GetTripCount(0).c_str()); } } // namespace art diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 5e587e0810..7cc8b1ea4c 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -106,6 +106,12 @@ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { return instruction; } +/** Helper method to obtain loop's control instruction. */ +static HInstruction* GetLoopControl(HLoopInformation* loop) { + DCHECK(loop != nullptr); + return loop->GetHeader()->GetLastInstruction(); +} + // // Public class methods. // @@ -143,45 +149,143 @@ bool InductionVarRange::GetInductionRange(HInstruction* context, // Find range. 
chase_hint_ = chase_hint; bool in_body = context->GetBlock() != loop->GetHeader(); + int64_t stride_value = 0; *min_val = GetVal(info, trip, in_body, /* is_min */ true); *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); - *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + *needs_finite_test = NeedsTripCount(info, &stride_value) && IsUnsafeTripCount(trip); return true; } -bool InductionVarRange::CanGenerateCode(HInstruction* context, - HInstruction* instruction, - /*out*/bool* needs_finite_test, - /*out*/bool* needs_taken_test) { - return GenerateCode(context, - instruction, - nullptr, nullptr, nullptr, nullptr, nullptr, // nothing generated yet - needs_finite_test, - needs_taken_test); -} - -void InductionVarRange::GenerateRangeCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** lower, - /*out*/HInstruction** upper) { +bool InductionVarRange::CanGenerateRange(HInstruction* context, + HInstruction* instruction, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test) { + bool is_last_value = false; + int64_t stride_value = 0; + return GenerateRangeOrLastValue(context, + instruction, + is_last_value, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // nothing generated yet + &stride_value, + needs_finite_test, + needs_taken_test) + && (stride_value == -1 || + stride_value == 0 || + stride_value == 1); // avoid wrap-around anomalies. +} + +void InductionVarRange::GenerateRange(HInstruction* context, + HInstruction* instruction, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** lower, + /*out*/HInstruction** upper) { + bool is_last_value = false; + int64_t stride_value = 0; + bool b1, b2; // unused + if (!GenerateRangeOrLastValue(context, + instruction, + is_last_value, + graph, + block, + lower, + upper, + nullptr, + &stride_value, + &b1, + &b2)) { + LOG(FATAL) << "Failed precondition: CanGenerateRange()"; + } +} + +HInstruction* InductionVarRange::GenerateTakenTest(HInstruction* context, + HGraph* graph, + HBasicBlock* block) { + HInstruction* taken_test = nullptr; + bool is_last_value = false; + int64_t stride_value = 0; bool b1, b2; // unused - if (!GenerateCode(context, instruction, graph, block, lower, upper, nullptr, &b1, &b2)) { - LOG(FATAL) << "Failed precondition: GenerateCode()"; + if (!GenerateRangeOrLastValue(context, + context, + is_last_value, + graph, + block, + nullptr, + nullptr, + &taken_test, + &stride_value, + &b1, + &b2)) { + LOG(FATAL) << "Failed precondition: CanGenerateRange()"; + } + return taken_test; +} + +bool InductionVarRange::CanGenerateLastValue(HInstruction* instruction) { + bool is_last_value = true; + int64_t stride_value = 0; + bool needs_finite_test = false; + bool needs_taken_test = false; + return GenerateRangeOrLastValue(instruction, + instruction, + is_last_value, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // nothing generated yet + &stride_value, + &needs_finite_test, + &needs_taken_test) + && !needs_finite_test && !needs_taken_test; +} + +HInstruction* InductionVarRange::GenerateLastValue(HInstruction* instruction, + HGraph* graph, + HBasicBlock* block) { + HInstruction* last_value = nullptr; + bool is_last_value = true; + int64_t stride_value = 0; + bool b1, b2; // unused + if (!GenerateRangeOrLastValue(instruction, + instruction, + is_last_value, + graph, + block, + &last_value, + &last_value, + nullptr, + &stride_value, + &b1, + &b2)) { + LOG(FATAL) << "Failed precondition: 
CanGenerateLastValue()"; } + return last_value; } -void InductionVarRange::GenerateTakenTest(HInstruction* context, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** taken_test) { - bool b1, b2; // unused - if (!GenerateCode(context, context, graph, block, nullptr, nullptr, taken_test, &b1, &b2)) { - LOG(FATAL) << "Failed precondition: GenerateCode()"; +void InductionVarRange::Replace(HInstruction* instruction, + HInstruction* fetch, + HInstruction* replacement) { + for (HLoopInformation* lp = instruction->GetBlock()->GetLoopInformation(); // closest enveloping loop + lp != nullptr; + lp = lp->GetPreHeader()->GetLoopInformation()) { + // Update instruction's information. + ReplaceInduction(induction_analysis_->LookupInfo(lp, instruction), fetch, replacement); + // Update loop's trip-count information. + ReplaceInduction(induction_analysis_->LookupInfo(lp, GetLoopControl(lp)), fetch, replacement); } } +bool InductionVarRange::IsFinite(HLoopInformation* loop) const { + HInductionVarAnalysis::InductionInfo *trip = + induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); + return trip != nullptr && !IsUnsafeTripCount(trip); +} + // // Private class methods. // @@ -221,13 +325,13 @@ bool InductionVarRange::HasInductionInfo( /*out*/ HLoopInformation** loop, /*out*/ HInductionVarAnalysis::InductionInfo** info, /*out*/ HInductionVarAnalysis::InductionInfo** trip) const { - HLoopInformation* l = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (l != nullptr) { - HInductionVarAnalysis::InductionInfo* i = induction_analysis_->LookupInfo(l, instruction); + HLoopInformation* lp = context->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (lp != nullptr) { + HInductionVarAnalysis::InductionInfo* i = induction_analysis_->LookupInfo(lp, instruction); if (i != nullptr) { - *loop = l; + *loop = lp; *info = i; - *trip = induction_analysis_->LookupInfo(l, l->GetHeader()->GetLastInstruction()); + *trip = induction_analysis_->LookupInfo(lp, GetLoopControl(lp)); return true; } } @@ -260,12 +364,13 @@ bool InductionVarRange::HasFetchInLoop(HInductionVarAnalysis::InductionInfo* inf return false; } -bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const { +bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info, + int64_t* stride_value) const { if (info != nullptr) { if (info->induction_class == HInductionVarAnalysis::kLinear) { - return true; + return IsConstant(info->op_a, kExact, stride_value); } else if (info->induction_class == HInductionVarAnalysis::kWrapAround) { - return NeedsTripCount(info->op_b); + return NeedsTripCount(info->op_b, stride_value); } } return false; @@ -426,6 +531,8 @@ InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::Induct return GetMul(info->op_a, info->op_b, trip, in_body, is_min); case HInductionVarAnalysis::kDiv: return GetDiv(info->op_a, info->op_b, trip, in_body, is_min); + case HInductionVarAnalysis::kXor: + return GetXor(info->op_a, info->op_b); case HInductionVarAnalysis::kFetch: return GetFetch(info->fetch, trip, in_body, is_min); case HInductionVarAnalysis::kTripCountInLoop: @@ -527,6 +634,21 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct return Value(); } +InductionVarRange::Value InductionVarRange::GetXor( + HInductionVarAnalysis::InductionInfo* info1, + HInductionVarAnalysis::InductionInfo* info2) const { + int64_t v1 = 0; + int64_t v2 = 0; + // Only accept exact values. 
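A minimal standalone sketch of the exact-constant rule that the surrounding GetXor hunk implements: an XOR contributes a known value only when both operands are exact constants and the result fits in a 32-bit int. std::optional stands in for the analysis' Value type here; this is not the ART implementation.

```cpp
#include <cstdint>
#include <cstdio>
#include <optional>

std::optional<int32_t> EvalXor(std::optional<int64_t> a, std::optional<int64_t> b) {
  if (a.has_value() && b.has_value()) {
    int64_t value = *a ^ *b;
    if (value >= INT32_MIN && value <= INT32_MAX) {
      return static_cast<int32_t>(value);
    }
  }
  return std::nullopt;  // unknown: fall back to an unconstrained range
}

int main() {
  std::printf("0 ^ 1  -> %d\n", EvalXor(0, 1).value());                   // 1
  std::printf("known? -> %d\n", EvalXor(std::nullopt, 1).has_value());    // 0
  return 0;
}
```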
+ if (IsConstant(info1, kExact, &v1) && IsConstant(info2, kExact, &v2)) { + int64_t value = v1 ^ v2; + if (CanLongValueFitIntoInt(value)) { + return Value(static_cast<int32_t>(value)); + } + } + return Value(); +} + InductionVarRange::Value InductionVarRange::MulRangeAndConstant( int64_t value, HInductionVarAnalysis::InductionInfo* info, @@ -616,15 +738,17 @@ InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is return Value(); } -bool InductionVarRange::GenerateCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** lower, - /*out*/HInstruction** upper, - /*out*/HInstruction** taken_test, - /*out*/bool* needs_finite_test, - /*out*/bool* needs_taken_test) const { +bool InductionVarRange::GenerateRangeOrLastValue(HInstruction* context, + HInstruction* instruction, + bool is_last_value, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** lower, + /*out*/HInstruction** upper, + /*out*/HInstruction** taken_test, + /*out*/int64_t* stride_value, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test) const { HLoopInformation* loop = nullptr; HInductionVarAnalysis::InductionInfo* info = nullptr; HInductionVarAnalysis::InductionInfo* trip = nullptr; @@ -637,8 +761,24 @@ bool InductionVarRange::GenerateCode(HInstruction* context, // code does not use the trip-count explicitly (since there could be an implicit relation // between e.g. an invariant subscript and a not-taken condition). bool in_body = context->GetBlock() != loop->GetHeader(); - *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + *stride_value = 0; + *needs_finite_test = NeedsTripCount(info, stride_value) && IsUnsafeTripCount(trip); *needs_taken_test = IsBodyTripCount(trip); + // Handle last value request. + if (is_last_value) { + if (info->induction_class == HInductionVarAnalysis::kLinear) { + if (*stride_value > 0) { + lower = nullptr; + } else { + upper = nullptr; + } + } else if (info->induction_class == HInductionVarAnalysis::kPeriodic) { + DCHECK(!in_body); + return GenerateLastValuePeriodic(info, trip, graph, block, lower, needs_taken_test); + } else { + return false; + } + } // Code generation for taken test: generate the code when requested or otherwise analyze // if code generation is feasible when taken test is needed. if (taken_test != nullptr) { @@ -658,6 +798,56 @@ bool InductionVarRange::GenerateCode(HInstruction* context, GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); } +bool InductionVarRange::GenerateLastValuePeriodic(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** result, + /*out*/bool* needs_taken_test) const { + DCHECK(info->induction_class == HInductionVarAnalysis::kPeriodic); + // Count period. + int32_t period = 1; + for (HInductionVarAnalysis::InductionInfo* p = info; + p->induction_class == HInductionVarAnalysis::kPeriodic; + p = p->op_b, ++period) {} + // Handle periodic(x, y) case for restricted types. + if (period != 2 || + trip->op_a->type != Primitive::kPrimInt || + (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean)) { + return false; // TODO: easy to generalize + } + HInstruction* x_instr = nullptr; + HInstruction* y_instr = nullptr; + HInstruction* trip_expr = nullptr; + if (GenerateCode(info->op_a, nullptr, graph, block, graph ? 
&x_instr : nullptr, false, false) && + GenerateCode(info->op_b, nullptr, graph, block, graph ? &y_instr : nullptr, false, false) && + GenerateCode(trip->op_a, nullptr, graph, block, graph ? &trip_expr : nullptr, false, false)) { + // During actual code generation (graph != nullptr), + // generate is_even ? x : y select instruction. + if (graph != nullptr) { + HInstruction* is_even = Insert(block, new (graph->GetArena()) HEqual( + Insert(block, new (graph->GetArena()) HAnd( + Primitive::kPrimInt, trip_expr, graph->GetIntConstant(1))), + graph->GetIntConstant(0), kNoDexPc)); + *result = Insert(block, new (graph->GetArena()) HSelect(is_even, x_instr, y_instr, kNoDexPc)); + } + // Guard select with taken test if needed. + if (*needs_taken_test) { + HInstruction* taken_test = nullptr; + if (!GenerateCode( + trip->op_b, nullptr, graph, block, graph ? &taken_test : nullptr, false, false)) { + return false; + } else if (graph != nullptr) { + *result = Insert(block, + new (graph->GetArena()) HSelect(taken_test, *result, x_instr, kNoDexPc)); + } + *needs_taken_test = false; // taken care of + } + return true; + } + return false; +} + bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip, HGraph* graph, // when set, code is generated @@ -666,9 +856,13 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, bool in_body, bool is_min) const { if (info != nullptr) { + // If during codegen, the result is not needed (nullptr), simply return success. + if (graph != nullptr && result == nullptr) { + return true; + } // Verify type safety. Primitive::Type type = Primitive::kPrimInt; - if (info->type != type) { + if (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean) { return false; } // Handle current operation. @@ -679,6 +873,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, // Invariants. switch (info->operation) { case HInductionVarAnalysis::kAdd: + case HInductionVarAnalysis::kXor: case HInductionVarAnalysis::kLT: case HInductionVarAnalysis::kLE: case HInductionVarAnalysis::kGT: @@ -690,6 +885,8 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, switch (info->operation) { case HInductionVarAnalysis::kAdd: operation = new (graph->GetArena()) HAdd(type, opa, opb); break; + case HInductionVarAnalysis::kXor: + operation = new (graph->GetArena()) HXor(type, opa, opb); break; case HInductionVarAnalysis::kLT: operation = new (graph->GetArena()) HLessThan(opa, opb); break; case HInductionVarAnalysis::kLE: @@ -757,25 +954,29 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, } break; case HInductionVarAnalysis::kLinear: { - // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only - // to avoid arithmetic wrap-around situations that are hard to guard against. + // Linear induction a * i + b, for normalized 0 <= i < TC. For ranges, this should + // be restricted to a unit stride to avoid arithmetic wrap-around situations that + // are harder to guard against. For a last value, requesting min/max based on any + // stride yields right value. int64_t stride_value = 0; if (IsConstant(info->op_a, kExact, &stride_value)) { - if (stride_value == 1 || stride_value == -1) { - const bool is_min_a = stride_value == 1 ? 
is_min : !is_min; - if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && - GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { - if (graph != nullptr) { - HInstruction* oper; - if (stride_value == 1) { - oper = new (graph->GetArena()) HAdd(type, opa, opb); - } else { - oper = new (graph->GetArena()) HSub(type, opb, opa); - } - *result = Insert(block, oper); + const bool is_min_a = stride_value >= 0 ? is_min : !is_min; + if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && + GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { + if (graph != nullptr) { + HInstruction* oper; + if (stride_value == 1) { + oper = new (graph->GetArena()) HAdd(type, opa, opb); + } else if (stride_value == -1) { + oper = new (graph->GetArena()) HSub(type, opb, opa); + } else { + HInstruction* mul = new (graph->GetArena()) HMul( + type, graph->GetIntConstant(stride_value), opa); + oper = new (graph->GetArena()) HAdd(type, Insert(block, mul), opb); } - return true; + *result = Insert(block, oper); } + return true; } } break; @@ -800,4 +1001,18 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, return false; } +void InductionVarRange::ReplaceInduction(HInductionVarAnalysis::InductionInfo* info, + HInstruction* fetch, + HInstruction* replacement) { + if (info != nullptr) { + if (info->induction_class == HInductionVarAnalysis::kInvariant && + info->operation == HInductionVarAnalysis::kFetch && + info->fetch == fetch) { + info->fetch = replacement; + } + ReplaceInduction(info->op_a, fetch, replacement); + ReplaceInduction(info->op_b, fetch, replacement); + } +} + } // namespace art diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 00aaa167f8..034cf32b2d 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -76,10 +76,10 @@ class InductionVarRange { * and need_taken test flags denote if an additional finite-test and/or taken-test * are needed to protect the range evaluation inside its loop. */ - bool CanGenerateCode(HInstruction* context, - HInstruction* instruction, - /*out*/ bool* needs_finite_test, - /*out*/ bool* needs_taken_test); + bool CanGenerateRange(HInstruction* context, + HInstruction* instruction, + /*out*/ bool* needs_finite_test, + /*out*/ bool* needs_taken_test); /** * Generates the actual code in the HIR for the lower and upper bound expressions on the @@ -94,25 +94,65 @@ class InductionVarRange { * lower: add x, 0 * upper: add x, 5 * - * Precondition: CanGenerateCode() returns true. + * Precondition: CanGenerateRange() returns true. */ - void GenerateRangeCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/ HInstruction** lower, - /*out*/ HInstruction** upper); + void GenerateRange(HInstruction* context, + HInstruction* instruction, + HGraph* graph, + HBasicBlock* block, + /*out*/ HInstruction** lower, + /*out*/ HInstruction** upper); /** * Generates explicit taken-test for the loop in the given context. Code is generated in - * given block and graph. The taken-test is returned in parameter test. + * given block and graph. Returns generated taken-test. * - * Precondition: CanGenerateCode() returns true and needs_taken_test is set. + * Precondition: CanGenerateRange() returns true and needs_taken_test is set. 
*/ - void GenerateTakenTest(HInstruction* context, - HGraph* graph, - HBasicBlock* block, - /*out*/ HInstruction** taken_test); + HInstruction* GenerateTakenTest(HInstruction* context, HGraph* graph, HBasicBlock* block); + + /** + * Returns true if induction analysis is able to generate code for last value of + * the given instruction inside the closest enveloping loop. + */ + bool CanGenerateLastValue(HInstruction* instruction); + + /** + * Generates last value of the given instruction in the closest enveloping loop. + * Code is generated in given block and graph. Returns generated last value. + * + * Precondition: CanGenerateLastValue() returns true. + */ + HInstruction* GenerateLastValue(HInstruction* instruction, HGraph* graph, HBasicBlock* block); + + /** + * Updates all matching fetches with the given replacement in all induction information + * that is associated with the given instruction. + */ + void Replace(HInstruction* instruction, HInstruction* fetch, HInstruction* replacement); + + /** + * Incrementally updates induction information for just the given loop. + */ + void ReVisit(HLoopInformation* loop) { + induction_analysis_->induction_.erase(loop); + for (HInstructionIterator it(loop->GetHeader()->GetPhis()); !it.Done(); it.Advance()) { + induction_analysis_->cycles_.erase(it.Current()->AsPhi()); + } + induction_analysis_->VisitLoop(loop); + } + + /** + * Lookup an interesting cycle associated with an entry phi. + */ + ArenaSet<HInstruction*>* LookupCycle(HPhi* phi) const { + return induction_analysis_->LookupCycle(phi); + } + + /** + * Checks if header logic of a loop terminates. + */ + bool IsFinite(HLoopInformation* loop) const; private: /* @@ -140,7 +180,8 @@ class InductionVarRange { /*out*/ HInductionVarAnalysis::InductionInfo** trip) const; bool HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const; - bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const; + bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info, + /*out*/ int64_t* stride_value) const; bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const; bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const; bool IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const; @@ -167,6 +208,8 @@ class InductionVarRange { HInductionVarAnalysis::InductionInfo* trip, bool in_body, bool is_min) const; + Value GetXor(HInductionVarAnalysis::InductionInfo* info1, + HInductionVarAnalysis::InductionInfo* info2) const; Value MulRangeAndConstant(int64_t value, HInductionVarAnalysis::InductionInfo* info, @@ -186,19 +229,28 @@ class InductionVarRange { Value MergeVal(Value v1, Value v2, bool is_min) const; /** - * Generates code for lower/upper/taken-test in the HIR. Returns true on success. - * With values nullptr, the method can be used to determine if code generation + * Generates code for lower/upper/taken-test or last value in the HIR. Returns true on + * success. With values nullptr, the method can be used to determine if code generation * would be successful without generating actual code yet. 
*/ - bool GenerateCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/ HInstruction** lower, - /*out*/ HInstruction** upper, - /*out*/ HInstruction** taken_test, - /*out*/ bool* needs_finite_test, - /*out*/ bool* needs_taken_test) const; + bool GenerateRangeOrLastValue(HInstruction* context, + HInstruction* instruction, + bool is_last_val, + HGraph* graph, + HBasicBlock* block, + /*out*/ HInstruction** lower, + /*out*/ HInstruction** upper, + /*out*/ HInstruction** taken_test, + /*out*/ int64_t* stride_value, + /*out*/ bool* needs_finite_test, + /*out*/ bool* needs_taken_test) const; + + bool GenerateLastValuePeriodic(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** result, + /*out*/ bool* needs_taken_test) const; bool GenerateCode(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip, @@ -208,6 +260,10 @@ class InductionVarRange { bool in_body, bool is_min) const; + void ReplaceInduction(HInductionVarAnalysis::InductionInfo* info, + HInstruction* fetch, + HInstruction* replacement); + /** Results of prior induction variable analysis. */ HInductionVarAnalysis* induction_analysis_; diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index 4ea170f659..8bbdd4acb7 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -75,34 +75,34 @@ class InductionVarRangeTest : public CommonCompilerTest { // Control flow. loop_preheader_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(loop_preheader_); - HBasicBlock* loop_header = new (&allocator_) HBasicBlock(graph_); - graph_->AddBlock(loop_header); - HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_); - graph_->AddBlock(loop_body); + loop_header_ = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(loop_header_); + loop_body_ = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(loop_body_); HBasicBlock* return_block = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(return_block); entry_block_->AddSuccessor(loop_preheader_); - loop_preheader_->AddSuccessor(loop_header); - loop_header->AddSuccessor(loop_body); - loop_header->AddSuccessor(return_block); - loop_body->AddSuccessor(loop_header); + loop_preheader_->AddSuccessor(loop_header_); + loop_header_->AddSuccessor(loop_body_); + loop_header_->AddSuccessor(return_block); + loop_body_->AddSuccessor(loop_header_); return_block->AddSuccessor(exit_block_); // Instructions. 
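The GenerateLastValuePeriodic hook declared in the header hunk above boils down to a parity test on the trip count, guarded by the taken-test when the loop may not be entered. A standalone sketch of that semantics (not ART code; it assumes the trip count is the number of loop-body iterations):

```cpp
#include <cstdio>

// Last value of a two-valued periodic induction periodic(x, y): after tc
// iterations the value is x when tc is even and y when tc is odd; a loop
// that is never entered keeps the initial value x.
int LastValuePeriodic(int x, int y, int trip_count, bool taken) {
  if (!taken) {
    return x;
  }
  return (trip_count % 2 == 0) ? x : y;
}

int main() {
  // k alternating 0, 1, 0, 1, ... ends at 0 after 10 iterations, at 1 after 9.
  std::printf("tc=10     -> %d\n", LastValuePeriodic(0, 1, 10, /*taken=*/true));
  std::printf("tc=9      -> %d\n", LastValuePeriodic(0, 1, 9, /*taken=*/true));
  std::printf("not taken -> %d\n", LastValuePeriodic(0, 1, 0, /*taken=*/false));
  return 0;
}
```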
loop_preheader_->AddInstruction(new (&allocator_) HGoto()); HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt); - loop_header->AddPhi(phi); + loop_header_->AddPhi(phi); phi->AddInput(graph_->GetIntConstant(lower)); // i = l if (stride > 0) { condition_ = new (&allocator_) HLessThan(phi, upper); // i < u } else { condition_ = new (&allocator_) HGreaterThan(phi, upper); // i > u } - loop_header->AddInstruction(condition_); - loop_header->AddInstruction(new (&allocator_) HIf(condition_)); + loop_header_->AddInstruction(condition_); + loop_header_->AddInstruction(new (&allocator_) HIf(condition_)); increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, phi, graph_->GetIntConstant(stride)); - loop_body->AddInstruction(increment_); // i += s + loop_body_->AddInstruction(increment_); // i += s phi->AddInput(increment_); - loop_body->AddInstruction(new (&allocator_) HGoto()); + loop_body_->AddInstruction(new (&allocator_) HGoto()); return_block->AddInstruction(new (&allocator_) HReturnVoid()); exit_block_->AddInstruction(new (&allocator_) HExit()); } @@ -192,7 +192,8 @@ class InductionVarRangeTest : public CommonCompilerTest { // bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) { - return range_.NeedsTripCount(info); + int64_t s = 0; + return range_.NeedsTripCount(info, &s); } bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) { @@ -251,6 +252,8 @@ class InductionVarRangeTest : public CommonCompilerTest { HBasicBlock* entry_block_; HBasicBlock* exit_block_; HBasicBlock* loop_preheader_; + HBasicBlock* loop_header_; + HBasicBlock* loop_body_; HInductionVarAnalysis* iva_; InductionVarRange range_; @@ -600,15 +603,19 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) { Value v1, v2; bool needs_finite_test = true; + bool needs_taken_test = true; + + HInstruction* phi = condition_->InputAt(0); + HInstruction* exit = exit_block_->GetLastInstruction(); // In context of header: known. - range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, phi, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(1000), v2); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, phi, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(999), v2); @@ -616,6 +623,20 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) { EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(1000), v2); + + // Induction vs. no-induction. + EXPECT_TRUE(range_.CanGenerateRange(increment_, phi, &needs_finite_test, &needs_taken_test)); + EXPECT_TRUE(range_.CanGenerateLastValue(phi)); + EXPECT_FALSE(range_.CanGenerateRange(exit, exit, &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(range_.CanGenerateLastValue(exit)); + + // Last value (unsimplified). 
+ HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_); + ASSERT_TRUE(last->IsAdd()); + ASSERT_TRUE(last->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue()); + ASSERT_TRUE(last->InputAt(1)->IsIntConstant()); + EXPECT_EQ(0, last->InputAt(1)->AsIntConstant()->GetValue()); } TEST_F(InductionVarRangeTest, ConstantTripCountDown) { @@ -624,15 +645,19 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) { Value v1, v2; bool needs_finite_test = true; + bool needs_taken_test = true; + + HInstruction* phi = condition_->InputAt(0); + HInstruction* exit = exit_block_->GetLastInstruction(); // In context of header: known. - range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, phi, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(1000), v2); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, phi, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(1000), v2); @@ -640,6 +665,25 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) { EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(999), v2); + + // Induction vs. no-induction. + EXPECT_TRUE(range_.CanGenerateRange(increment_, phi, &needs_finite_test, &needs_taken_test)); + EXPECT_TRUE(range_.CanGenerateLastValue(phi)); + EXPECT_FALSE(range_.CanGenerateRange(exit, exit, &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(range_.CanGenerateLastValue(exit)); + + // Last value (unsimplified). + HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_); + ASSERT_TRUE(last->IsSub()); + ASSERT_TRUE(last->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue()); + ASSERT_TRUE(last->InputAt(1)->IsNeg()); + last = last->InputAt(1)->InputAt(0); + ASSERT_TRUE(last->IsSub()); + ASSERT_TRUE(last->InputAt(0)->IsIntConstant()); + EXPECT_EQ(0, last->InputAt(0)->AsIntConstant()->GetValue()); + ASSERT_TRUE(last->InputAt(1)->IsIntConstant()); + EXPECT_EQ(1000, last->InputAt(1)->AsIntConstant()->GetValue()); } TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { @@ -650,14 +694,16 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { bool needs_finite_test = true; bool needs_taken_test = true; + HInstruction* phi = condition_->InputAt(0); + // In context of header: upper unknown. - range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, phi, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(), v2); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, phi, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(x_, 1, -1), v2); @@ -668,19 +714,15 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { HInstruction* lower = nullptr; HInstruction* upper = nullptr; - HInstruction* taken = nullptr; // Can generate code in context of loop-body only. 
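The unsimplified last-value trees asserted in the two constant trip-count tests can be checked by direct evaluation. A minimal standalone check, using the constants from the tests (bounds 0 and 1000, stride of plus or minus one):

```cpp
#include <cstdio>

int main() {
  // Count-up loop: the generated tree is Add(1000, 0).
  int up = 1000 + 0;                // 1000, the value of i after the loop
  // Count-down loop: the generated tree is Sub(1000, Neg(Sub(0, 1000))).
  int down = 1000 - (-(0 - 1000));  // 0, the value of i after the loop
  std::printf("count-up last value  : %d\n", up);
  std::printf("count-down last value: %d\n", down);
  return 0;
}
```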
- EXPECT_FALSE(range_.CanGenerateCode( - condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); - ASSERT_TRUE(range_.CanGenerateCode( - increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(range_.CanGenerateRange(condition_, phi, &needs_finite_test, &needs_taken_test)); + ASSERT_TRUE(range_.CanGenerateRange(increment_, phi, &needs_finite_test, &needs_taken_test)); EXPECT_FALSE(needs_finite_test); EXPECT_TRUE(needs_taken_test); - // Generates code. - range_.GenerateRangeCode( - increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); + // Generates code (unsimplified). + range_.GenerateRange(increment_, phi, graph_, loop_preheader_, &lower, &upper); // Verify lower is 0+0. ASSERT_TRUE(lower != nullptr); @@ -701,12 +743,19 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); // Verify taken-test is 0<V. - range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_); ASSERT_TRUE(taken != nullptr); ASSERT_TRUE(taken->IsLessThan()); ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue()); EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); + + // Replacement. + range_.Replace(loop_header_->GetLastInstruction(), x_, y_); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(y_, 1, 0), v2); } TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { @@ -717,14 +766,16 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { bool needs_finite_test = true; bool needs_taken_test = true; + HInstruction* phi = condition_->InputAt(0); + // In context of header: lower unknown. - range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, phi, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(), v1); ExpectEqual(Value(1000), v2); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, phi, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(x_, 1, 1), v1); ExpectEqual(Value(1000), v2); @@ -735,19 +786,15 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { HInstruction* lower = nullptr; HInstruction* upper = nullptr; - HInstruction* taken = nullptr; // Can generate code in context of loop-body only. - EXPECT_FALSE(range_.CanGenerateCode( - condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); - ASSERT_TRUE(range_.CanGenerateCode( - increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(range_.CanGenerateRange(condition_, phi, &needs_finite_test, &needs_taken_test)); + ASSERT_TRUE(range_.CanGenerateRange(increment_, phi, &needs_finite_test, &needs_taken_test)); EXPECT_FALSE(needs_finite_test); EXPECT_TRUE(needs_taken_test); - // Generates code. - range_.GenerateRangeCode( - increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); + // Generates code (unsimplified). + range_.GenerateRange(increment_, phi, graph_, loop_preheader_, &lower, &upper); // Verify lower is 1000-((1000-V)-1). 
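The symbolic lower bounds verified in the two tests reduce to the body ranges computed earlier in the same tests. A quick standalone evaluation (V is the symbolic loop bound; 123 is an arbitrary sample value, not from the tests):

```cpp
#include <cstdio>

int main() {
  int V = 123;  // arbitrary sample for the symbolic parameter
  // SymbolicTripCountUp: generated lower is 0+0, matching the body-range
  // lower bound of 0 computed earlier in the test.
  int lower_up = 0 + 0;
  // SymbolicTripCountDown: generated lower is 1000-((1000-V)-1), which
  // simplifies to V+1, matching the body-range lower bound V+1.
  int lower_down = 1000 - ((1000 - V) - 1);
  std::printf("up   lower = %d\n", lower_up);
  std::printf("down lower = %d (V + 1 = %d)\n", lower_down, V + 1);
  return 0;
}
```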
ASSERT_TRUE(lower != nullptr); @@ -773,12 +820,19 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); // Verify taken-test is 1000>V. - range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_); ASSERT_TRUE(taken != nullptr); ASSERT_TRUE(taken->IsGreaterThan()); ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue()); EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); + + // Replacement. + range_.Replace(loop_header_->GetLastInstruction(), x_, y_); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(y_, 1, 0), v1); + ExpectEqual(Value(999), v2); } } // namespace art diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 451aa38033..cc420b3260 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -41,7 +41,7 @@ #include "sharpening.h" #include "ssa_builder.h" #include "ssa_phi_elimination.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" namespace art { @@ -90,14 +90,14 @@ void HInliner::Run() { if (!TryInline(call)) { if (kIsDebugBuild && IsCompilingWithCoreImage()) { std::string callee_name = - PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile()); + outer_compilation_unit_.GetDexFile()->PrettyMethod(call->GetDexMethodIndex()); bool should_inline = callee_name.find("$inline$") != std::string::npos; CHECK(!should_inline) << "Could not inline " << callee_name; } } else { if (kIsDebugBuild && IsCompilingWithCoreImage()) { std::string callee_name = - PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile()); + outer_compilation_unit_.GetDexFile()->PrettyMethod(call->GetDexMethodIndex()); bool must_not_inline = callee_name.find("$noinline$") != std::string::npos; CHECK(!must_not_inline) << "Should not have inlined " << callee_name; } @@ -109,7 +109,7 @@ void HInliner::Run() { } static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { return method->IsFinal() || method->GetDeclaringClass()->IsFinal(); } @@ -119,7 +119,7 @@ static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) * Return nullptr if the runtime target cannot be proven. */ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resolved_method) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { if (IsMethodOrDeclaringClassFinal(resolved_method)) { // No need to lookup further, the resolved method will be the target. 
return resolved_method; @@ -189,7 +189,7 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol static uint32_t FindMethodIndexIn(ArtMethod* method, const DexFile& dex_file, uint32_t name_and_signature_index) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { if (IsSameDexFile(*method->GetDexFile(), dex_file)) { return method->GetDexMethodIndex(); } else { @@ -200,13 +200,13 @@ static uint32_t FindMethodIndexIn(ArtMethod* method, static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { uint32_t index = DexFile::kDexNoIndex; if (cls->GetDexCache() == nullptr) { - DCHECK(cls->IsArrayClass()) << PrettyClass(cls); + DCHECK(cls->IsArrayClass()) << cls->PrettyClass(); index = cls->FindTypeIndexInOtherDexFile(dex_file); } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) { - DCHECK(cls->IsProxyClass()) << PrettyClass(cls); + DCHECK(cls->IsProxyClass()) << cls->PrettyClass(); // TODO: deal with proxy classes. } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) { DCHECK_EQ(cls->GetDexCache(), dex_cache.Get()); @@ -263,42 +263,24 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { return false; // Don't bother to move further if we know the method is unresolved. } - uint32_t method_index = invoke_instruction->GetDexMethodIndex(); ScopedObjectAccess soa(Thread::Current()); + uint32_t method_index = invoke_instruction->GetDexMethodIndex(); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); - VLOG(compiler) << "Try inlining " << PrettyMethod(method_index, caller_dex_file); + VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index); - ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); // We can query the dex cache directly. The verifier has populated it already. - ArtMethod* resolved_method; + ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod(); ArtMethod* actual_method = nullptr; - if (invoke_instruction->IsInvokeStaticOrDirect()) { - if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) { - VLOG(compiler) << "Not inlining a String.<init> method"; - return false; - } - MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod(); - mirror::DexCache* const dex_cache = IsSameDexFile(caller_dex_file, *ref.dex_file) - ? caller_compilation_unit_.GetDexCache().Get() - : class_linker->FindDexCache(soa.Self(), *ref.dex_file); - resolved_method = dex_cache->GetResolvedMethod( - ref.dex_method_index, class_linker->GetImagePointerSize()); - // actual_method == resolved_method for direct or static calls. - actual_method = resolved_method; - } else { - resolved_method = caller_compilation_unit_.GetDexCache().Get()->GetResolvedMethod( - method_index, class_linker->GetImagePointerSize()); - if (resolved_method != nullptr) { - // Check if we can statically find the method. - actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); - } - } - if (resolved_method == nullptr) { - // TODO: Can this still happen? - // Method cannot be resolved if it is in another dex file we do not have access to. 
- VLOG(compiler) << "Method cannot be resolved " << PrettyMethod(method_index, caller_dex_file); + DCHECK(invoke_instruction->IsInvokeStaticOrDirect()); + DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()); + VLOG(compiler) << "Not inlining a String.<init> method"; return false; + } else if (invoke_instruction->IsInvokeStaticOrDirect()) { + actual_method = resolved_method; + } else { + // Check if we can statically find the method. + actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); } if (actual_method != nullptr) { @@ -322,7 +304,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()); if (ic.IsUninitialized()) { VLOG(compiler) << "Interface or virtual call to " - << PrettyMethod(method_index, caller_dex_file) + << caller_dex_file.PrettyMethod(method_index) << " is not hit and not inlined"; return false; } else if (ic.IsMonomorphic()) { @@ -340,7 +322,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { } else { DCHECK(ic.IsMegamorphic()); VLOG(compiler) << "Interface or virtual call to " - << PrettyMethod(method_index, caller_dex_file) + << caller_dex_file.PrettyMethod(method_index) << " is megamorphic and not inlined"; MaybeRecordStat(kMegamorphicCall); return false; @@ -349,7 +331,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { } VLOG(compiler) << "Interface or virtual call to " - << PrettyMethod(method_index, caller_dex_file) + << caller_dex_file.PrettyMethod(method_index) << " could not be statically determined"; return false; } @@ -384,7 +366,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, uint32_t class_index = FindClassIndexIn( ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache()); if (class_index == DexFile::kDexNoIndex) { - VLOG(compiler) << "Call to " << PrettyMethod(resolved_method) + VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) << " from inline cache is not inlined because its class is not" << " accessible to the caller"; return false; @@ -452,7 +434,8 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, is_referrer, invoke_instruction->GetDexPc(), /* needs_access_check */ false, - /* is_in_dex_cache */ true); + /* is_in_dex_cache */ true, + /* is_in_boot_image */ false); HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); // TODO: Extend reference type propagation to understand the guard. @@ -543,7 +526,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, } if (!one_target_inlined) { - VLOG(compiler) << "Call to " << PrettyMethod(resolved_method) + VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) << " from inline cache is not inlined because none" << " of its targets could be inlined"; return false; @@ -677,7 +660,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction, actual_method = new_method; } else if (actual_method != new_method) { // Different methods, bailout. - VLOG(compiler) << "Call to " << PrettyMethod(resolved_method) + VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) << " from inline cache is not inlined because it resolves" << " to different methods"; return false; @@ -762,9 +745,9 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho // 2) We will not go to the conflict trampoline with an invoke-virtual. 
// TODO: Consider sharpening once it is not dependent on the compiler driver. const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); - uint32_t method_index = FindMethodIndexIn( + uint32_t dex_method_index = FindMethodIndexIn( method, caller_dex_file, invoke_instruction->GetDexMethodIndex()); - if (method_index == DexFile::kDexNoIndex) { + if (dex_method_index == DexFile::kDexNoIndex) { return false; } HInvokeVirtual* new_invoke = new (graph_->GetArena()) HInvokeVirtual( @@ -772,7 +755,8 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho invoke_instruction->GetNumberOfArguments(), invoke_instruction->GetType(), invoke_instruction->GetDexPc(), - method_index, + dex_method_index, + method, method->GetMethodIndex()); HInputsRef inputs = invoke_instruction->GetInputs(); for (size_t index = 0; index != inputs.size(); ++index) { @@ -810,7 +794,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, HInstruction** return_replacement) { if (method->IsProxyMethod()) { - VLOG(compiler) << "Method " << PrettyMethod(method) + VLOG(compiler) << "Method " << method->PrettyMethod() << " is not inlined because of unimplemented inline support for proxy methods."; return false; } @@ -820,11 +804,12 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (!compiler_driver_->MayInline(method->GetDexFile(), outer_compilation_unit_.GetDexFile())) { if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) { - VLOG(compiler) << "Successfully replaced pattern of invoke " << PrettyMethod(method); + VLOG(compiler) << "Successfully replaced pattern of invoke " + << method->PrettyMethod(); MaybeRecordStat(kReplacedInvokeWithSimplePattern); return true; } - VLOG(compiler) << "Won't inline " << PrettyMethod(method) << " in " + VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in " << outer_compilation_unit_.GetDexFile()->GetLocation() << " (" << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from " << method->GetDexFile()->GetLocation(); @@ -836,14 +821,14 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, const DexFile::CodeItem* code_item = method->GetCodeItem(); if (code_item == nullptr) { - VLOG(compiler) << "Method " << PrettyMethod(method) + VLOG(compiler) << "Method " << method->PrettyMethod() << " is not inlined because it is native"; return false; } size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (code_item->insns_size_in_code_units_ > inline_max_code_units) { - VLOG(compiler) << "Method " << PrettyMethod(method) + VLOG(compiler) << "Method " << method->PrettyMethod() << " is too big to inline: " << code_item->insns_size_in_code_units_ << " > " @@ -852,13 +837,13 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, } if (code_item->tries_size_ != 0) { - VLOG(compiler) << "Method " << PrettyMethod(method) + VLOG(compiler) << "Method " << method->PrettyMethod() << " is not inlined because of try block"; return false; } if (!method->IsCompilable()) { - VLOG(compiler) << "Method " << PrettyMethod(method) + VLOG(compiler) << "Method " << method->PrettyMethod() << " has soft failures un-handled by the compiler, so it cannot be inlined"; } @@ -867,7 +852,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (Runtime::Current()->UseJitCompilation() || !compiler_driver_->IsMethodVerifiedWithoutFailures( method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { - VLOG(compiler) << 
"Method " << PrettyMethod(method) + VLOG(compiler) << "Method " << method->PrettyMethod() << " couldn't be verified, so it cannot be inlined"; return false; } @@ -877,7 +862,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { // Case of a static method that cannot be inlined because it implicitly // requires an initialization check of its declaring class. - VLOG(compiler) << "Method " << PrettyMethod(method) + VLOG(compiler) << "Method " << method->PrettyMethod() << " is not inlined because it is static and requires a clinit" << " check that cannot be emitted due to Dex cache limitations"; return false; @@ -887,14 +872,14 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - VLOG(compiler) << "Successfully inlined " << PrettyMethod(method); + VLOG(compiler) << "Successfully inlined " << method->PrettyMethod(); MaybeRecordStat(kInlinedInvoke); return true; } static HInstruction* GetInvokeInputForArgVRegIndex(HInvoke* invoke_instruction, size_t arg_vreg_index) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { size_t input_index = 0; for (size_t i = 0; i < arg_vreg_index; ++i, ++input_index) { DCHECK_LT(input_index, invoke_instruction->GetNumberOfArguments()); @@ -1030,7 +1015,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache, uint32_t field_index, HInstruction* obj) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size); DCHECK(resolved_field != nullptr); @@ -1058,7 +1043,7 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex uint32_t field_index, HInstruction* obj, HInstruction* value) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size); DCHECK(resolved_field != nullptr); @@ -1121,7 +1106,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } } - InvokeType invoke_type = invoke_instruction->GetOriginalInvokeType(); + InvokeType invoke_type = invoke_instruction->GetInvokeType(); if (invoke_type == kInterface) { // We have statically resolved the dispatch. To please the class linker // at runtime, we change this call as if it was a virtual call. 
@@ -1154,19 +1139,19 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, *code_item, compiler_driver_, inline_stats.get(), - resolved_method->GetQuickenedInfo(), + resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()), dex_cache, handles_); if (builder.BuildGraph() != kAnalysisSuccess) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be built, so cannot be inlined"; return false; } if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " cannot be inlined because of the register allocator"; return false; } @@ -1216,7 +1201,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, // a throw predecessor. HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it has an infinite loop"; return false; } @@ -1229,25 +1214,22 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } } if (has_throw_predecessor) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because one branch always throws"; return false; } - HReversePostOrderIterator it(*callee_graph); - it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining. size_t number_of_instructions = 0; bool can_inline_environment = total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters; - for (; !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - + // Skip the entry block, it does not contain instructions that prevent inlining. + for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) { if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { // Don't inline methods with irreducible loops, they could prevent some // optimizations to run. 
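Several hunks in this change replace HReversePostOrderIterator loops with range-based iteration over the stored reverse post order, with a skip-entry variant used just above. A standalone sketch of that style, assuming the order is kept in a plain vector (the Block and Graph types here are hypothetical stand-ins, not ART's):

```cpp
#include <cstdio>
#include <vector>

struct Block { int id; };

struct Graph {
  std::vector<Block*> reverse_post_order;
  const std::vector<Block*>& GetReversePostOrder() const { return reverse_post_order; }
};

int main() {
  Block entry{0}, b1{1}, b2{2};
  Graph g{{&entry, &b1, &b2}};
  // Full order, as used when every block (including the entry) is visited.
  for (Block* block : g.GetReversePostOrder()) {
    std::printf("visit B%d\n", block->id);
  }
  // Skipping the entry block, mirroring GetReversePostOrderSkipEntryBlock().
  for (size_t i = 1; i < g.GetReversePostOrder().size(); ++i) {
    std::printf("visit (skip entry) B%d\n", g.GetReversePostOrder()[i]->id);
  }
  return 0;
}
```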
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it contains an irreducible loop"; return false; } @@ -1256,28 +1238,28 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, !instr_it.Done(); instr_it.Advance()) { if (number_of_instructions++ == number_of_instructions_budget) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " is not inlined because its caller has reached" << " its instruction budget limit."; return false; } HInstruction* current = instr_it.Current(); if (!can_inline_environment && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " is not inlined because its caller has reached" << " its environment budget limit."; return false; } if (!same_dex_file && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because " << current->DebugName() << " needs an environment and is in a different dex file"; return false; } if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because " << current->DebugName() << " it is in a different dex file and requires access to the dex cache"; return false; @@ -1285,7 +1267,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (current->IsNewInstance() && (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it is using an entrypoint" << " with access checks"; // Allocation entrypoint does not handle inlined frames. @@ -1294,7 +1276,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (current->IsNewArray() && (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it is using an entrypoint" << " with access checks"; // Allocation entrypoint does not handle inlined frames. @@ -1306,7 +1288,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, current->IsUnresolvedStaticFieldSet() || current->IsUnresolvedInstanceFieldSet()) { // Entrypoint for unresolved fields does not handle inlined frames. 
- VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it is using an unresolved" << " entrypoint"; return false; @@ -1337,7 +1319,7 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, HConstantFolding fold(callee_graph); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_); InstructionSimplifier simplify(callee_graph, stats_); - IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_); + IntrinsicsRecognizer intrinsics(callee_graph, stats_); HOptimization* optimizations[] = { &intrinsics, @@ -1374,7 +1356,7 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti, bool declared_can_be_null, HInstruction* actual_obj) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { if (declared_can_be_null && !actual_obj->CanBeNull()) { return true; } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 02d3a5f499..a1dcd58a84 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -38,7 +38,7 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit, const DexCompilationUnit& caller_compilation_unit, CompilerDriver* compiler_driver, - StackHandleScopeCollection* handles, + VariableSizedHandleScope* handles, OptimizingCompilerStats* stats, size_t total_number_of_dex_registers, size_t depth) @@ -64,12 +64,12 @@ class HInliner : public HOptimization { // reference type propagation can run after the inlining. If the inlining is successful, this // method will replace and remove the `invoke_instruction`. bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); bool TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, HInstruction** return_replacement) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); bool TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, @@ -86,7 +86,7 @@ class HInliner : public HOptimization { bool TryPatternSubstitution(HInvoke* invoke_instruction, ArtMethod* resolved_method, HInstruction** return_replacement) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); // Create a new HInstanceFieldGet. HInstanceFieldGet* CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache, @@ -105,38 +105,38 @@ class HInliner : public HOptimization { bool TryInlineMonomorphicCall(HInvoke* invoke_instruction, ArtMethod* resolved_method, const InlineCache& ic) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); // Try to inline targets of a polymorphic call. 
bool TryInlinePolymorphicCall(HInvoke* invoke_instruction, ArtMethod* resolved_method, const InlineCache& ic) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction, ArtMethod* resolved_method, const InlineCache& ic) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker, HInstruction* receiver, uint32_t dex_pc) const - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is // admissible (see ReferenceTypePropagation::IsAdmissible for details). // Otherwise returns inexact Object RTI. - ReferenceTypeInfo GetClassRTI(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); + ReferenceTypeInfo GetClassRTI(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_); bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); // Add a type guard on the given `receiver`. This will add to the graph: // i0 = HFieldGet(receiver, klass) @@ -154,7 +154,7 @@ class HInliner : public HOptimization { bool is_referrer, HInstruction* invoke_instruction, bool with_deoptimization) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); /* * Ad-hoc implementation for implementing a diamond pattern in the graph for @@ -197,7 +197,7 @@ class HInliner : public HOptimization { const size_t total_number_of_dex_registers_; const size_t depth_; size_t number_of_inlined_instructions_; - StackHandleScopeCollection* const handles_; + VariableSizedHandleScope* const handles_; DISALLOW_COPY_AND_ASSIGN(HInliner); }; diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index e5dab569fd..c8c4ca76fd 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -21,7 +21,8 @@ #include "class_linker.h" #include "dex_instruction-inl.h" #include "driver/compiler_options.h" -#include "scoped_thread_state_change.h" +#include "imtable-inl.h" +#include "scoped_thread_state_change-inl.h" namespace art { @@ -80,8 +81,7 @@ void HInstructionBuilder::InitializeBlockLocals() { // locals (guaranteed by HGraphBuilder) and that all try blocks have been // visited already (from HTryBoundary scoping and reverse post order). 
bool catch_block_visited = false; - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); + for (HBasicBlock* current : graph_->GetReversePostOrder()) { if (current == current_block_) { catch_block_visited = true; } else if (current->IsTryBlock()) { @@ -275,8 +275,8 @@ bool HInstructionBuilder::Build() { FindNativeDebugInfoLocations(native_debug_info_locations); } - for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) { - current_block_ = block_it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + current_block_ = block; uint32_t block_dex_pc = current_block_->GetDexPc(); InitializeBlockLocals(); @@ -674,7 +674,7 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker(); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); // We fetch the referenced class eagerly (that is, the class pointed by in the MethodId // at method_idx), as `CanAccessResolvedMethod` expects it be be in the dex cache. @@ -767,6 +767,11 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in return resolved_method; } +static bool IsStringConstructor(ArtMethod* method) { + ScopedObjectAccess soa(Thread::Current()); + return method->GetDeclaringClass()->IsStringClass() && method->IsConstructor(); +} + bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, uint32_t dex_pc, uint32_t method_idx, @@ -785,31 +790,46 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, number_of_arguments++; } - MethodReference target_method(dex_file_, method_idx); + ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type); + + if (UNLIKELY(resolved_method == nullptr)) { + MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod); + HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_, + number_of_arguments, + return_type, + dex_pc, + method_idx, + invoke_type); + return HandleInvoke(invoke, + number_of_vreg_arguments, + args, + register_index, + is_range, + descriptor, + nullptr, /* clinit_check */ + true /* is_unresolved */); + } - // Special handling for string init. - int32_t string_init_offset = 0; - bool is_string_init = compiler_driver_->IsStringInit(method_idx, - dex_file_, - &string_init_offset); // Replace calls to String.<init> with StringFactory. 
- if (is_string_init) { + if (IsStringConstructor(resolved_method)) { + uint32_t string_init_entry_point = WellKnownClasses::StringInitToEntryPoint(resolved_method); HInvokeStaticOrDirect::DispatchInfo dispatch_info = { HInvokeStaticOrDirect::MethodLoadKind::kStringInit, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - dchecked_integral_cast<uint64_t>(string_init_offset), + dchecked_integral_cast<uint64_t>(string_init_entry_point), 0U }; + MethodReference target_method(dex_file_, method_idx); HInvoke* invoke = new (arena_) HInvokeStaticOrDirect( arena_, number_of_arguments - 1, Primitive::kPrimNot /*return_type */, dex_pc, method_idx, - target_method, + nullptr, dispatch_info, invoke_type, - kStatic /* optimized_invoke_type */, + target_method, HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); return HandleStringInit(invoke, number_of_vreg_arguments, @@ -819,26 +839,6 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, descriptor); } - ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type); - - if (UNLIKELY(resolved_method == nullptr)) { - MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod); - HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_, - number_of_arguments, - return_type, - dex_pc, - method_idx, - invoke_type); - return HandleInvoke(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - nullptr, /* clinit_check */ - true /* is_unresolved */); - } - // Potential class initialization check, in the case of a static method call. HClinitCheck* clinit_check = nullptr; HInvoke* invoke = nullptr; @@ -853,10 +853,9 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, dex_pc, resolved_method, method_idx, &clinit_check_requirement); } else if (invoke_type == kSuper) { if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) { - // Update the target method to the one resolved. Note that this may be a no-op if + // Update the method index to the one resolved. Note that this may be a no-op if // we resolved to the method referenced by the instruction. method_idx = resolved_method->GetDexMethodIndex(); - target_method = MethodReference(dex_file_, method_idx); } } @@ -866,15 +865,17 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, 0u, 0U }; + MethodReference target_method(resolved_method->GetDexFile(), + resolved_method->GetDexMethodIndex()); invoke = new (arena_) HInvokeStaticOrDirect(arena_, number_of_arguments, return_type, dex_pc, method_idx, - target_method, + resolved_method, dispatch_info, invoke_type, - invoke_type, + target_method, clinit_check_requirement); } else if (invoke_type == kVirtual) { ScopedObjectAccess soa(Thread::Current()); // Needed for the method index @@ -883,16 +884,18 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, return_type, dex_pc, method_idx, + resolved_method, resolved_method->GetMethodIndex()); } else { DCHECK_EQ(invoke_type, kInterface); - ScopedObjectAccess soa(Thread::Current()); // Needed for the method index + ScopedObjectAccess soa(Thread::Current()); // Needed for the IMT index. 
invoke = new (arena_) HInvokeInterface(arena_, number_of_arguments, return_type, dex_pc, method_idx, - resolved_method->GetImtIndex()); + resolved_method, + ImTable::GetImtIndex(resolved_method)); } return HandleInvoke(invoke, @@ -935,7 +938,8 @@ bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) IsOutermostCompilingClass(type_index), dex_pc, needs_access_check, - /* is_in_dex_cache */ false); + /* is_in_dex_cache */ false, + /* is_in_boot_image */ false); AppendInstruction(load_class); HInstruction* cls = load_class; @@ -957,7 +961,7 @@ bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) } static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class); } @@ -1026,7 +1030,8 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( is_outer_class, dex_pc, /*needs_access_check*/ false, - /* is_in_dex_cache */ false); + /* is_in_dex_cache */ false, + /* is_in_boot_image */ false); AppendInstruction(load_class); clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); AppendInstruction(clinit_check); @@ -1059,7 +1064,7 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, // reject any class where this is violated. However, the verifier only does these checks // on non trivially dead instructions, so we just bailout the compilation. VLOG(compiler) << "Did not compile " - << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << " because of non-sequential dex register pair in wide argument"; MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); return false; @@ -1073,7 +1078,7 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, if (*argument_index != invoke->GetNumberOfArguments()) { VLOG(compiler) << "Did not compile " - << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << " because of wrong number of arguments in invoke instruction"; MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); return false; @@ -1101,7 +1106,7 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, size_t start_index = 0; size_t argument_index = 0; - if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) { // Instance call. + if (invoke->GetInvokeType() != InvokeType::kStatic) { // Instance call. uint32_t obj_reg = is_range ? register_index : args[0]; HInstruction* arg = is_unresolved ? 
LoadLocal(obj_reg, Primitive::kPrimNot) @@ -1278,7 +1283,7 @@ static mirror::Class* GetClassFrom(CompilerDriver* driver, ScopedObjectAccess soa(Thread::Current()); StackHandleScope<1> hs(soa.Self()); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader()))); + soa.Decode<mirror::ClassLoader>(compilation_unit.GetClassLoader()))); Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache(); return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit); @@ -1297,7 +1302,7 @@ bool HInstructionBuilder::IsOutermostCompilingClass(uint16_t type_index) const { StackHandleScope<3> hs(soa.Self()); Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache(); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass( soa, dex_cache, class_loader, type_index, dex_compilation_unit_))); Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); @@ -1338,7 +1343,7 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, StackHandleScope<3> hs(soa.Self()); Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache(); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); ArtField* resolved_field = compiler_driver_->ResolveField( soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true); @@ -1384,7 +1389,8 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, is_outer_class, dex_pc, /*needs_access_check*/ false, - /* is_in_dex_cache */ false); + /* is_in_dex_cache */ false, + /* is_in_boot_image */ false); AppendInstruction(constant); HInstruction* cls = constant; @@ -1545,8 +1551,6 @@ void HInstructionBuilder::BuildFillArrayData(HInstruction* object, void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) { HInstruction* array = LoadNullCheckedLocal(instruction.VRegA_31t(), dex_pc); - HInstruction* length = new (arena_) HArrayLength(array, dex_pc); - AppendInstruction(length); int32_t payload_offset = instruction.VRegB_31t() + dex_pc; const Instruction::ArrayDataPayload* payload = @@ -1554,6 +1558,14 @@ void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uin const uint8_t* data = payload->data; uint32_t element_count = payload->element_count; + if (element_count == 0u) { + // For empty payload we emit only the null check above. + return; + } + + HInstruction* length = new (arena_) HArrayLength(array, dex_pc); + AppendInstruction(length); + // Implementation of this DEX instruction seems to be that the bounds check is // done before doing any stores. 
HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc); @@ -1607,7 +1619,7 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object, } static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { if (cls.Get() == nullptr) { return TypeCheckKind::kUnresolvedCheck; } else if (cls->IsInterface()) { @@ -1653,7 +1665,8 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, IsOutermostCompilingClass(type_index), dex_pc, !can_access, - /* is_in_dex_cache */ false); + /* is_in_dex_cache */ false, + /* is_in_boot_image */ false); AppendInstruction(cls); TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class); @@ -1803,7 +1816,20 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::MOVE_OBJECT: case Instruction::MOVE_OBJECT_16: case Instruction::MOVE_OBJECT_FROM16: { - HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot); + // The verifier has no notion of a null type, so a move-object of constant 0 + // will lead to the same constant 0 in the destination register. To mimic + // this behavior, we just pretend we haven't seen a type change (int to reference) + // for the 0 constant and phis. We rely on our type propagation to eventually get the + // types correct. + uint32_t reg_number = instruction.VRegB(); + HInstruction* value = (*current_locals_)[reg_number]; + if (value->IsIntConstant()) { + DCHECK_EQ(value->AsIntConstant()->GetValue(), 0); + } else if (value->IsPhi()) { + DCHECK(value->GetType() == Primitive::kPrimInt || value->GetType() == Primitive::kPrimNot); + } else { + value = LoadLocal(reg_number, Primitive::kPrimNot); + } UpdateLocal(instruction.VRegA(), value); break; } @@ -2628,7 +2654,8 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, IsOutermostCompilingClass(type_index), dex_pc, !can_access, - /* is_in_dex_cache */ false)); + /* is_in_dex_cache */ false, + /* is_in_boot_image */ false)); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); break; } @@ -2688,7 +2715,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, default: VLOG(compiler) << "Did not compile " - << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << " because of unhandled instruction " << instruction.Name(); MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction); diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 517cf76831..aa34ddd1d1 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -103,7 +103,7 @@ class HInstructionBuilder : public ValueObject { bool NeedsAccessCheck(uint32_t type_index, Handle<mirror::DexCache> dex_cache, /*out*/bool* finalizable) const - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const; template<typename T> @@ -255,14 +255,14 @@ class HInstructionBuilder : public ValueObject { ArtMethod* method, uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); // Build a HNewInstance instruction. 
bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc); // Return whether the compiler can assume `cls` is initialized. bool IsInitialized(Handle<mirror::Class> cls) const - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); // Try to resolve a method using the class linker. Return null if a method could // not be resolved. diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 4ca0600dba..e4d280f26d 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -18,7 +18,7 @@ #include "intrinsics.h" #include "mirror/class-inl.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" namespace art { @@ -124,20 +124,16 @@ void InstructionSimplifier::Run() { void InstructionSimplifierVisitor::Run() { // Iterate in reverse post order to open up more simplifications to users // of instructions that got simplified. - for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) { + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { // The simplification of an instruction to another instruction may yield // possibilities for other simplifications. So although we perform a reverse // post order visit, we sometimes need to revisit an instruction index. - simplification_occurred_ = false; - VisitBasicBlock(it.Current()); - if (simplification_occurred_ && - (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) { - // New simplifications may be applicable to the instruction at the - // current index, so don't advance the iterator. - continue; - } + do { + simplification_occurred_ = false; + VisitBasicBlock(block); + } while (simplification_occurred_ && + (simplifications_at_current_position_ < kMaxSamePositionSimplifications)); simplifications_at_current_position_ = 0; - it.Advance(); } } @@ -1577,6 +1573,18 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { return; } + if ((input_cst != nullptr) && input_cst->IsOne() + && input_other->GetType() == Primitive::kPrimBoolean) { + // Replace code looking like + // XOR dst, src, 1 + // with + // BOOLEAN_NOT dst, src + HBooleanNot* boolean_not = new (GetGraph()->GetArena()) HBooleanNot(input_other); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, boolean_not); + RecordSimplification(); + return; + } + if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) { // Replace code looking like // XOR dst, src, 0xFFF...FF @@ -1645,7 +1653,7 @@ void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left, Primitive::Type type) { DCHECK(invoke->IsInvokeStaticOrDirect()); - DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic); + DCHECK_EQ(invoke->GetInvokeType(), InvokeType::kStatic); HInstruction* value = invoke->InputAt(0); HInstruction* distance = invoke->InputAt(1); // Replace the invoke with an HRor. 
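The instruction_simplifier.cc hunks above add, among other things, a boolean XOR-with-1 rewrite: "XOR dst, src, 1" becomes "BOOLEAN_NOT dst, src", but only when the operand is known to be Primitive::kPrimBoolean. A minimal standalone sketch of the identity that rewrite relies on (plain C++, not ART/HIR code; `src` merely stands in for a boolean HIR value):

// Illustrative sketch only: the identity behind the XOR-with-1 simplification above.
// No ART types are used; this just checks that (src ^ 1) and !src agree for booleans.
#include <cassert>

int main() {
  for (bool src : {false, true}) {
    bool xor_form = static_cast<bool>(src ^ 1);  // what "XOR dst, src, 1" computes
    bool not_form = !src;                        // what "BOOLEAN_NOT dst, src" computes
    assert(xor_form == not_form);
  }
  return 0;
}

The guard on the operand type matters: for a non-boolean int, src ^ 1 flips only the low bit and is not a logical negation, which is why the rule above checks GetType() == Primitive::kPrimBoolean before rewriting.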
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 7905104ed4..7fe1067aa9 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -35,9 +35,9 @@ namespace art { */ class InstructionSimplifier : public HOptimization { public: - InstructionSimplifier(HGraph* graph, - OptimizingCompilerStats* stats = nullptr, - const char* name = kInstructionSimplifierPassName) + explicit InstructionSimplifier(HGraph* graph, + OptimizingCompilerStats* stats = nullptr, + const char* name = kInstructionSimplifierPassName) : HOptimization(graph, name, stats) {} static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index 495f3fd232..56e4c7a9c2 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -44,6 +44,14 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); + // TODO: Implement reading (length + compression) for String compression feature from + // negative offset (count_offset - data_offset). Thumb2Assembler does not support T4 + // encoding of "LDR (immediate)" at the moment. + // Don't move array pointer if it is charAt because we need to take the count first. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + return; + } + if (type == Primitive::kPrimLong || type == Primitive::kPrimFloat || type == Primitive::kPrimDouble) { diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index 3d297dacc0..782110c40a 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -48,7 +48,9 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { class InstructionSimplifierArm : public HOptimization { public: InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, "instruction_simplifier_arm", stats) {} + : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {} + + static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm"; void Run() OVERRIDE { InstructionSimplifierArmVisitor visitor(graph_, stats_); diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 6d107d571f..d0dd650024 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -140,6 +140,13 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + // Don't move the array pointer if it is charAt because we need to take the count first. + // TODO: Implement reading (length + compression) for String compression feature from + // negative offset (count_offset - data_offset) using LDP and clobbering an extra temporary. + // Note that "LDR (Immediate)" does not have a "signed offset" encoding. 
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + return; + } if (TryExtractArrayAccessAddress(instruction, instruction->GetArray(), instruction->GetIndex(), diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 28648b3bea..f71684efe9 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -82,8 +82,9 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { class InstructionSimplifierArm64 : public HOptimization { public: InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, "instruction_simplifier_arm64", stats) {} - + : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {} + static constexpr const char* kInstructionSimplifierArm64PassName + = "instruction_simplifier_arm64"; void Run() OVERRIDE { InstructionSimplifierArm64Visitor visitor(graph_, stats_); visitor.VisitReversePostOrder(); diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index 8f7778fe68..04e063c92e 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -259,7 +259,8 @@ bool TryExtractArrayAccessAddress(HInstruction* access, HIntConstant* offset = graph->GetIntConstant(data_offset); HIntermediateAddress* address = new (arena) HIntermediateAddress(array, offset, kNoDexPc); - address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); + // TODO: Is it ok to not have this on the intermediate address? + // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); access->GetBlock()->InsertInstructionBefore(address, access); access->ReplaceInput(address, 0); // Both instructions must depend on GC to prevent any instruction that can diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 418d59c6cb..fc6ff7b197 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -18,14 +18,11 @@ #include "art_method.h" #include "class_linker.h" -#include "dex/quick/dex_file_method_inliner.h" -#include "dex/quick/dex_file_to_method_inliner_map.h" #include "driver/compiler_driver.h" #include "invoke_type.h" #include "mirror/dex_cache-inl.h" #include "nodes.h" -#include "quick/inline_method_analyser.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" #include "thread-inl.h" #include "utils.h" @@ -36,7 +33,7 @@ static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { switch (i) { case Intrinsics::kNone: return kInterface; // Non-sensical for intrinsic. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return IsStatic; #include "intrinsics_list.h" @@ -52,7 +49,7 @@ static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCache(Intrinsic switch (i) { case Intrinsics::kNone: return kNeedsEnvironmentOrCache; // Non-sensical for intrinsic. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ case Intrinsics::k ## Name: \ return NeedsEnvironmentOrCache; #include "intrinsics_list.h" @@ -68,7 +65,7 @@ static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) { switch (i) { case Intrinsics::kNone: return kAllSideEffects; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return SideEffects; #include "intrinsics_list.h" @@ -84,7 +81,7 @@ static inline IntrinsicExceptions GetExceptions(Intrinsics i) { switch (i) { case Intrinsics::kNone: return kCanThrow; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return Exceptions; #include "intrinsics_list.h" @@ -95,430 +92,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) return kCanThrow; } -static Primitive::Type GetType(uint64_t data, bool is_op_size) { - if (is_op_size) { - switch (static_cast<OpSize>(data)) { - case kSignedByte: - return Primitive::kPrimByte; - case kSignedHalf: - return Primitive::kPrimShort; - case k32: - return Primitive::kPrimInt; - case k64: - return Primitive::kPrimLong; - default: - LOG(FATAL) << "Unknown/unsupported op size " << data; - UNREACHABLE(); - } - } else { - if ((data & kIntrinsicFlagIsLong) != 0) { - return Primitive::kPrimLong; - } - if ((data & kIntrinsicFlagIsObject) != 0) { - return Primitive::kPrimNot; - } - return Primitive::kPrimInt; - } -} - -static Intrinsics GetIntrinsic(InlineMethod method) { - switch (method.opcode) { - // Floating-point conversions. - case kIntrinsicDoubleCvt: - return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? - Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble; - case kIntrinsicFloatCvt: - return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? - Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat; - case kIntrinsicFloat2Int: - return Intrinsics::kFloatFloatToIntBits; - case kIntrinsicDouble2Long: - return Intrinsics::kDoubleDoubleToLongBits; - - // Floating-point tests. - case kIntrinsicFloatIsInfinite: - return Intrinsics::kFloatIsInfinite; - case kIntrinsicDoubleIsInfinite: - return Intrinsics::kDoubleIsInfinite; - case kIntrinsicFloatIsNaN: - return Intrinsics::kFloatIsNaN; - case kIntrinsicDoubleIsNaN: - return Intrinsics::kDoubleIsNaN; - - // Bit manipulations. 
- case kIntrinsicReverseBits: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerReverse; - case Primitive::kPrimLong: - return Intrinsics::kLongReverse; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicReverseBytes: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimShort: - return Intrinsics::kShortReverseBytes; - case Primitive::kPrimInt: - return Intrinsics::kIntegerReverseBytes; - case Primitive::kPrimLong: - return Intrinsics::kLongReverseBytes; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicRotateRight: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerRotateRight; - case Primitive::kPrimLong: - return Intrinsics::kLongRotateRight; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicRotateLeft: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerRotateLeft; - case Primitive::kPrimLong: - return Intrinsics::kLongRotateLeft; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - - // Misc data processing. - case kIntrinsicBitCount: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerBitCount; - case Primitive::kPrimLong: - return Intrinsics::kLongBitCount; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicCompare: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerCompare; - case Primitive::kPrimLong: - return Intrinsics::kLongCompare; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicHighestOneBit: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerHighestOneBit; - case Primitive::kPrimLong: - return Intrinsics::kLongHighestOneBit; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicLowestOneBit: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerLowestOneBit; - case Primitive::kPrimLong: - return Intrinsics::kLongLowestOneBit; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicNumberOfLeadingZeros: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerNumberOfLeadingZeros; - case Primitive::kPrimLong: - return Intrinsics::kLongNumberOfLeadingZeros; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicNumberOfTrailingZeros: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerNumberOfTrailingZeros; - case Primitive::kPrimLong: - return Intrinsics::kLongNumberOfTrailingZeros; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicSignum: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerSignum; - case Primitive::kPrimLong: - return Intrinsics::kLongSignum; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - - // Abs. 
- case kIntrinsicAbsDouble: - return Intrinsics::kMathAbsDouble; - case kIntrinsicAbsFloat: - return Intrinsics::kMathAbsFloat; - case kIntrinsicAbsInt: - return Intrinsics::kMathAbsInt; - case kIntrinsicAbsLong: - return Intrinsics::kMathAbsLong; - - // Min/max. - case kIntrinsicMinMaxDouble: - return ((method.d.data & kIntrinsicFlagMin) == 0) ? - Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble; - case kIntrinsicMinMaxFloat: - return ((method.d.data & kIntrinsicFlagMin) == 0) ? - Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat; - case kIntrinsicMinMaxInt: - return ((method.d.data & kIntrinsicFlagMin) == 0) ? - Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt; - case kIntrinsicMinMaxLong: - return ((method.d.data & kIntrinsicFlagMin) == 0) ? - Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong; - - // More math builtins. - case kIntrinsicCos: - return Intrinsics::kMathCos; - case kIntrinsicSin: - return Intrinsics::kMathSin; - case kIntrinsicAcos: - return Intrinsics::kMathAcos; - case kIntrinsicAsin: - return Intrinsics::kMathAsin; - case kIntrinsicAtan: - return Intrinsics::kMathAtan; - case kIntrinsicAtan2: - return Intrinsics::kMathAtan2; - case kIntrinsicCbrt: - return Intrinsics::kMathCbrt; - case kIntrinsicCosh: - return Intrinsics::kMathCosh; - case kIntrinsicExp: - return Intrinsics::kMathExp; - case kIntrinsicExpm1: - return Intrinsics::kMathExpm1; - case kIntrinsicHypot: - return Intrinsics::kMathHypot; - case kIntrinsicLog: - return Intrinsics::kMathLog; - case kIntrinsicLog10: - return Intrinsics::kMathLog10; - case kIntrinsicNextAfter: - return Intrinsics::kMathNextAfter; - case kIntrinsicSinh: - return Intrinsics::kMathSinh; - case kIntrinsicTan: - return Intrinsics::kMathTan; - case kIntrinsicTanh: - return Intrinsics::kMathTanh; - - // Misc math. - case kIntrinsicSqrt: - return Intrinsics::kMathSqrt; - case kIntrinsicCeil: - return Intrinsics::kMathCeil; - case kIntrinsicFloor: - return Intrinsics::kMathFloor; - case kIntrinsicRint: - return Intrinsics::kMathRint; - case kIntrinsicRoundDouble: - return Intrinsics::kMathRoundDouble; - case kIntrinsicRoundFloat: - return Intrinsics::kMathRoundFloat; - - // System.arraycopy. - case kIntrinsicSystemArrayCopyCharArray: - return Intrinsics::kSystemArrayCopyChar; - - case kIntrinsicSystemArrayCopy: - return Intrinsics::kSystemArrayCopy; - - // Thread.currentThread. - case kIntrinsicCurrentThread: - return Intrinsics::kThreadCurrentThread; - - // Memory.peek. - case kIntrinsicPeek: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimByte: - return Intrinsics::kMemoryPeekByte; - case Primitive::kPrimShort: - return Intrinsics::kMemoryPeekShortNative; - case Primitive::kPrimInt: - return Intrinsics::kMemoryPeekIntNative; - case Primitive::kPrimLong: - return Intrinsics::kMemoryPeekLongNative; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - - // Memory.poke. - case kIntrinsicPoke: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimByte: - return Intrinsics::kMemoryPokeByte; - case Primitive::kPrimShort: - return Intrinsics::kMemoryPokeShortNative; - case Primitive::kPrimInt: - return Intrinsics::kMemoryPokeIntNative; - case Primitive::kPrimLong: - return Intrinsics::kMemoryPokeLongNative; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - - // String. 
- case kIntrinsicCharAt: - return Intrinsics::kStringCharAt; - case kIntrinsicCompareTo: - return Intrinsics::kStringCompareTo; - case kIntrinsicEquals: - return Intrinsics::kStringEquals; - case kIntrinsicGetCharsNoCheck: - return Intrinsics::kStringGetCharsNoCheck; - case kIntrinsicIsEmptyOrLength: - return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ? - Intrinsics::kStringLength : Intrinsics::kStringIsEmpty; - case kIntrinsicIndexOf: - return ((method.d.data & kIntrinsicFlagBase0) == 0) ? - Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf; - case kIntrinsicNewStringFromBytes: - return Intrinsics::kStringNewStringFromBytes; - case kIntrinsicNewStringFromChars: - return Intrinsics::kStringNewStringFromChars; - case kIntrinsicNewStringFromString: - return Intrinsics::kStringNewStringFromString; - - case kIntrinsicCas: - switch (GetType(method.d.data, false)) { - case Primitive::kPrimNot: - return Intrinsics::kUnsafeCASObject; - case Primitive::kPrimInt: - return Intrinsics::kUnsafeCASInt; - case Primitive::kPrimLong: - return Intrinsics::kUnsafeCASLong; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicUnsafeGet: { - const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile); - switch (GetType(method.d.data, false)) { - case Primitive::kPrimInt: - return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet; - case Primitive::kPrimLong: - return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong; - case Primitive::kPrimNot: - return is_volatile ? Intrinsics::kUnsafeGetObjectVolatile : Intrinsics::kUnsafeGetObject; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - } - case kIntrinsicUnsafePut: { - enum Sync { kNoSync, kVolatile, kOrdered }; - const Sync sync = - ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile : - ((method.d.data & kIntrinsicFlagIsOrdered) != 0) ? kOrdered : - kNoSync; - switch (GetType(method.d.data, false)) { - case Primitive::kPrimInt: - switch (sync) { - case kNoSync: - return Intrinsics::kUnsafePut; - case kVolatile: - return Intrinsics::kUnsafePutVolatile; - case kOrdered: - return Intrinsics::kUnsafePutOrdered; - } - break; - case Primitive::kPrimLong: - switch (sync) { - case kNoSync: - return Intrinsics::kUnsafePutLong; - case kVolatile: - return Intrinsics::kUnsafePutLongVolatile; - case kOrdered: - return Intrinsics::kUnsafePutLongOrdered; - } - break; - case Primitive::kPrimNot: - switch (sync) { - case kNoSync: - return Intrinsics::kUnsafePutObject; - case kVolatile: - return Intrinsics::kUnsafePutObjectVolatile; - case kOrdered: - return Intrinsics::kUnsafePutObjectOrdered; - } - break; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - break; - } - - // 1.8. - case kIntrinsicUnsafeGetAndAddInt: - return Intrinsics::kUnsafeGetAndAddInt; - case kIntrinsicUnsafeGetAndAddLong: - return Intrinsics::kUnsafeGetAndAddLong; - case kIntrinsicUnsafeGetAndSetInt: - return Intrinsics::kUnsafeGetAndSetInt; - case kIntrinsicUnsafeGetAndSetLong: - return Intrinsics::kUnsafeGetAndSetLong; - case kIntrinsicUnsafeGetAndSetObject: - return Intrinsics::kUnsafeGetAndSetObject; - case kIntrinsicUnsafeLoadFence: - return Intrinsics::kUnsafeLoadFence; - case kIntrinsicUnsafeStoreFence: - return Intrinsics::kUnsafeStoreFence; - case kIntrinsicUnsafeFullFence: - return Intrinsics::kUnsafeFullFence; - - // Virtual cases. 
- - case kIntrinsicReferenceGetReferent: - return Intrinsics::kReferenceGetReferent; - - // Quick inliner cases. Remove after refactoring. They are here so that we can use the - // compiler to warn on missing cases. - - case kInlineOpNop: - case kInlineOpReturnArg: - case kInlineOpNonWideConst: - case kInlineOpIGet: - case kInlineOpIPut: - case kInlineOpConstructor: - return Intrinsics::kNone; - - // String init cases, not intrinsics. - - case kInlineStringInit: - return Intrinsics::kNone; - - // No default case to make the compiler warn on missing cases. - } - return Intrinsics::kNone; -} - -static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile& dex_file) { - // The DexFileMethodInliner should have checked whether the methods are agreeing with - // what we expect, i.e., static methods are called as such. Add another check here for - // our expectations: - // +static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { // Whenever the intrinsic is marked as static, report an error if we find an InvokeVirtual. // // Whenever the intrinsic is marked as direct and we find an InvokeVirtual, a devirtualization @@ -532,9 +106,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile // inline. If the precise type is known, however, the instruction will be sharpened to an // InvokeStaticOrDirect. InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic); - InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ? - invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() : - invoke->IsInvokeVirtual() ? kVirtual : kSuper; + InvokeType invoke_type = invoke->GetInvokeType(); switch (intrinsic_type) { case kStatic: return (invoke_type == kStatic); @@ -544,13 +116,9 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile return true; } if (invoke_type == kVirtual) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ArtMethod* art_method = invoke->GetResolvedMethod(); ScopedObjectAccess soa(Thread::Current()); - ArtMethod* art_method = - class_linker->FindDexCache(soa.Self(), dex_file)->GetResolvedMethod( - invoke->GetDexMethodIndex(), class_linker->GetImagePointerSize()); - return art_method != nullptr && - (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal()); + return (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal()); } return false; @@ -563,35 +131,28 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile } } -// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod. 
void IntrinsicsRecognizer::Run() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + ScopedObjectAccess soa(Thread::Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { HInstruction* inst = inst_it.Current(); if (inst->IsInvoke()) { HInvoke* invoke = inst->AsInvoke(); - InlineMethod method; - const DexFile& dex_file = invoke->GetDexFile(); - DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file); - DCHECK(inliner != nullptr); - if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { - Intrinsics intrinsic = GetIntrinsic(method); - - if (intrinsic != Intrinsics::kNone) { - if (!CheckInvokeType(intrinsic, invoke, dex_file)) { - LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " - << intrinsic << " for " - << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile()) - << invoke->DebugName(); - } else { - invoke->SetIntrinsic(intrinsic, - NeedsEnvironmentOrCache(intrinsic), - GetSideEffects(intrinsic), - GetExceptions(intrinsic)); - MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized); - } + ArtMethod* art_method = invoke->GetResolvedMethod(); + if (art_method != nullptr && art_method->IsIntrinsic()) { + Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic()); + if (!CheckInvokeType(intrinsic, invoke)) { + LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " + << intrinsic << " for " + << invoke->GetDexFile().PrettyMethod(invoke->GetDexMethodIndex()) + << invoke->DebugName(); + } else { + invoke->SetIntrinsic(intrinsic, + NeedsEnvironmentOrCache(intrinsic), + GetSideEffects(intrinsic), + GetExceptions(intrinsic)); + MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized); } } } @@ -604,7 +165,7 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { case Intrinsics::kNone: os << "None"; break; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ os << # Name; \ break; diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 3429a8fdbb..1e73cf67df 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -27,9 +27,6 @@ namespace art { class CompilerDriver; class DexFile; -// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751 -static constexpr bool kRoundIsPlusPointFive = false; - // Positive floating-point infinities. static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U; static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000); @@ -37,17 +34,14 @@ static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000) // Recognize intrinsics from HInvoke nodes. 
class IntrinsicsRecognizer : public HOptimization { public: - IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats) - : HOptimization(graph, kIntrinsicsRecognizerPassName, stats), - driver_(driver) {} + IntrinsicsRecognizer(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, kIntrinsicsRecognizerPassName, stats) {} void Run() OVERRIDE; static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition"; private: - CompilerDriver* driver_; - DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer); }; @@ -61,7 +55,7 @@ class IntrinsicVisitor : public ValueObject { switch (invoke->GetIntrinsic()) { case Intrinsics::kNone: return; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, ...) \ case Intrinsics::k ## Name: \ Visit ## Name(invoke); \ return; @@ -76,7 +70,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, ...) \ virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } #include "intrinsics_list.h" @@ -246,6 +240,24 @@ UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence) \ UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence) \ UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence) +template <typename IntrinsicLocationsBuilder, typename Codegenerator> +bool IsCallFreeIntrinsic(HInvoke* invoke, Codegenerator* codegen) { + if (invoke->GetIntrinsic() != Intrinsics::kNone) { + // This invoke may have intrinsic code generation defined. However, we must + // now also determine if this code generation is truly there and call-free + // (not unimplemented, no bail on instruction features, or call on slow path). + // This is done by actually calling the locations builder on the instruction + // and clearing out the locations once result is known. We assume this + // call only has creating locations as side effects! + // TODO: Avoid wasting Arena memory. + IntrinsicLocationsBuilder builder(codegen); + bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall(); + invoke->SetLocations(nullptr); + return success; + } + return false; +} + } // namespace art #endif // ART_COMPILER_OPTIMIZING_INTRINSICS_H_ diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index be061f53f7..8790c1e4f1 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -41,6 +41,97 @@ ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() { using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>; +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 
+class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode { + public: + explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction) + : SlowPathCode(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + Register dest = locations->InAt(2).AsRegister<Register>(); + Location dest_pos = locations->InAt(3); + Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>(); + Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>(); + Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>(); + Register tmp = locations->GetTemp(3).AsRegister<Register>(); + + __ Bind(GetEntryLabel()); + // Compute the base destination address in `dst_curr_addr`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(dst_curr_addr, dest, element_size * constant + offset); + } else { + __ add(dst_curr_addr, + dest, + ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(dst_curr_addr, offset); + } + + Label loop; + __ Bind(&loop); + __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex)); + __ MaybeUnpoisonHeapReference(tmp); + // TODO: Inline the mark bit check before calling the runtime? + // tmp = ReadBarrier::Mark(tmp); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more + // explanations.) + DCHECK_NE(tmp, SP); + DCHECK_NE(tmp, LR); + DCHECK_NE(tmp, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary (and not preserved). It thus cannot be used by + // any live register in this slow path. + DCHECK_NE(src_curr_addr, IP); + DCHECK_NE(dst_curr_addr, IP); + DCHECK_NE(src_stop_addr, IP); + DCHECK_NE(tmp, IP); + DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp; + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp); + // This runtime call does not require a stack map. 
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + __ MaybePoisonHeapReference(tmp); + __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex)); + __ cmp(src_curr_addr, ShifterOperand(src_stop_addr)); + __ b(&loop, NE); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM); +}; + +#undef __ + +IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen) + : arena_(codegen->GetGraph()->GetArena()), + assembler_(codegen->GetAssembler()), + features_(codegen->GetInstructionSetFeatures()) {} + bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); @@ -561,15 +652,18 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier. @@ -797,8 +891,13 @@ void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke, Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -807,36 +906,65 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, locations->SetInAt(4, Location::RequiresRegister()); // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. - Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. + Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) ? Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); + // Temporary registers used in CAS. 
In the object case + // (UnsafeCASObject intrinsic), these are also used for + // card-marking, and possibly for (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Pointer. locations->AddTemp(Location::RequiresRegister()); // Temp 1. } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) { +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* codegen) { DCHECK_NE(type, Primitive::kPrimLong); ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); // Boolean result. + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); // Boolean result. - Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B). - Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected. - Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value. + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); // Offset (discard high 4B). + Register expected = locations->InAt(3).AsRegister<Register>(); // Expected. + Register value = locations->InAt(4).AsRegister<Register>(); // Value. - Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory. - Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. + Location tmp_ptr_loc = locations->GetTemp(0); + Register tmp_ptr = tmp_ptr_loc.AsRegister<Register>(); // Pointer to actual memory. + Register tmp = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged // object and scan the receiver at the next GC for nothing. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo, value_can_be_null); + codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + ScaleFactor::TIMES_1, + tmp_ptr_loc, + /* needs_null_check */ false, + /* always_update_field */ true, + &tmp); + } } // Prevent reordering with prior memory operations. @@ -848,12 +976,12 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ add(tmp_ptr, base, ShifterOperand(offset)); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->PoisonHeapReference(expected_lo); - if (value_lo == expected_lo) { - // Do not poison `value_lo`, as it is the same register as - // `expected_lo`, which has just been poisoned. 
+ __ PoisonHeapReference(expected); + if (value == expected) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. } else { - codegen->GetAssembler()->PoisonHeapReference(value_lo); + __ PoisonHeapReference(value); } } @@ -865,37 +993,29 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat Label loop_head; __ Bind(&loop_head); - // TODO: When `type == Primitive::kPrimNot`, add a read barrier for - // the reference stored in the object before attempting the CAS, - // similar to the one in the art::Unsafe_compareAndSwapObject JNI - // implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject). - DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); - __ ldrex(tmp_lo, tmp_ptr); + __ ldrex(tmp, tmp_ptr); - __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); + __ subs(tmp, tmp, ShifterOperand(expected)); __ it(EQ, ItState::kItT); - __ strex(tmp_lo, value_lo, tmp_ptr, EQ); - __ cmp(tmp_lo, ShifterOperand(1), EQ); + __ strex(tmp, value, tmp_ptr, EQ); + __ cmp(tmp, ShifterOperand(1), EQ); __ b(&loop_head, EQ); __ dmb(ISH); - __ rsbs(out, tmp_lo, ShifterOperand(1)); + __ rsbs(out, tmp, ShifterOperand(1)); __ it(CC); __ mov(out, ShifterOperand(0), CC); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->UnpoisonHeapReference(expected_lo); - if (value_lo == expected_lo) { - // Do not unpoison `value_lo`, as it is the same register as - // `expected_lo`, which has just been unpoisoned. + __ UnpoisonHeapReference(expected); + if (value == expected) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. } else { - codegen->GetAssembler()->UnpoisonHeapReference(value_lo); + __ UnpoisonHeapReference(value); } } } @@ -904,33 +1024,23 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + GenCas(invoke, Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { @@ -945,6 +1055,11 @@ void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + // Need temporary registers for String compression's feature. + if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } @@ -959,10 +1074,16 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { Register temp0 = locations->GetTemp(0).AsRegister<Register>(); Register temp1 = locations->GetTemp(1).AsRegister<Register>(); Register temp2 = locations->GetTemp(2).AsRegister<Register>(); + Register temp3, temp4; + if (mirror::kUseStringCompression) { + temp3 = locations->GetTemp(3).AsRegister<Register>(); + temp4 = locations->GetTemp(4).AsRegister<Register>(); + } Label loop; Label find_char_diff; Label end; + Label different_compression; // Get offsets of count and value fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); @@ -983,20 +1104,40 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { // Reference equality check, return 0 if same reference. __ subs(out, str, ShifterOperand(arg)); __ b(&end, EQ); - // Load lengths of this and argument strings. - __ ldr(temp2, Address(str, count_offset)); - __ ldr(temp1, Address(arg, count_offset)); + if (mirror::kUseStringCompression) { + // Load lengths of this and argument strings. + __ ldr(temp3, Address(str, count_offset)); + __ ldr(temp4, Address(arg, count_offset)); + // Clean out compression flag from lengths. + __ bic(temp0, temp3, ShifterOperand(0x80000000)); + __ bic(IP, temp4, ShifterOperand(0x80000000)); + } else { + // Load lengths of this and argument strings. + __ ldr(temp0, Address(str, count_offset)); + __ ldr(IP, Address(arg, count_offset)); + } // out = length diff. - __ subs(out, temp2, ShifterOperand(temp1)); + __ subs(out, temp0, ShifterOperand(IP)); // temp0 = min(len(str), len(arg)). - __ it(Condition::LT, kItElse); - __ mov(temp0, ShifterOperand(temp2), Condition::LT); - __ mov(temp0, ShifterOperand(temp1), Condition::GE); + __ it(GT); + __ mov(temp0, ShifterOperand(IP), GT); // Shorter string is empty? __ CompareAndBranchIfZero(temp0, &end); + if (mirror::kUseStringCompression) { + // Check if both strings using same compression style to use this comparison loop. + __ eors(temp3, temp3, ShifterOperand(temp4)); + __ b(&different_compression, MI); + } // Store offset of string value in preparation for comparison loop. __ mov(temp1, ShifterOperand(value_offset)); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. + __ cmp(temp4, ShifterOperand(0)); + __ it(GE); + __ add(temp0, temp0, ShifterOperand(temp0), GE); + } // Assertions that must hold in order to compare multiple characters at a time. 
CHECK_ALIGNED(value_offset, 8); @@ -1006,6 +1147,7 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); DCHECK_EQ(char_size, 2u); + Label find_char_diff_2nd_cmp; // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment). __ Bind(&loop); __ ldr(IP, Address(str, temp1)); @@ -1013,43 +1155,113 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { __ cmp(IP, ShifterOperand(temp2)); __ b(&find_char_diff, NE); __ add(temp1, temp1, ShifterOperand(char_size * 2)); - __ sub(temp0, temp0, ShifterOperand(2)); __ ldr(IP, Address(str, temp1)); __ ldr(temp2, Address(arg, temp1)); __ cmp(IP, ShifterOperand(temp2)); - __ b(&find_char_diff, NE); + __ b(&find_char_diff_2nd_cmp, NE); __ add(temp1, temp1, ShifterOperand(char_size * 2)); - __ subs(temp0, temp0, ShifterOperand(2)); - - __ b(&loop, GT); + // With string compression, we have compared 8 bytes, otherwise 4 chars. + __ subs(temp0, temp0, ShifterOperand(mirror::kUseStringCompression ? 8 : 4)); + __ b(&loop, HI); __ b(&end); - // Find the single 16-bit character difference. + __ Bind(&find_char_diff_2nd_cmp); + if (mirror::kUseStringCompression) { + __ subs(temp0, temp0, ShifterOperand(4)); // 4 bytes previously compared. + __ b(&end, LS); // Was the second comparison fully beyond the end? + } else { + // Without string compression, we can start treating temp0 as signed + // and rely on the signed comparison below. + __ sub(temp0, temp0, ShifterOperand(2)); + } + + // Find the single character difference. __ Bind(&find_char_diff); // Get the bit position of the first character that differs. __ eor(temp1, temp2, ShifterOperand(IP)); __ rbit(temp1, temp1); __ clz(temp1, temp1); - // temp0 = number of 16-bit characters remaining to compare. - // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and - // after the end of the shorter string data). - - // (temp1 >> 4) = character where difference occurs between the last two words compared, on the - // interval [0,1] (0 for low half-word different, 1 for high half-word different). - - // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just - // return length diff (out). - __ cmp(temp0, ShifterOperand(temp1, LSR, 4)); - __ b(&end, LE); + // temp0 = number of characters remaining to compare. + // (Without string compression, it could be < 1 if a difference is found by the second CMP + // in the comparison loop, and after the end of the shorter string data). + + // Without string compression (temp1 >> 4) = character where difference occurs between the last + // two words compared, in the interval [0,1]. + // (0 for low half-word different, 1 for high half-word different). + // With string compression, (temp1 << 3) = byte where the difference occurs, + // in the interval [0,3]. + + // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside + // the remaining string data, so just return length diff (out). + // The comparison is unsigned for string compression, otherwise signed. + __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4)); + __ b(&end, mirror::kUseStringCompression ? LS : LE); // Extract the characters and calculate the difference. 
+ Label uncompressed_string, continue_process; + if (mirror::kUseStringCompression) { + __ cmp(temp4, ShifterOperand(0)); + __ b(&uncompressed_string, GE); + __ bic(temp1, temp1, ShifterOperand(0x7)); + __ b(&continue_process); + } + __ Bind(&uncompressed_string); __ bic(temp1, temp1, ShifterOperand(0xf)); + __ Bind(&continue_process); + __ Lsr(temp2, temp2, temp1); __ Lsr(IP, IP, temp1); + Label calculate_difference, uncompressed_string_extract_chars; + if (mirror::kUseStringCompression) { + __ cmp(temp4, ShifterOperand(0)); + __ b(&uncompressed_string_extract_chars, GE); + __ ubfx(temp2, temp2, 0, 8); + __ ubfx(IP, IP, 0, 8); + __ b(&calculate_difference); + } + __ Bind(&uncompressed_string_extract_chars); __ movt(temp2, 0); __ movt(IP, 0); + __ Bind(&calculate_difference); __ sub(out, IP, ShifterOperand(temp2)); + __ b(&end); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + Label loop_arg_compressed, loop_this_compressed, find_diff; + // Comparison for different compression style. + // This part is when THIS is compressed and ARG is not. + __ Bind(&different_compression); + __ add(temp2, str, ShifterOperand(value_offset)); + __ add(temp3, arg, ShifterOperand(value_offset)); + __ cmp(temp4, ShifterOperand(0)); + __ b(&loop_arg_compressed, LT); + + __ Bind(&loop_this_compressed); + __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex)); + __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex)); + __ cmp(IP, ShifterOperand(temp4)); + __ b(&find_diff, NE); + __ subs(temp0, temp0, ShifterOperand(1)); + __ b(&loop_this_compressed, GT); + __ b(&end); + + // This part is when THIS is not compressed and ARG is. + __ Bind(&loop_arg_compressed); + __ ldrh(IP, Address(temp2, char_size, Address::PostIndex)); + __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex)); + __ cmp(IP, ShifterOperand(temp4)); + __ b(&find_diff, NE); + __ subs(temp0, temp0, ShifterOperand(1)); + __ b(&loop_arg_compressed, GT); + __ b(&end); + + // Calculate the difference. + __ Bind(&find_diff); + __ sub(out, IP, ShifterOperand(temp4)); + } __ Bind(&end); @@ -1086,7 +1298,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { Register temp1 = locations->GetTemp(1).AsRegister<Register>(); Register temp2 = locations->GetTemp(2).AsRegister<Register>(); - Label loop; + Label loop, preloop; Label end; Label return_true; Label return_false; @@ -1120,11 +1332,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { __ ldr(temp, Address(str, count_offset)); __ ldr(temp1, Address(arg, count_offset)); // Check if lengths are equal, return false if they're not. + // Also compares the compression style, if differs return false. __ cmp(temp, ShifterOperand(temp1)); __ b(&return_false, NE); // Return true if both strings are empty. + if (mirror::kUseStringCompression) { + // Length needs to be masked out first because 0 is treated as compressed. + __ bic(temp, temp, ShifterOperand(0x80000000)); + } __ cbz(temp, &return_true); - // Reference equality check, return true if same reference. 
__ cmp(str, ShifterOperand(arg)); __ b(&return_true, EQ); @@ -1133,10 +1349,19 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { DCHECK_ALIGNED(value_offset, 4); static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); - __ LoadImmediate(temp1, value_offset); - + if (mirror::kUseStringCompression) { + // If not compressed, directly to fast compare. Else do preprocess on length. + __ cmp(temp1, ShifterOperand(0)); + __ b(&preloop, GT); + // Mask out compression flag and adjust length for compressed string (8-bit) + // as if it is a 16-bit data, new_length = (length + 1) / 2. + __ add(temp, temp, ShifterOperand(1)); + __ Lsr(temp, temp, 1); + __ Bind(&preloop); + } // Loop to compare strings 2 characters at a time starting at the front of the string. // Ok to do this because strings with an odd length are zero-padded. + __ LoadImmediate(temp1, value_offset); __ Bind(&loop); __ ldr(out, Address(str, temp1)); __ ldr(temp2, Address(arg, temp1)); @@ -1200,10 +1425,8 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, __ LoadImmediate(tmp_reg, 0); } - __ LoadFromOffset(kLoadWord, LR, TR, - QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pIndexOf).Int32Value()); + codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); - __ blx(LR); if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); @@ -1212,7 +1435,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. @@ -1232,7 +1455,7 @@ void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) { void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. 
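The IndexOf and NewStringFrom* hunks in this region switch the hand-rolled entrypoint call to CodeGenerator::InvokeRuntime, which loads the entrypoint, emits the branch and records the PC info in one place; where a slow path exists, the call kind also becomes kCallOnMainAndSlowPath so that the slow path is accounted for. Condensed from the VisitStringNewStringFromChars hunk below, the pattern is roughly:

  // Removed: hand-rolled runtime call.
  __ LoadFromOffset(kLoadWord, LR, TR,
                    QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromChars).Int32Value());
  __ blx(LR);
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());

  // Added: one call that performs the load, the branch and the PC recording.
  codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());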
@@ -1250,7 +1473,7 @@ void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) { void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1270,13 +1493,8 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) { codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel(), EQ); - __ LoadFromOffset(kLoadWord, - LR, - TR, - QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromBytes).Int32Value()); + codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); - __ blx(LR); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1292,26 +1510,19 @@ void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke } void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) { - ArmAssembler* assembler = GetAssembler(); - // No need to emit code checking whether `locations->InAt(2)` is a null // pointer, as callers of the native method // // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) // // all include a null check on `data` before calling that method. - __ LoadFromOffset(kLoadWord, - LR, - TR, - QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromChars).Int32Value()); + codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); - __ blx(LR); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1328,18 +1539,16 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel(), EQ); - __ LoadFromOffset(kLoadWord, - LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pAllocStringFromString).Int32Value()); + codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); - __ blx(LR); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Bind(slow_path->GetExitLabel()); } void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1362,6 +1571,13 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { locations->SetInAt(4, Location::RequiresRegister()); } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP cannot be used in + // ReadBarrierSystemArrayCopySlowPathARM (because that register + // is clobbered by ReadBarrierMarkRegX entry points). Get an extra + // temporary register from the register allocator. + locations->AddTemp(Location::RequiresRegister()); + } } static void CheckPosition(ArmAssembler* assembler, @@ -1427,9 +1643,9 @@ static void CheckPosition(ArmAssembler* assembler, } void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); ArmAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1438,18 +1654,22 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); Register src = locations->InAt(0).AsRegister<Register>(); Location src_pos = locations->InAt(1); Register dest = locations->InAt(2).AsRegister<Register>(); Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = locations->GetTemp(1).AsRegister<Register>(); - Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Location temp2_loc = locations->GetTemp(1); + Register temp2 = temp2_loc.AsRegister<Register>(); + Location temp3_loc = locations->GetTemp(2); + Register temp3 = temp3_loc.AsRegister<Register>(); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); Label conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -1465,7 +1685,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ cmp(src, ShifterOperand(dest)); - __ b(slow_path->GetEntryLabel(), EQ); + __ b(intrinsic_slow_path->GetEntryLabel(), EQ); } // Checked when building locations. 
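The src_pos/dest_pos checks in the hunk above (and their register variants in the next hunk) guard the one overlap case the inline copy cannot handle: copying within the same array towards higher indices. When both positions are constants and the destination is known to be the source, the builder has already ruled the bad ordering out (the DCHECK); in the remaining cases a compare and a conditional branch to intrinsic_slow_path enforce it at runtime. As an illustrative restatement, not the emitted code:

  #include <cstdint>

  // True when the inline forward copy loop is safe; otherwise the runtime
  // arraycopy (reached through the intrinsic slow path) must handle the call.
  bool FastPathCopyIsSafe(const void* src, const void* dest,
                          int32_t src_pos, int32_t dest_pos) {
    // A forward element-by-element copy only clobbers data it still has to
    // read when source and destination are the same array and the data moves
    // towards higher indices.
    return src != dest || dest_pos <= src_pos;
  }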
@@ -1477,7 +1697,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { __ b(&conditions_on_positions_validated, NE); } __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant)); - __ b(slow_path->GetEntryLabel(), GT); + __ b(intrinsic_slow_path->GetEntryLabel(), GT); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -1490,19 +1710,19 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { } else { __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>())); } - __ b(slow_path->GetEntryLabel(), LT); + __ b(intrinsic_slow_path->GetEntryLabel(), LT); } __ Bind(&conditions_on_positions_validated); if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. - __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel()); + __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. - __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel()); + __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel()); } // If the length is negative, bail out. @@ -1511,7 +1731,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { __ cmp(length.AsRegister<Register>(), ShifterOperand(0)); - __ b(slow_path->GetEntryLabel(), LT); + __ b(intrinsic_slow_path->GetEntryLabel(), LT); } // Validity checks: source. @@ -1519,7 +1739,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -1528,7 +1748,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); @@ -1537,112 +1757,287 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - __ LoadFromOffset(kLoadWord, temp1, dest, class_offset); - __ LoadFromOffset(kLoadWord, temp2, src, class_offset); - bool did_unpoison = false; - if (!optimizations.GetDestinationIsNonPrimitiveArray() || - !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoison them - // both to make the identity check valid. - __ MaybeUnpoisonHeapReference(temp1); - __ MaybeUnpoisonHeapReference(temp2); - did_unpoison = true; - } - if (!optimizations.GetDestinationIsNonPrimitiveArray()) { - // Bail out if the destination is not a non primitive array. 
- // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); - __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp3); - __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); - } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); + } - if (!optimizations.GetSourceIsNonPrimitiveArray()) { - // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ temp3 = temp2->component_type_ - __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset); - __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp3); - __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); - } + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false); + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such a `temp1`. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false); + __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel()); + } + + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. 
+ // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false); + // Note: if heap poisoning is on, we are comparing two unpoisoned references here. + __ cmp(temp1, ShifterOperand(temp2)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + Label do_copy; + __ b(&do_copy, EQ); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ b(intrinsic_slow_path->GetEntryLabel(), NE); + } + } else { + // Non read barrier code. + + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ LoadFromOffset(kLoadWord, temp1, dest, class_offset); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ LoadFromOffset(kLoadWord, temp2, src, class_offset); + bool did_unpoison = false; + if (!optimizations.GetDestinationIsNonPrimitiveArray() || + !optimizations.GetSourceIsNonPrimitiveArray()) { + // One or two of the references need to be unpoisoned. Unpoison them + // both to make the identity check valid. + __ MaybeUnpoisonHeapReference(temp1); + __ MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } - __ cmp(temp1, ShifterOperand(temp2)); + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // Bail out if the source is not a non primitive array. 
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_ + __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); + } - if (optimizations.GetDestinationIsTypedObjectArray()) { - Label do_copy; - __ b(&do_copy, EQ); - if (!did_unpoison) { + __ cmp(temp1, ShifterOperand(temp2)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + Label do_copy; + __ b(&do_copy, EQ); + if (!did_unpoison) { + __ MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // No need to unpoison the result, we're comparing against null. + __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ b(intrinsic_slow_path->GetEntryLabel(), NE); } - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); - // No need to unpoison the result, we're comparing against null. - __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_copy); - } else { - __ b(slow_path->GetEntryLabel(), NE); } } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ temp1 = src->klass_ - __ LoadFromOffset(kLoadWord, temp1, src, class_offset); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); - __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp3); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp3` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 
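      // Both arms of this if/else, together with the shared primitive_type_
      // test that follows them, implement the same Java-level guard (a sketch,
      // not generated code):
      //
      //   Class<?> component = src.getClass().getComponentType();
      //   if (component == null) goto slow_path;        // src is not an array
      //   if (component.isPrimitive()) goto slow_path;   // e.g. int[], not a reference array
      //
      // Primitive::kPrimNot == 0 is what lets the second test be a plain
      // non-zero check on primitive_type_.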
+ } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ LoadFromOffset(kLoadWord, temp1, src, class_offset); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + } + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); + __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); } - // Compute base source address, base destination address, and end source address. - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + // Compute the base source address in `temp1`. if (src_pos.IsConstant()) { int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); __ AddConstant(temp1, src, element_size * constant + offset); } else { - __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2)); + __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift)); __ AddConstant(temp1, offset); } - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ AddConstant(temp2, dest, element_size * constant + offset); - } else { - __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2)); - __ AddConstant(temp2, offset); - } - + // Compute the end source address in `temp3`. if (length.IsConstant()) { int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); __ AddConstant(temp3, temp1, element_size * constant); } else { - __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2)); + __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift)); } - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - Label loop, done; - __ cmp(temp1, ShifterOperand(temp3)); - __ b(&done, EQ); - __ Bind(&loop); - __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); - __ str(IP, Address(temp2, element_size, Address::PostIndex)); - __ cmp(temp1, ShifterOperand(temp3)); - __ b(&loop, NE); - __ Bind(&done); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // The base destination address is computed later, as `temp2` is + // used for intermediate computations. + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + Label loop, done; + + // Don't enter copy loop if `length == 0`. 
+ __ cmp(temp1, ShifterOperand(temp3)); + __ b(&done, EQ); + + // /* int32_t */ monitor = src->monitor_ + __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `temp2`. + __ add(src, src, ShifterOperand(temp2, LSR, 32)); + + // Slow path used to copy array when `src` is gray. + SlowPathCode* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); + // Carry flag is the last bit shifted out by LSRS. + __ b(read_barrier_slow_path->GetEntryLabel(), CS); + + // Fast-path copy. + + // Compute the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(temp2, dest, element_size * constant + offset); + } else { + __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); + __ str(IP, Address(temp2, element_size, Address::PostIndex)); + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&loop, NE); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code. + + // Compute the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(temp2, dest, element_size * constant + offset); + } else { + __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + Label loop, done; + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&done, EQ); + __ Bind(&loop); + __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); + __ str(IP, Address(temp2, element_size, Address::PostIndex)); + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&loop, NE); + __ Bind(&done); + } // We only need one card marking on the destination array. 
codegen_->MarkGCCard(temp1, @@ -1651,7 +2046,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { Register(kNoRegister), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -1722,13 +2117,11 @@ static void GenFPToFPCall(HInvoke* invoke, DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0))); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1))); - __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value()); // Native code uses the soft float ABI. __ vmovrrd(calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1), FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>())); - __ blx(LR); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); __ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()), calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)); @@ -1748,7 +2141,6 @@ static void GenFPFPToFPCall(HInvoke* invoke, DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(2))); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(3))); - __ LoadFromOffset(kLoadWord, LR, TR, GetThreadOffset<kArmPointerSize>(entry).Int32Value()); // Native code uses the soft float ABI. __ vmovrrd(calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1), @@ -1756,8 +2148,7 @@ static void GenFPFPToFPCall(HInvoke* invoke, __ vmovrrd(calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3), FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>())); - __ blx(LR); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); __ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()), calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)); @@ -2070,22 +2461,31 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register src_ptr = locations->GetTemp(1).AsRegister<Register>(); Register dst_ptr = locations->GetTemp(2).AsRegister<Register>(); - // src range to copy. - __ add(src_ptr, srcObj, ShifterOperand(value_offset)); - __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1)); - + Label done, compressed_string_loop; // dst to be copied. __ add(dst_ptr, dstObj, ShifterOperand(data_offset)); __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1)); __ subs(num_chr, srcEnd, ShifterOperand(srcBegin)); - - // Do the copy. - Label loop, remainder, done; - // Early out for valid zero-length retrievals. __ b(&done, EQ); + // src range to copy. + __ add(src_ptr, srcObj, ShifterOperand(value_offset)); + Label compressed_string_preloop; + if (mirror::kUseStringCompression) { + // Location of count in string. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + // String's length. + __ ldr(IP, Address(srcObj, count_offset)); + __ cmp(IP, ShifterOperand(0)); + __ b(&compressed_string_preloop, LT); + } + __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1)); + + // Do the copy. + Label loop, remainder; + // Save repairing the value of num_chr on the < 4 character path. 
__ subs(IP, num_chr, ShifterOperand(4)); __ b(&remainder, LT); @@ -2114,6 +2514,20 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ subs(num_chr, num_chr, ShifterOperand(1)); __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex)); __ b(&remainder, GT); + __ b(&done); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. + __ Bind(&compressed_string_preloop); + __ add(src_ptr, src_ptr, ShifterOperand(srcBegin)); + __ Bind(&compressed_string_loop); + __ ldrb(IP, Address(src_ptr, c_char_size, Address::PostIndex)); + __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex)); + __ subs(num_chr, num_chr, ShifterOperand(1)); + __ b(&compressed_string_loop, GT); + } __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h index e01b6fffb8..7f20ea4b1f 100644 --- a/compiler/optimizing/intrinsics_arm.h +++ b/compiler/optimizing/intrinsics_arm.h @@ -33,14 +33,11 @@ class CodeGeneratorARM; class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor { public: - IntrinsicLocationsBuilderARM(ArenaAllocator* arena, - ArmAssembler* assembler, - const ArmInstructionSetFeatures& features) - : arena_(arena), assembler_(assembler), features_(features) {} + explicit IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen); // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -67,7 +64,7 @@ class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index e3a9d27a53..db1c022868 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -29,11 +29,11 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) -// TODO: make vixl clean wrt -Wshadow. +// TODO(VIXL): Make VIXL compile with -Wshadow. #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop namespace art { @@ -144,6 +144,73 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64); }; +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 
+class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp) + : SlowPathCodeARM64(instruction), tmp_(tmp) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + + Register src_curr_addr = XRegisterFrom(locations->GetTemp(0)); + Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1)); + Register src_stop_addr = XRegisterFrom(locations->GetTemp(2)); + Register tmp_reg = WRegisterFrom(tmp_); + + __ Bind(GetEntryLabel()); + vixl::aarch64::Label slow_copy_loop; + __ Bind(&slow_copy_loop); + __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex)); + codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg); + // TODO: Inline the mark bit check before calling the runtime? + // tmp_reg = ReadBarrier::Mark(tmp_reg); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more + // explanations.) + DCHECK_NE(tmp_.reg(), LR); + DCHECK_NE(tmp_.reg(), WSP); + DCHECK_NE(tmp_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary (and not preserved). It thus cannot be used by + // any live register in this slow path. + DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0); + DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0); + DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0); + DCHECK_NE(tmp_.reg(), IP0); + DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg(); + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg()); + // This runtime call does not require a stack map. 
+ codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg); + __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex)); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&slow_copy_loop, ne); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; } + + private: + Location tmp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64); +}; #undef __ bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { @@ -796,9 +863,9 @@ static void GenUnsafeGet(HInvoke* invoke, codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, trg_loc, base, - /* offset */ 0U, + /* offset */ 0u, /* index */ offset_loc, - /* scale_factor */ 0U, + /* scale_factor */ 0u, temp, /* needs_null_check */ false, is_volatile); @@ -813,7 +880,7 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == Primitive::kPrimNot) { DCHECK(trg.IsW()); - codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc); } } } @@ -823,15 +890,18 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) { @@ -1016,8 +1086,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke, Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1026,20 +1101,29 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, locations->SetInAt(4, Location::RequiresRegister()); // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. - Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. + Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) ? 
Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register for (Baker) read barrier. + locations->AddTemp(Location::RequiresRegister()); + } } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) { +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) { MacroAssembler* masm = codegen->GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); - Register out = WRegisterFrom(locations->Out()); // Boolean result. + Location out_loc = locations->Out(); + Register out = WRegisterFrom(out_loc); // Boolean result. Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Location offset_loc = locations->InAt(2); + Register offset = XRegisterFrom(offset_loc); // Long offset. Register expected = RegisterFrom(locations->InAt(3), type); // Expected. Register value = RegisterFrom(locations->InAt(4), type); // Value. @@ -1048,6 +1132,27 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); + + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Register temp = WRegisterFrom(locations->GetTemp(0)); + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + /* scale_factor */ 0u, + temp, + /* needs_null_check */ false, + /* use_load_acquire */ false, + /* always_update_field */ true); + } } UseScratchRegisterScope temps(masm); @@ -1075,14 +1180,6 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat vixl::aarch64::Label loop_head, exit_loop; __ Bind(&loop_head); - // TODO: When `type == Primitive::kPrimNot`, add a read barrier for - // the reference stored in the object before attempting the CAS, - // similar to the one in the art::Unsafe_compareAndSwapObject JNI - // implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). - DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); @@ -1109,14 +1206,9 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. 
- if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1124,22 +1216,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_); + GenCas(invoke, Primitive::kPrimLong, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + GenCas(invoke, Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { @@ -1153,6 +1240,11 @@ void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + // Need temporary registers for String compression's feature. + if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } @@ -1160,17 +1252,25 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); - Register str = XRegisterFrom(locations->InAt(0)); - Register arg = XRegisterFrom(locations->InAt(1)); + Register str = InputRegisterAt(invoke, 0); + Register arg = InputRegisterAt(invoke, 1); + DCHECK(str.IsW()); + DCHECK(arg.IsW()); Register out = OutputRegister(invoke); Register temp0 = WRegisterFrom(locations->GetTemp(0)); Register temp1 = WRegisterFrom(locations->GetTemp(1)); Register temp2 = WRegisterFrom(locations->GetTemp(2)); + Register temp3, temp5; + if (mirror::kUseStringCompression) { + temp3 = WRegisterFrom(locations->GetTemp(3)); + temp5 = WRegisterFrom(locations->GetTemp(4)); + } vixl::aarch64::Label loop; vixl::aarch64::Label find_char_diff; vixl::aarch64::Label end; + vixl::aarch64::Label different_compression; // Get offsets of count and value fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); @@ -1191,9 +1291,18 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Reference equality check, return 0 if same reference. __ Subs(out, str, arg); __ B(&end, eq); - // Load lengths of this and argument strings. 
- __ Ldr(temp0, MemOperand(str.X(), count_offset)); - __ Ldr(temp1, MemOperand(arg.X(), count_offset)); + if (mirror::kUseStringCompression) { + // Load lengths of this and argument strings. + __ Ldr(temp3, HeapOperand(str, count_offset)); + __ Ldr(temp5, HeapOperand(arg, count_offset)); + // Clean out compression flag from lengths. + __ Bic(temp0, temp3, Operand(static_cast<int32_t>(0x80000000))); + __ Bic(temp1, temp5, Operand(static_cast<int32_t>(0x80000000))); + } else { + // Load lengths of this and argument strings. + __ Ldr(temp0, HeapOperand(str, count_offset)); + __ Ldr(temp1, HeapOperand(arg, count_offset)); + } // Return zero if both strings are empty. __ Orr(out, temp0, temp1); __ Cbz(out, &end); @@ -1204,8 +1313,22 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Shorter string is empty? __ Cbz(temp2, &end); + if (mirror::kUseStringCompression) { + // Check if both strings using same compression style to use this comparison loop. + __ Eor(temp3.W(), temp3, Operand(temp5)); + __ Tbnz(temp3.W(), kWRegSize - 1, &different_compression); + } // Store offset of string value in preparation for comparison loop. __ Mov(temp1, value_offset); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // This could be in theory exceed INT32_MAX, so treat temp2 as unsigned. + vixl::aarch64::Label let_it_signed; + __ Cmp(temp5, Operand(0)); + __ B(lt, &let_it_signed); + __ Add(temp2, temp2, Operand(temp2)); + __ Bind(&let_it_signed); + } UseScratchRegisterScope scratch_scope(masm); Register temp4 = scratch_scope.AcquireX(); @@ -1222,34 +1345,95 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Loop to compare 4x16-bit characters at a time (ok because of string data alignment). __ Bind(&loop); - __ Ldr(temp4, MemOperand(str.X(), temp1)); - __ Ldr(temp0, MemOperand(arg.X(), temp1)); + __ Ldr(temp4, MemOperand(str.X(), temp1.X())); + __ Ldr(temp0, MemOperand(arg.X(), temp1.X())); __ Cmp(temp4, temp0); __ B(ne, &find_char_diff); __ Add(temp1, temp1, char_size * 4); - __ Subs(temp2, temp2, 4); - __ B(gt, &loop); + // With string compression, we have compared 8 bytes, otherwise 4 chars. + __ Subs(temp2, temp2, (mirror::kUseStringCompression) ? 8 : 4); + __ B(hi, &loop); __ B(&end); // Promote temp1 to an X reg, ready for EOR. temp1 = temp1.X(); - // Find the single 16-bit character difference. + // Find the single character difference. __ Bind(&find_char_diff); // Get the bit position of the first character that differs. __ Eor(temp1, temp0, temp4); __ Rbit(temp1, temp1); __ Clz(temp1, temp1); - // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then + // If the number of chars remaining <= the index where the difference occurs (0-3), then // the difference occurs outside the remaining string data, so just return length diff (out). - __ Cmp(temp2, Operand(temp1, LSR, 4)); - __ B(le, &end); + // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the + // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or + // unsigned when string compression is disabled. + // When it's enabled, the comparison must be unsigned. + __ Cmp(temp2, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4)); + __ B(ls, &end); // Extract the characters and calculate the difference. 
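The compareTo changes above juggle a count field that doubles as a compression flag: Bic with 0x80000000 strips the flag to recover the character count, Tbnz/Tbz on bit 31 select the compressed (8-bit) or uncompressed (16-bit) path, and for uncompressed data the remaining count is doubled so the main loop can count down in bytes. A compact model of that bookkeeping follows, using hypothetical helper names rather than the mirror::String API.

#include <cstdint>

// Toy model of the count-field layout the ARM64 code above manipulates:
// bit 31 is the compression flag (set means 8-bit data in this version of
// the code), bits 30..0 hold the character count. Helper names are
// illustrative, not the mirror::String API.
constexpr uint32_t kCompressionFlagBit = 0x80000000u;

inline bool IsCompressed(uint32_t count_field) {
  return (count_field & kCompressionFlagBit) != 0u;  // what Tbnz on bit 31 tests
}

inline uint32_t CharCount(uint32_t count_field) {
  return count_field & ~kCompressionFlagBit;         // what Bic(0x80000000) computes
}

// Bytes the comparison loop walks: one per char when compressed, two otherwise
// (the conditional Add(temp2, temp2) above). Kept unsigned because, as the
// diff's own comment notes, the byte count can in theory exceed INT32_MAX.
inline uint32_t BytesToCompare(uint32_t count_field, uint32_t min_char_count) {
  return IsCompressed(count_field) ? min_char_count : min_char_count * 2u;
}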
+ vixl::aarch64::Label uncompressed_string, continue_process; + if (mirror:: kUseStringCompression) { + __ Tbz(temp5, kWRegSize - 1, &uncompressed_string); + __ Bic(temp1, temp1, 0x7); + __ B(&continue_process); + } + __ Bind(&uncompressed_string); __ Bic(temp1, temp1, 0xf); + __ Bind(&continue_process); + __ Lsr(temp0, temp0, temp1); __ Lsr(temp4, temp4, temp1); + vixl::aarch64::Label uncompressed_string_extract_chars; + if (mirror::kUseStringCompression) { + __ Tbz(temp5, kWRegSize - 1, &uncompressed_string_extract_chars); + __ And(temp4, temp4, 0xff); + __ Sub(out, temp4.W(), Operand(temp0.W(), UXTB)); + __ B(&end); + } + __ Bind(&uncompressed_string_extract_chars); __ And(temp4, temp4, 0xffff); - __ Sub(out, temp4, Operand(temp0, UXTH)); + __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH)); + __ B(&end); + + if (mirror::kUseStringCompression) { + vixl::aarch64::Label loop_this_compressed, loop_arg_compressed, find_diff; + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + temp0 = temp0.W(); + temp1 = temp1.W(); + // Comparison for different compression style. + // This part is when THIS is compressed and ARG is not. + __ Bind(&different_compression); + __ Add(temp0, str, Operand(value_offset)); + __ Add(temp1, arg, Operand(value_offset)); + __ Cmp(temp5, Operand(0)); + __ B(lt, &loop_arg_compressed); + + __ Bind(&loop_this_compressed); + __ Ldrb(temp3, MemOperand(temp0.X(), c_char_size, PostIndex)); + __ Ldrh(temp5, MemOperand(temp1.X(), char_size, PostIndex)); + __ Cmp(temp3, Operand(temp5)); + __ B(ne, &find_diff); + __ Subs(temp2, temp2, 1); + __ B(gt, &loop_this_compressed); + __ B(&end); + + // This part is when THIS is not compressed and ARG is. + __ Bind(&loop_arg_compressed); + __ Ldrh(temp3, MemOperand(temp0.X(), char_size, PostIndex)); + __ Ldrb(temp5, MemOperand(temp1.X(), c_char_size, PostIndex)); + __ Cmp(temp3, Operand(temp5)); + __ B(ne, &find_diff); + __ Subs(temp2, temp2, 1); + __ B(gt, &loop_arg_compressed); + __ B(&end); + + // Calculate the difference. + __ Bind(&find_diff); + __ Sub(out, temp3.W(), Operand(temp5.W(), UXTH)); + } __ Bind(&end); @@ -1284,7 +1468,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { Register temp1 = WRegisterFrom(locations->GetTemp(0)); Register temp2 = WRegisterFrom(locations->GetTemp(1)); - vixl::aarch64::Label loop; + vixl::aarch64::Label loop, preloop; vixl::aarch64::Label end; vixl::aarch64::Label return_true; vixl::aarch64::Label return_false; @@ -1322,22 +1506,37 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp, MemOperand(str.X(), count_offset)); __ Ldr(temp1, MemOperand(arg.X(), count_offset)); // Check if lengths are equal, return false if they're not. + // Also compares the compression style, if differs return false. __ Cmp(temp, temp1); __ B(&return_false, ne); - // Store offset of string value in preparation for comparison loop - __ Mov(temp1, value_offset); // Return true if both strings are empty. + if (mirror::kUseStringCompression) { + // Length needs to be masked out first because 0 is treated as compressed. + __ Bic(temp, temp, Operand(static_cast<int32_t>(0x80000000))); + } __ Cbz(temp, &return_true); // Assertions that must hold in order to compare strings 4 characters at a time. DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); + if (mirror::kUseStringCompression) { + // If not compressed, directly to fast compare. 
Else do preprocess on length. + __ Cmp(temp1, Operand(0)); + __ B(&preloop, gt); + // Mask out compression flag and adjust length for compressed string (8-bit) + // as if it is a 16-bit data, new_length = (length + 1) / 2 + __ Add(temp, temp, 1); + __ Lsr(temp, temp, 1); + } + temp1 = temp1.X(); temp2 = temp2.X(); - // Loop to compare strings 4 characters at a time starting at the beginning of the string. // Ok to do this because strings are zero-padded to be 8-byte aligned. + // Store offset of string value in preparation for comparison loop + __ Bind(&preloop); + __ Mov(temp1, value_offset); __ Bind(&loop); __ Ldr(out, MemOperand(str.X(), temp1)); __ Ldr(temp2, MemOperand(arg.X(), temp1)); @@ -1397,9 +1596,8 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, __ Mov(tmp_reg, 0); } - __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pIndexOf).Int32Value())); + codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); - __ Blr(lr); if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); @@ -1408,7 +1606,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. @@ -1428,7 +1626,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. 
@@ -1446,7 +1644,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); @@ -1466,12 +1664,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) codegen_->AddSlowPath(slow_path); __ B(eq, slow_path->GetEntryLabel()); - __ Ldr(lr, - MemOperand(tr, - QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromBytes).Int32Value())); + codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); - __ Blr(lr); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1487,25 +1681,19 @@ void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invo } void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) { - MacroAssembler* masm = GetVIXLAssembler(); - // No need to emit code checking whether `locations->InAt(2)` is a null // pointer, as callers of the native method // // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) // // all include a null check on `data` before calling that method. - __ Ldr(lr, - MemOperand(tr, - QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromChars).Int32Value())); + codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); - __ Blr(lr); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); @@ -1522,12 +1710,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke codegen_->AddSlowPath(slow_path); __ B(eq, slow_path->GetEntryLabel()); - __ Ldr(lr, - MemOperand(tr, - QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pAllocStringFromString).Int32Value())); + codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); - __ Blr(lr); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1562,13 +1746,9 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) } static void GenFPToFPCall(HInvoke* invoke, - MacroAssembler* masm, CodeGeneratorARM64* codegen, QuickEntrypointEnum entry) { - __ Ldr(lr, MemOperand(tr, - GetThreadOffset<kArm64PointerSize>(entry).Int32Value())); - __ Blr(lr); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); } void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) { @@ -1576,7 +1756,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) { - GenFPToFPCall(invoke, 
GetVIXLAssembler(), codegen_, kQuickCos); + GenFPToFPCall(invoke, codegen_, kQuickCos); } void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) { @@ -1584,7 +1764,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSin); + GenFPToFPCall(invoke, codegen_, kQuickSin); } void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) { @@ -1592,7 +1772,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAcos); + GenFPToFPCall(invoke, codegen_, kQuickAcos); } void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) { @@ -1600,7 +1780,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAsin); + GenFPToFPCall(invoke, codegen_, kQuickAsin); } void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) { @@ -1608,7 +1788,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan); + GenFPToFPCall(invoke, codegen_, kQuickAtan); } void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) { @@ -1616,7 +1796,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCbrt); + GenFPToFPCall(invoke, codegen_, kQuickCbrt); } void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) { @@ -1624,7 +1804,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickCosh); + GenFPToFPCall(invoke, codegen_, kQuickCosh); } void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) { @@ -1632,7 +1812,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExp); + GenFPToFPCall(invoke, codegen_, kQuickExp); } void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) { @@ -1640,7 +1820,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickExpm1); + GenFPToFPCall(invoke, codegen_, kQuickExpm1); } void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) { @@ -1648,7 +1828,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog); + GenFPToFPCall(invoke, codegen_, kQuickLog); } void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) { @@ -1656,7 +1836,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickLog10); + GenFPToFPCall(invoke, codegen_, kQuickLog10); } 
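A pattern repeated throughout this file (and in intrinsics_mips.cc later in the patch) is replacing the open-coded runtime call sequence, that is, load the entrypoint off the thread register, Blr, then RecordPcInfo, with a single codegen->InvokeRuntime(entrypoint, instruction, dex_pc, slow_path) call. The toy below only sketches what such a helper centralizes; the real CodeGeneratorARM64::InvokeRuntime also takes the calling instruction and an optional slow path and handles the safepoint bookkeeping, and it is defined in the ARM64 code generator, not in this file.

#include <cstdint>
#include <iostream>

// Toy model (all names invented) of what a centralized InvokeRuntime helper
// removes from each intrinsic: the entrypoint load off the thread register,
// the indirect call, and the safepoint recording, which used to be repeated
// inline at every call site.
enum class QuickEntrypoint { kCos, kIndexOf, kAllocStringFromBytes };

struct ToyCodeGenerator {
  void EmitEntrypointLoadAndCall(QuickEntrypoint ep) {
    std::cout << "ldr lr, [thread, #entrypoint_offset("
              << static_cast<int>(ep) << ")]\nblr lr\n";
  }
  void RecordPcInfo(uint32_t dex_pc) {
    std::cout << "record safepoint at dex pc " << dex_pc << "\n";
  }
  void InvokeRuntime(QuickEntrypoint ep, uint32_t dex_pc) {
    EmitEntrypointLoadAndCall(ep);
    RecordPcInfo(dex_pc);
  }
};

int main() {
  ToyCodeGenerator cg;
  // What used to be Ldr(lr, ...), Blr(lr), RecordPcInfo(...) per intrinsic.
  cg.InvokeRuntime(QuickEntrypoint::kCos, /* dex_pc */ 7);
  cg.InvokeRuntime(QuickEntrypoint::kIndexOf, /* dex_pc */ 11);
  return 0;
}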
void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) { @@ -1664,7 +1844,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickSinh); + GenFPToFPCall(invoke, codegen_, kQuickSinh); } void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) { @@ -1672,7 +1852,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTan); + GenFPToFPCall(invoke, codegen_, kQuickTan); } void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) { @@ -1680,7 +1860,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickTanh); + GenFPToFPCall(invoke, codegen_, kQuickTanh); } void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) { @@ -1688,7 +1868,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickAtan2); + GenFPToFPCall(invoke, codegen_, kQuickAtan2); } void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) { @@ -1696,7 +1876,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickHypot); + GenFPToFPCall(invoke, codegen_, kQuickHypot); } void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) { @@ -1704,7 +1884,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) { - GenFPToFPCall(invoke, GetVIXLAssembler(), codegen_, kQuickNextAfter); + GenFPToFPCall(invoke, codegen_, kQuickNextAfter); } void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -1720,6 +1900,10 @@ void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + // Need temporary register for String compression feature. + if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -1747,29 +1931,41 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register src_ptr = XRegisterFrom(locations->GetTemp(0)); Register num_chr = XRegisterFrom(locations->GetTemp(1)); Register tmp1 = XRegisterFrom(locations->GetTemp(2)); + Register tmp3; + if (mirror::kUseStringCompression) { + tmp3 = WRegisterFrom(locations->GetTemp(3)); + } UseScratchRegisterScope temps(masm); Register dst_ptr = temps.AcquireX(); Register tmp2 = temps.AcquireX(); - // src address to copy from. - __ Add(src_ptr, srcObj, Operand(value_offset)); - __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); + vixl::aarch64::Label done; + vixl::aarch64::Label compressed_string_loop; + __ Sub(num_chr, srcEnd, srcBegin); + // Early out for valid zero-length retrievals. + __ Cbz(num_chr, &done); // dst address start to copy to. 
__ Add(dst_ptr, dstObj, Operand(data_offset)); __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1)); - __ Sub(num_chr, srcEnd, srcBegin); + // src address to copy from. + __ Add(src_ptr, srcObj, Operand(value_offset)); + vixl::aarch64::Label compressed_string_preloop; + if (mirror::kUseStringCompression) { + // Location of count in string. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + // String's length. + __ Ldr(tmp3, MemOperand(srcObj, count_offset)); + __ Tbnz(tmp3, kWRegSize - 1, &compressed_string_preloop); + } + __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); // Do the copy. vixl::aarch64::Label loop; - vixl::aarch64::Label done; vixl::aarch64::Label remainder; - // Early out for valid zero-length retrievals. - __ Cbz(num_chr, &done); - // Save repairing the value of num_chr on the < 8 character path. __ Subs(tmp1, num_chr, 8); __ B(lt, &remainder); @@ -1795,6 +1991,20 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Subs(num_chr, num_chr, 1); __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); __ B(gt, &remainder); + __ B(&done); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + __ Bind(&compressed_string_preloop); + __ Add(src_ptr, src_ptr, Operand(srcBegin)); + // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. + __ Bind(&compressed_string_loop); + __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex)); + __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); + __ Subs(num_chr, num_chr, Operand(1)); + __ B(gt, &compressed_string_loop); + } __ Bind(&done); } @@ -2033,9 +2243,9 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128; // We want to use two temporary registers in order to reduce the register pressure in arm64. // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary. void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2088,12 +2298,20 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP0, obtained from the VIXL scratch register + // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64 + // (because that register is clobbered by ReadBarrierMarkRegX + // entry points). Get an extra temporary register from the + // register allocator. + locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2102,6 +2320,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); Register src = XRegisterFrom(locations->InAt(0)); Location src_pos = locations->InAt(1); @@ -2109,10 +2328,12 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); Register temp1 = WRegisterFrom(locations->GetTemp(0)); + Location temp1_loc = LocationFrom(temp1); Register temp2 = WRegisterFrom(locations->GetTemp(1)); + Location temp2_loc = LocationFrom(temp2); - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); vixl::aarch64::Label conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -2128,7 +2349,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ Cmp(src, dest); - __ B(slow_path->GetEntryLabel(), eq); + __ B(intrinsic_slow_path->GetEntryLabel(), eq); } // Checked when building locations. DCHECK(!optimizations.GetDestinationIsSource() @@ -2139,7 +2360,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ B(&conditions_on_positions_validated, ne); } __ Cmp(WRegisterFrom(dest_pos), src_pos_constant); - __ B(slow_path->GetEntryLabel(), gt); + __ B(intrinsic_slow_path->GetEntryLabel(), gt); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -2148,19 +2369,19 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()), OperandFrom(dest_pos, invoke->InputAt(3)->GetType())); - __ B(slow_path->GetEntryLabel(), lt); + __ B(intrinsic_slow_path->GetEntryLabel(), lt); } __ Bind(&conditions_on_positions_validated); if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. - __ Cbz(src, slow_path->GetEntryLabel()); + __ Cbz(src, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. - __ Cbz(dest, slow_path->GetEntryLabel()); + __ Cbz(dest, intrinsic_slow_path->GetEntryLabel()); } // We have already checked in the LocationsBuilder for the constant case. @@ -2168,17 +2389,17 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { // If the length is negative, bail out. - __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel()); + __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel()); // If the length >= 128 then (currently) prefer native implementation. 
__ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold); - __ B(slow_path->GetEntryLabel(), ge); + __ B(intrinsic_slow_path->GetEntryLabel(), ge); } // Validity checks: source. CheckSystemArrayCopyPosition(masm, src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -2187,90 +2408,236 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); { // We use a block to end the scratch scope before the write barrier, thus // freeing the temporary registers so they can be used in `MarkGCCard`. UseScratchRegisterScope temps(masm); + // Note: Because it is acquired from VIXL's scratch register pool, + // `temp3` might be IP0, and thus cannot be used as `ref` argument + // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier + // calls below (see ReadBarrierMarkSlowPathARM64 for more details). Register temp3 = temps.AcquireW(); + if (!optimizations.GetDoesNotNeedTypeCheck()) { // Check whether all elements of the source array are assignable to the component // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - __ Ldr(temp1, MemOperand(dest, class_offset)); - __ Ldr(temp2, MemOperand(src, class_offset)); - bool did_unpoison = false; - if (!optimizations.GetDestinationIsNonPrimitiveArray() || - !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoison them - // both to make the identity check valid. - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); - did_unpoison = true; - } - if (!optimizations.GetDestinationIsNonPrimitiveArray()) { - // Bail out if the destination is not a non primitive array. - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ Ldr(temp3, HeapOperand(temp1, component_offset)); - __ Cbz(temp3, slow_path->GetEntryLabel()); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); - __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp3, slow_path->GetEntryLabel()); - } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + src.W(), + class_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + temp1, + component_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); + __ Ldrh(temp1, HeapOperand(temp1, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + } - if (!optimizations.GetSourceIsNonPrimitiveArray()) { - // Bail out if the source is not a non primitive array. 
- // /* HeapReference<Class> */ temp3 = temp2->component_type_ - __ Ldr(temp3, HeapOperand(temp2, component_offset)); - __ Cbz(temp3, slow_path->GetEntryLabel()); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); - __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp3, slow_path->GetEntryLabel()); - } + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + dest.W(), + class_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such a `temp1`. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp2_loc, + temp1, + component_offset, + temp3, + /* needs_null_check */ false, + /* use_load_acquire */ false); + __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); + __ Ldrh(temp2, HeapOperand(temp2, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel()); + } - __ Cmp(temp1, temp2); + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp2_loc, + src.W(), + class_offset, + temp3, + /* needs_null_check */ false, + /* use_load_acquire */ false); + // Note: if heap poisoning is on, we are comparing two unpoisoned references here. + __ Cmp(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + vixl::aarch64::Label do_copy; + __ B(&do_copy, eq); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + temp1, + component_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. + __ Ldr(temp1, HeapOperand(temp1, super_offset)); + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ B(intrinsic_slow_path->GetEntryLabel(), ne); + } + } else { + // Non read barrier code. + + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ Ldr(temp1, MemOperand(dest, class_offset)); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ Ldr(temp2, MemOperand(src, class_offset)); + bool did_unpoison = false; + if (!optimizations.GetDestinationIsNonPrimitiveArray() || + !optimizations.GetSourceIsNonPrimitiveArray()) { + // One or two of the references need to be unpoisoned. Unpoison them + // both to make the identity check valid. 
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ Ldr(temp3, HeapOperand(temp1, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } - if (optimizations.GetDestinationIsTypedObjectArray()) { - vixl::aarch64::Label do_copy; - __ B(&do_copy, eq); - if (!did_unpoison) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp2->component_type_ + __ Ldr(temp3, HeapOperand(temp2, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + __ Cmp(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + vixl::aarch64::Label do_copy; + __ B(&do_copy, eq); + if (!did_unpoison) { + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ Ldr(temp1, HeapOperand(temp1, component_offset)); codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ Ldr(temp1, HeapOperand(temp1, super_offset)); + // No need to unpoison the result, we're comparing against null. + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ B(intrinsic_slow_path->GetEntryLabel(), ne); } - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ Ldr(temp1, HeapOperand(temp1, component_offset)); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ Ldr(temp1, HeapOperand(temp1, super_offset)); - // No need to unpoison the result, we're comparing against null. - __ Cbnz(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_copy); - } else { - __ B(slow_path->GetEntryLabel(), ne); } } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. 
- // /* HeapReference<Class> */ temp1 = src->klass_ - __ Ldr(temp1, HeapOperand(src.W(), class_offset)); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ Ldr(temp3, HeapOperand(temp1, component_offset)); - __ Cbz(temp3, slow_path->GetEntryLabel()); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); - __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + src.W(), + class_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp2_loc, + temp1, + component_offset, + temp3, + /* needs_null_check */ false, + /* use_load_acquire */ false); + __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ Ldr(temp1, HeapOperand(src.W(), class_offset)); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + __ Ldr(temp2, HeapOperand(temp1, component_offset)); + __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); + } + // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); + __ Ldrh(temp2, HeapOperand(temp2, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp3, slow_path->GetEntryLabel()); + __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel()); } Register src_curr_addr = temp1.X(); Register dst_curr_addr = temp2.X(); - Register src_stop_addr = temp3.X(); + Register src_stop_addr; + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP0, obtained from the VIXL scratch + // register pool as `temp3`, cannot be used in + // ReadBarrierSystemArrayCopySlowPathARM64 (because that + // register is clobbered by ReadBarrierMarkRegX entry points). + // So another temporary register allocated by the register + // allocator instead. + DCHECK_EQ(LocationFrom(temp3).reg(), IP0); + src_stop_addr = XRegisterFrom(locations->GetTemp(2)); + } else { + src_stop_addr = temp3.X(); + } GenSystemArrayCopyAddresses(masm, Primitive::kPrimNot, @@ -2283,25 +2650,98 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { dst_curr_addr, src_stop_addr); - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - vixl::aarch64::Label loop, done; const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); - __ Bind(&loop); - __ Cmp(src_curr_addr, src_stop_addr); - __ B(&done, eq); - { + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. 
+ // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + vixl::aarch64::Label loop, done; + + // Don't enter copy loop if `length == 0`. + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&done, eq); + Register tmp = temps.AcquireW(); + // Make sure `tmp` is not IP0, as it is clobbered by + // ReadBarrierMarkRegX entry points in + // ReadBarrierSystemArrayCopySlowPathARM64. + DCHECK_NE(LocationFrom(tmp).reg(), IP0); + + // /* int32_t */ monitor = src->monitor_ + __ Ldr(tmp, HeapOperand(src.W(), monitor_offset)); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `tmp`. + __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32)); + + // Slow path used to copy array when `src` is gray. + SlowPathCodeARM64* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp)); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&loop, ne); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code. + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + vixl::aarch64::Label loop, done; + __ Bind(&loop); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&done, eq); + { + Register tmp = temps.AcquireW(); + __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); + __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + } + __ B(&loop); + __ Bind(&done); } - __ B(&loop); - __ Bind(&done); } // We only need one card marking on the destination array. codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } static void GenIsInfinite(LocationSummary* locations, diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 525153621b..28e41cb086 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -42,7 +42,7 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. 
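The pseudocode comment added to VisitSystemArrayCopy above is the clearest statement of the new control flow: read the source object's lock word once, take the raw copy loop when the read barrier state is not gray, and fall back to a per-element marking loop otherwise. Below is a sequential C++ model of that comment (no concurrency, placeholder lock-word and Mark helpers); the real code branches to ReadBarrierSystemArrayCopySlowPathARM64 rather than calling a function.

#include <cstdint>

// Sequential model of the comment above; Obj, IsGray() and Mark() are
// placeholders, and poisoning is omitted for brevity.
struct Obj { uint32_t monitor; };

inline bool IsGray(const Obj* src) {
  // Low bit of the read barrier state held in the lock word; the shift used
  // here is an assumption, the generated code uses LockWord::kReadBarrierStateShift.
  return ((src->monitor >> 28) & 1u) != 0u;
}

inline Obj* Mark(Obj* ref) { return ref; }  // stand-in for ReadBarrier::Mark

void CopyReferenceRange(const Obj* src, Obj** src_ptr, Obj** end_ptr, Obj** dest_ptr) {
  if (src_ptr == end_ptr) {
    return;  // mirrors the "don't enter copy loop if length == 0" guard
  }
  if (IsGray(src)) {
    // Slow-path copy: every element is passed through the mark routine.
    do {
      *dest_ptr++ = Mark(*src_ptr++);
    } while (src_ptr != end_ptr);
  } else {
    // Fast-path copy: a raw copy of the references is sufficient.
    do {
      *dest_ptr++ = *src_ptr++;
    } while (src_ptr != end_ptr);
  }
}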
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -66,7 +66,7 @@ class IntrinsicCodeGeneratorARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h deleted file mode 100644 index db60238fb4..0000000000 --- a/compiler/optimizing/intrinsics_list.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ -#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ - -// All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected -// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual), then whether it requires an -// environment, may have side effects, or may throw exceptions. 
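The OPTIMIZING_INTRINSICS macros in intrinsics_arm64.h now end in ", ...", and the local intrinsics_list.h is deleted below while the #include sites stay in place, so the visitor declarations keep compiling even if the list rows carry columns these expansions do not consume. A stripped-down illustration of that variadic X-macro pattern, with made-up entries:

#include <iostream>

// Minimal X-macro illustration (made-up rows, unrelated to the real
// INTRINSICS_LIST): every row carries four columns, but a consumer that only
// needs the name swallows the rest through a variadic parameter, exactly what
// the trailing ", ..." added to OPTIMIZING_INTRINSICS enables.
#define TOY_INTRINSICS_LIST(V)         \
  V(FloatToBits, kStatic, kNoThrow, 1) \
  V(StringLength, kDirect, kNoThrow, 2)

// Consumer 1: declares visitors and ignores the extra columns.
#define DECLARE_VISITOR(Name, ...) void Visit##Name();
TOY_INTRINSICS_LIST(DECLARE_VISITOR)
#undef DECLARE_VISITOR

// Consumer 2: uses every column.
#define DEFINE_VISITOR(Name, Kind, Throws, Id)                                  \
  void Visit##Name() { std::cout << #Name << " " << #Kind << " " << Id << "\n"; }
TOY_INTRINSICS_LIST(DEFINE_VISITOR)
#undef DEFINE_VISITOR

int main() {
  VisitFloatToBits();
  VisitStringLength();
  return 0;
}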
- -#define INTRINSICS_LIST(V) \ - V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(DoubleIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(DoubleIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatFloatToIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAbsLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAbsInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMinDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMinFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMinLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMinIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - 
V(MathMaxDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathCos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathSin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAcos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAsin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAtan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAtan2, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathCbrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathCosh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathExp, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathExpm1, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathHypot, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathLog, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathLog10, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathSinh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathTan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathTanh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathSqrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathCeil, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathFloor, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathRint, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(MemoryPeekLongNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(MemoryPeekShortNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(MemoryPokeByte, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ - V(MemoryPokeIntNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ - V(MemoryPokeLongNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ - V(MemoryPokeShortNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ - V(StringCharAt, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringCompareTo, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringEquals, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - 
V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringIsEmpty, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \ - V(StringLength, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \ - V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeCASInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeCASLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeCASObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGet, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePut, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndAddInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndAddLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndSetInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndSetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndSetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeLoadFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeStoreFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeFullFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) - -#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ -#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // #define is only for lint. diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 9449f79169..5239f8f020 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -634,7 +634,7 @@ static void GenBitCount(LocationSummary* locations, // For 64-bit quantities, this algorithm gets executed twice, (once // for in_lo, and again for in_hi), but saves a few instructions // because the mask values only have to be loaded once. 
Using this - // algorithm the count for a 64-bit operand can be performed in 33 + // algorithm the count for a 64-bit operand can be performed in 29 // instructions compared to a loop-based algorithm which required 47 // instructions. @@ -687,37 +687,36 @@ static void GenBitCount(LocationSummary* locations, __ Srl(tmp_lo, tmp_lo, 2); __ And(tmp_lo, tmp_lo, AT); __ Addu(tmp_lo, out_lo, tmp_lo); - __ Srl(out_lo, tmp_lo, 4); - __ Addu(out_lo, out_lo, tmp_lo); __ And(out_hi, tmp_hi, AT); __ Srl(tmp_hi, tmp_hi, 2); __ And(tmp_hi, tmp_hi, AT); __ Addu(tmp_hi, out_hi, tmp_hi); - __ Srl(out_hi, tmp_hi, 4); - __ Addu(out_hi, out_hi, tmp_hi); + // Here we deviate from the original algorithm a bit. We've reached + // the stage where the bitfields holding the subtotals are large + // enough to hold the combined subtotals for both the low word, and + // the high word. This means that we can add the subtotals for the + // the high, and low words into a single word, and compute the final + // result for both the high, and low words using fewer instructions. __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out_lo, out_lo, AT); - __ And(out_hi, out_hi, AT); + __ Addu(TMP, tmp_hi, tmp_lo); + + __ Srl(out, TMP, 4); + __ And(out, out, AT); + __ And(TMP, TMP, AT); + __ Addu(out, out, TMP); __ LoadConst32(AT, 0x01010101); if (isR6) { - __ MulR6(out_lo, out_lo, AT); - - __ MulR6(out_hi, out_hi, AT); + __ MulR6(out, out, AT); } else { - __ MulR2(out_lo, out_lo, AT); - - __ MulR2(out_hi, out_hi, AT); + __ MulR2(out, out, AT); } - __ Srl(out_lo, out_lo, 24); - __ Srl(out_hi, out_hi, 24); - - __ Addu(out, out_hi, out_lo); + __ Srl(out, out, 24); } } @@ -1875,7 +1874,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) { // int java.lang.String.compareTo(String anotherString) void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1895,13 +1894,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) { SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke); codegen_->AddSlowPath(slow_path); __ Beqz(argument, slow_path->GetEntryLabel()); - - __ LoadFromOffset(kLoadWord, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pStringCompareTo).Int32Value()); - __ Jalr(T9); - __ Nop(); + codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); __ Bind(slow_path->GetExitLabel()); } @@ -2055,13 +2048,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ Clear(tmp_reg); } - __ LoadFromOffset(kLoadWord, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pIndexOf).Int32Value()); - __ Jalr(T9); - __ Nop(); - + codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); } @@ -2070,7 +2057,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // int java.lang.String.indexOf(int ch) void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. 
So it's best to align the inputs accordingly. @@ -2095,7 +2082,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) { // int java.lang.String.indexOf(int ch, int fromIndex) void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. @@ -2121,7 +2108,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -2140,14 +2127,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke); codegen_->AddSlowPath(slow_path); __ Beqz(byte_array, slow_path->GetEntryLabel()); - - __ LoadFromOffset(kLoadWord, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromBytes).Int32Value()); - __ Jalr(T9); - __ Nop(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); __ Bind(slow_path->GetExitLabel()); } @@ -2165,28 +2145,19 @@ void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invok } void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - // No need to emit code checking whether `locations->InAt(2)` is a null // pointer, as callers of the native method // // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) // // all include a null check on `data` before calling that method. 
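A recurring change across these MIPS string intrinsics is that the hand-written entrypoint call sequence is collapsed into a single codegen_->InvokeRuntime(...) call. The sketch below is reconstructed purely from the instruction sequences being removed in this diff; the real CodeGeneratorMIPS helper additionally validates the entrypoint and cooperates with slow paths, so treat the name and shape as illustrative rather than the actual API.

// Illustrative only: roughly the boilerplate each removed call site spelled out
// by hand before this change (load the Quick entrypoint through T9, call it,
// fill the branch delay slot, then record PC info for the stack map).
static void CallQuickEntrypointSketch(MipsAssembler* assembler,
                                      CodeGeneratorMIPS* codegen,
                                      HInvoke* invoke,
                                      int32_t entry_point_offset) {
  assembler->LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
  assembler->Jalr(T9);
  assembler->Nop();
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}
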
- - __ LoadFromOffset(kLoadWord, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromChars).Int32Value()); - __ Jalr(T9); - __ Nop(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); } // java.lang.StringFactory.newStringFromString(String toCopy) void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -2202,14 +2173,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromString(HInvoke* invoke) SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke); codegen_->AddSlowPath(slow_path); __ Beqz(string_to_copy, slow_path->GetEntryLabel()); - - __ LoadFromOffset(kLoadWord, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromString).Int32Value()); - __ Jalr(T9); - __ Nop(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -2456,16 +2420,18 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { __ FloorWS(FTMP, in); __ Mfc1(out, FTMP); - __ LoadConst32(TMP, 1); + if (!IsR6()) { + __ LoadConst32(TMP, -1); + } - // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0; + // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0; __ LoadConst32(AT, std::numeric_limits<int32_t>::max()); __ Bne(AT, out, &finite); __ Mtc1(ZERO, FTMP); if (IsR6()) { __ CmpLtS(FTMP, in, FTMP); - __ Mfc1(AT, FTMP); + __ Mfc1(TMP, FTMP); } else { __ ColtS(in, FTMP); } @@ -2474,28 +2440,26 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { __ Bind(&finite); - // TMP = (0.5f <= (in - out)) ? 1 : 0; + // TMP = (0.5f <= (in - out)) ? -1 : 0; __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float". __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f)); __ SubS(FTMP, in, FTMP); __ Mtc1(AT, half); if (IsR6()) { __ CmpLeS(FTMP, half, FTMP); - __ Mfc1(AT, FTMP); + __ Mfc1(TMP, FTMP); } else { __ ColeS(half, FTMP); } __ Bind(&add); - if (IsR6()) { - __ Selnez(TMP, TMP, AT); - } else { + if (!IsR6()) { __ Movf(TMP, ZERO); } - // Return out += TMP. - __ Addu(out, out, TMP); + // Return out -= TMP. + __ Subu(out, out, TMP); __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index 575a7d0a23..e134cb882e 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -60,7 +60,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { // Define visitor methods. 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 8d4d3e5e91..1d153e2e18 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1519,7 +1519,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { // int java.lang.String.compareTo(String anotherString) void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1540,12 +1540,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { codegen_->AddSlowPath(slow_path); __ Beqzc(argument, slow_path->GetEntryLabel()); - __ LoadFromOffset(kLoadDoubleword, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pStringCompareTo).Int32Value()); - __ Jalr(T9); - __ Nop(); + codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); __ Bind(slow_path->GetExitLabel()); } @@ -1691,13 +1686,8 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ Clear(tmp_reg); } - __ LoadFromOffset(kLoadDoubleword, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pIndexOf).Int32Value()); + codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); - __ Jalr(T9); - __ Nop(); if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); @@ -1707,7 +1697,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // int java.lang.String.indexOf(int ch) void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. @@ -1728,7 +1718,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) { // int java.lang.String.indexOf(int ch, int fromIndex) void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. 
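The `...` appended to each OPTIMIZING_INTRINSICS parameter list above is the usual X-macro trick for letting intrinsics_list.h grow extra per-entry columns without breaking consumers that only read the first few. A standalone illustration of the pattern follows; the list entries and names here are invented for the example and are not taken from intrinsics_list.h.

#include <iostream>

// A tiny X-macro list. Each entry carries several attribute columns, including a
// newer column that older consumers do not know about.
#define DEMO_INTRINSICS_LIST(V)                                             \
  V(MathAbs, kStatic, kNoSideEffects, kNoThrow, "java.lang.Math")           \
  V(StringIsEmpty, kDirect, kReadSideEffects, kNoThrow, "java.lang.String")

// A consumer that only needs the name: the trailing ", ..." silently absorbs any
// additional columns, so the shared list can gain fields without touching this macro.
#define DECLARE_VISITOR(Name, IsStatic, SideEffects, Exceptions, ...) \
  void Visit##Name();
DEMO_INTRINSICS_LIST(DECLARE_VISITOR)
#undef DECLARE_VISITOR

void VisitMathAbs() { std::cout << "visit MathAbs\n"; }
void VisitStringIsEmpty() { std::cout << "visit StringIsEmpty\n"; }

int main() {
  VisitMathAbs();
  VisitStringIsEmpty();
  return 0;
}
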
@@ -1748,7 +1738,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1768,15 +1758,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke codegen_->AddSlowPath(slow_path); __ Beqzc(byte_array, slow_path->GetEntryLabel()); - __ LoadFromOffset(kLoadDoubleword, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, - pAllocStringFromBytes).Int32Value()); + codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); - __ Jalr(T9); - __ Nop(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1794,29 +1777,20 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* inv } void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - // No need to emit code checking whether `locations->InAt(2)` is a null // pointer, as callers of the native method // // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) // // all include a null check on `data` before calling that method. - __ LoadFromOffset(kLoadDoubleword, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, - pAllocStringFromChars).Int32Value()); + codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); - __ Jalr(T9); - __ Nop(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } // java.lang.StringFactory.newStringFromString(String toCopy) void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1833,15 +1807,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromString(HInvoke* invok codegen_->AddSlowPath(slow_path); __ Beqzc(string_to_copy, slow_path->GetEntryLabel()); - __ LoadFromOffset(kLoadDoubleword, - T9, - TR, - QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, - pAllocStringFromString).Int32Value()); + codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); - __ Jalr(T9); - __ Nop(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1890,11 +1857,11 @@ static void GenHighestOneBit(LocationSummary* locations, if (type == Primitive::kPrimLong) { __ Dclz(TMP, in); __ LoadConst64(AT, INT64_C(0x8000000000000000)); - __ Dsrlv(out, AT, TMP); + __ Dsrlv(AT, AT, TMP); } else { __ Clz(TMP, in); __ LoadConst32(AT, 0x80000000); - __ Srlv(out, AT, TMP); + __ Srlv(AT, AT, TMP); } // For either value of "type", when "in" 
is zero, "out" should also // be zero. Without this extra "and" operation, when "in" is zero, @@ -1902,7 +1869,7 @@ static void GenHighestOneBit(LocationSummary* locations, // the MIPS logical shift operations "dsrlv", and "srlv" don't use // the shift amount (TMP) directly; they use either (TMP % 64) or // (TMP % 32), respectively. - __ And(out, out, in); + __ And(out, AT, in); } // int java.lang.Integer.highestOneBit(int) diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 4137fbd1b6..5b95c26a21 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -60,7 +60,7 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 65f4def48b..aae3899847 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -70,6 +70,105 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>; +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 
+class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { + public: + explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction) + : SlowPathCode(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + Register src = locations->InAt(0).AsRegister<Register>(); + Location src_pos = locations->InAt(1); + Register dest = locations->InAt(2).AsRegister<Register>(); + Location dest_pos = locations->InAt(3); + Location length = locations->InAt(4); + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + + __ Bind(GetEntryLabel()); + // In this code path, registers `temp1`, `temp2`, and `temp3` + // (resp.) are not used for the base source address, the base + // destination address, and the end source address (resp.), as in + // other SystemArrayCopy intrinsic code paths. Instead they are + // (resp.) used for: + // - the loop index (`i`); + // - the source index (`src_index`) and the loaded (source) + // reference (`value`); and + // - the destination index (`dest_index`). + + // i = 0 + __ xorl(temp1, temp1); + NearLabel loop; + __ Bind(&loop); + // value = src_array[i + src_pos] + if (src_pos.IsConstant()) { + int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + int32_t adjusted_offset = offset + constant * element_size; + __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset)); + } else { + __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); + __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset)); + } + __ MaybeUnpoisonHeapReference(temp2); + // TODO: Inline the mark bit check before calling the runtime? + // value = ReadBarrier::Mark(value) + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more + // explanations.) + DCHECK_NE(temp2, ESP); + DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2; + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2); + // This runtime call does not require a stack map. 
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + __ MaybePoisonHeapReference(temp2); + // dest_array[i + dest_pos] = value + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + int32_t adjusted_offset = offset + constant * element_size; + __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2); + } else { + __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); + __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2); + } + // ++i + __ addl(temp1, Immediate(1)); + // if (i != length) goto loop + x86_codegen->GenerateIntCompare(temp1_loc, length); + __ j(kNotEqual, &loop); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86); +}; + +#undef __ + #define __ assembler-> static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) { @@ -752,20 +851,20 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); } -// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble, -// as it needs 64 bit instructions. void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { - // See intrinsics.h. - if (!kRoundIsPlusPointFive) { - return; - } - // Do we have instruction support? if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { + HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); + DCHECK(static_or_direct != nullptr); LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); + if (static_or_direct->HasSpecialInput() && + invoke->InputAt( + static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); @@ -774,7 +873,7 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { // We have to fall back to a call to the intrinsic. LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly); + LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::RegisterLocation(EAX)); @@ -784,47 +883,54 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); - if (locations->WillCall()) { + if (locations->WillCall()) { // TODO: can we reach this? InvokeOutOfLineIntrinsic(codegen_, invoke); return; } - // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. 
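For reference, the copy loop emitted by ReadBarrierSystemArrayCopySlowPathX86 above reduces to the following scalar form (temp1 holds the loop index, temp2 the loaded reference, temp3 the destination index). This is a hedged sketch, not ART code: Mark stands in for the ReadBarrierMarkRegX entrypoint, and heap-reference poisoning is omitted.

// Scalar sketch of the slow-path loop above (assumptions: plain pointers instead of
// compressed heap references, and a caller-supplied Mark() in place of the runtime
// read-barrier entrypoint).
static void ReadBarrierArrayCopySketch(void** src, int src_pos,
                                       void** dest, int dest_pos,
                                       int length,
                                       void* (*Mark)(void*)) {
  for (int i = 0; i != length; ++i) {   // i lives in temp1
    void* value = src[src_pos + i];     // load the source reference (temp2)
    value = Mark(value);                // ensure it is a to-space reference
    dest[dest_pos + i] = value;         // store into the destination slot (index in temp3)
  }
}
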
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); Register out = locations->Out().AsRegister<Register>(); - XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - NearLabel done, nan; + NearLabel skip_incr, done; X86Assembler* assembler = GetAssembler(); - // Generate 0.5 into inPlusPointFive. - __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); - __ movd(inPlusPointFive, out); - - // Add in the input. - __ addss(inPlusPointFive, in); - - // And truncate to an integer. - __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); - + // Since no direct x86 rounding instruction matches the required semantics, + // this intrinsic is implemented as follows: + // result = floor(in); + // if (in - result >= 0.5f) + // result = result + 1.0f; + __ movss(t2, in); + __ roundss(t1, in, Immediate(1)); + __ subss(t2, t1); + if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { + // Direct constant area available. + Register constant_area = locations->InAt(1).AsRegister<Register>(); + __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area)); + __ j(kBelow, &skip_incr); + __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area)); + __ Bind(&skip_incr); + } else { + // No constant area: go through stack. + __ pushl(Immediate(bit_cast<int32_t, float>(0.5f))); + __ pushl(Immediate(bit_cast<int32_t, float>(1.0f))); + __ comiss(t2, Address(ESP, 4)); + __ j(kBelow, &skip_incr); + __ addss(t1, Address(ESP, 0)); + __ Bind(&skip_incr); + __ addl(ESP, Immediate(8)); + } + + // Final conversion to an integer. Unfortunately this also does not have a + // direct x86 instruction, since NaN should map to 0 and large positive + // values need to be clipped to the extreme value. __ movl(out, Immediate(kPrimIntMax)); - // maxInt = int-to-float(out) - __ cvtsi2ss(maxInt, out); - - // if inPlusPointFive >= maxInt goto done - __ comiss(inPlusPointFive, maxInt); - __ j(kAboveEqual, &done); - - // if input == NaN goto nan - __ j(kUnordered, &nan); - - // output = float-to-int-truncate(input) - __ cvttss2si(out, inPlusPointFive); - __ jmp(&done); - __ Bind(&nan); - - // output = 0 - __ xorl(out, out); + __ cvtsi2ss(t2, out); + __ comiss(t1, t2); + __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered + __ movl(out, Immediate(0)); // does not change flags + __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out) + __ cvttss2si(out, t1); __ Bind(&done); } @@ -857,7 +963,7 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry } // Now do the actual call. - __ fs()->call(Address::Absolute(GetThreadOffset<kX86PointerSize>(entry))); + codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); // Extract the return value from the FP stack. __ fstpl(Address(ESP, 0)); @@ -866,8 +972,6 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry // And clean up the stack. 
__ addl(ESP, Immediate(16)); __ cfi().AdjustCFAOffset(-16); - - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); } void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) { @@ -1216,7 +1320,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) { void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) { // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1237,7 +1341,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pStringCompareTo))); + codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); __ Bind(slow_path->GetExitLabel()); } @@ -1297,23 +1401,39 @@ void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { __ cmpl(str, arg); __ j(kEqual, &return_true); - // Load length of receiver string. + // Load length and compression flag of receiver string. __ movl(ecx, Address(str, count_offset)); - // Check if lengths are equal, return false if they're not. + // Check if lengths and compression flags are equal, return false if they're not. + // Two identical strings will always have same compression style since + // compression style is decided on alloc. __ cmpl(ecx, Address(arg, count_offset)); __ j(kNotEqual, &return_false); - // Return true if both strings are empty. - __ jecxz(&return_true); + if (mirror::kUseStringCompression) { + NearLabel string_uncompressed; + // Differ cases into both compressed or both uncompressed. Different compression style + // is cut above. + __ cmpl(ecx, Immediate(0)); + __ j(kGreaterEqual, &string_uncompressed); + // Divide string length by 2, rounding up, and continue as if uncompressed. + // Merge clearing the compression flag (+0x80000000) with +1 for rounding. + __ addl(ecx, Immediate(0x80000001)); + __ shrl(ecx, Immediate(1)); + __ Bind(&string_uncompressed); + } + // Return true if strings are empty. + __ jecxz(&return_true); // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction. __ leal(esi, Address(str, value_offset)); __ leal(edi, Address(arg, value_offset)); - // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths. + // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not + // divisible by 2. __ addl(ecx, Immediate(1)); __ shrl(ecx, Immediate(1)); - // Assertions that must hold in order to compare strings 2 characters at a time. + // Assertions that must hold in order to compare strings 2 characters (uncompressed) + // or 4 characters (compressed) at a time. DCHECK_ALIGNED(value_offset, 4); static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); @@ -1357,6 +1477,10 @@ static void CreateStringIndexOfLocations(HInvoke* invoke, locations->AddTemp(Location::RegisterLocation(ECX)); // Need another temporary to be able to compute the result. locations->AddTemp(Location::RequiresRegister()); + if (mirror::kUseStringCompression) { + // Need another temporary to be able to save unflagged string length. 
+ locations->AddTemp(Location::RequiresRegister()); + } } static void GenerateStringIndexOf(HInvoke* invoke, @@ -1374,6 +1498,8 @@ static void GenerateStringIndexOf(HInvoke* invoke, Register counter = locations->GetTemp(0).AsRegister<Register>(); Register string_length = locations->GetTemp(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); + // Only used when string compression feature is on. + Register string_length_flagged; // Check our assumptions for registers. DCHECK_EQ(string_obj, EDI); @@ -1411,6 +1537,12 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Load string length, i.e., the count field of the string. __ movl(string_length, Address(string_obj, count_offset)); + if (mirror::kUseStringCompression) { + string_length_flagged = locations->GetTemp(2).AsRegister<Register>(); + __ movl(string_length_flagged, string_length); + // Mask out first bit used as compression flag. + __ andl(string_length, Immediate(INT32_MAX)); + } // Do a zero-length check. // TODO: Support jecxz. @@ -1436,20 +1568,50 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ cmpl(start_index, Immediate(0)); __ cmovl(kGreater, counter, start_index); - // Move to the start of the string: string_obj + value_offset + 2 * start_index. - __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); - - // Now update ecx (the repne scasw work counter). We have string.length - start_index left to - // compare. + if (mirror::kUseStringCompression) { + NearLabel modify_counter, offset_uncompressed_label; + __ cmpl(string_length_flagged, Immediate(0)); + __ j(kGreaterEqual, &offset_uncompressed_label); + // Move to the start of the string: string_obj + value_offset + start_index. + __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset)); + __ jmp(&modify_counter); + + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ Bind(&offset_uncompressed_label); + __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + + // Now update ecx (the repne scasw work counter). We have string.length - start_index left to + // compare. + __ Bind(&modify_counter); + } else { + __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + } __ negl(counter); __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); } - // Everything is set up for repne scasw: - // * Comparison address in EDI. - // * Counter in ECX. - __ repne_scasw(); - + if (mirror::kUseStringCompression) { + NearLabel uncompressed_string_comparison; + NearLabel comparison_done; + __ cmpl(string_length_flagged, Immediate(0)); + __ j(kGreater, &uncompressed_string_comparison); + + // Check if EAX (search_value) is ASCII. + __ cmpl(search_value, Immediate(127)); + __ j(kGreater, ¬_found_label); + // Comparing byte-per-byte. + __ repne_scasb(); + __ jmp(&comparison_done); + + // Everything is set up for repne scasw: + // * Comparison address in EDI. + // * Counter in ECX. + __ Bind(&uncompressed_string_comparison); + __ repne_scasw(); + __ Bind(&comparison_done); + } else { + __ repne_scasw(); + } // Did we find a match? 
__ j(kNotEqual, ¬_found_label); @@ -1490,7 +1652,7 @@ void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1510,9 +1672,8 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromBytes))); + codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1528,22 +1689,19 @@ void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke } void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) { - X86Assembler* assembler = GetAssembler(); - // No need to emit code checking whether `locations->InAt(2)` is a null // pointer, as callers of the native method // // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) // // all include a null check on `data` before calling that method. - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromChars))); + codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1560,10 +1718,8 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); - __ fs()->call( - Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pAllocStringFromString))); + codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1608,38 +1764,64 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); DCHECK_EQ(char_size, 2u); - // Compute the address of the destination buffer. - __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); - - // Compute the address of the source string. - if (srcBegin.IsConstant()) { - // Compute the address of the source string by adding the number of chars from - // the source beginning to the value offset of a string. 
- __ leal(ESI, Address(obj, srcBegin_value * char_size + value_offset)); - } else { - __ leal(ESI, Address(obj, srcBegin.AsRegister<Register>(), - ScaleFactor::TIMES_2, value_offset)); - } - // Compute the number of chars (words) to move. - // Now is the time to save ECX, since we don't know if it will be used later. + // Save ECX, since we don't know if it will be used later. __ pushl(ECX); int stack_adjust = kX86WordSize; __ cfi().AdjustCFAOffset(stack_adjust); DCHECK_EQ(srcEnd, ECX); if (srcBegin.IsConstant()) { - if (srcBegin_value != 0) { - __ subl(ECX, Immediate(srcBegin_value)); - } + __ subl(ECX, Immediate(srcBegin_value)); } else { DCHECK(srcBegin.IsRegister()); __ subl(ECX, srcBegin.AsRegister<Register>()); } - // Do the move. + NearLabel done; + if (mirror::kUseStringCompression) { + // Location of count in string + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + __ pushl(EAX); + __ cfi().AdjustCFAOffset(stack_adjust); + + NearLabel copy_loop, copy_uncompressed; + __ cmpl(Address(obj, count_offset), Immediate(0)); + __ j(kGreaterEqual, ©_uncompressed); + // Compute the address of the source string by adding the number of chars from + // the source beginning to the value offset of a string. + __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset)); + + // Start the loop to copy String's value to Array of Char. + __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); + __ Bind(©_loop); + __ jecxz(&done); + // Use EAX temporary (convert byte from ESI to word). + // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0. + __ movzxb(EAX, Address(ESI, 0)); + __ movw(Address(EDI, 0), EAX); + __ leal(EDI, Address(EDI, char_size)); + __ leal(ESI, Address(ESI, c_char_size)); + // TODO: Add support for LOOP to X86Assembler. + __ subl(ECX, Immediate(1)); + __ jmp(©_loop); + __ Bind(©_uncompressed); + } + + // Do the copy for uncompressed string. + // Compute the address of the destination buffer. + __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); + __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset)); __ rep_movsw(); - // And restore ECX. + __ Bind(&done); + if (mirror::kUseStringCompression) { + // Restore EAX. + __ popl(EAX); + __ cfi().AdjustCFAOffset(-stack_adjust); + } + // Restore ECX. __ popl(ECX); __ cfi().AdjustCFAOffset(-stack_adjust); } @@ -1828,10 +2010,9 @@ static void GenUnsafeGet(HInvoke* invoke, Register output = output_loc.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, temp, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1875,10 +2056,13 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -1892,12 +2076,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, } } else { locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); - } - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } } @@ -2076,10 +2255,16 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, + Primitive::Type type, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -2099,7 +2284,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty // Force a byte register for the output. locations->SetOut(Location::RegisterLocation(EAX)); if (type == Primitive::kPrimNot) { - // Need temp registers for card-marking. + // Need temporary registers for card-marking, and possibly for + // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. // Need a byte register for marking. locations->AddTemp(Location::RegisterLocation(ECX)); @@ -2115,14 +2301,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2138,7 +2319,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code Location out = locations->Out(); DCHECK_EQ(out.AsRegister<Register>(), EAX); + // The address of the field within the holding object. 
+ Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); + if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Register expected = locations->InAt(3).AsRegister<Register>(); // Ensure `expected` is in EAX (required by the CMPXCHG instruction). DCHECK_EQ(expected, EAX); @@ -2146,11 +2338,20 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), - locations->GetTemp(1).AsRegister<Register>(), - base, - value, - value_can_be_null); + codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + temp1_loc, // Unused, used only as a "temporary" within the read barrier. + base, + field_addr, + /* needs_null_check */ false, + /* always_update_field */ true, + &temp2); + } bool base_equals_value = (base == value); if (kPoisonHeapReferences) { @@ -2158,7 +2359,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // If `base` and `value` are the same register location, move // `value` to a temporary register. This way, poisoning // `value` won't invalidate `base`. - value = locations->GetTemp(0).AsRegister<Register>(); + value = temp1; __ movl(value, base); } @@ -2177,19 +2378,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ PoisonHeapReference(value); } - // TODO: Add a read barrier for the reference stored in the object - // before attempting the CAS, similar to the one in the - // art::Unsafe_compareAndSwapObject JNI implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject). - DCHECK(!kEmitCompilerReadBarrier); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + __ LockCmpxchgl(field_addr, value); // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); @@ -2213,8 +2407,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // Ensure the expected value is in EAX (required by the CMPXCHG // instruction). DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), - locations->InAt(4).AsRegister<Register>()); + __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>()); } else if (type == Primitive::kPrimLong) { // Ensure the expected value is in EAX:EDX and that the new // value is in EBX:ECX (required by the CMPXCHG8B instruction). 
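The always_update_field read barrier added to GenCAS above exists because, with a concurrent-copying collector, the compiled code's `expected` value is a to-space reference while the field may still hold the from-space copy of the same object; a raw LOCK CMPXCHG would then fail even though the two references are logically equal. A hedged, self-contained sketch of that reasoning follows; Mark and the field-forwarding step are stand-ins for what the Baker read barrier does, not ART's actual code.

#include <atomic>

// Sketch only: make sure the field holds a to-space reference before the bitwise
// compare-and-swap, so the CAS cannot fail spuriously on a from-space pointer.
static bool CasObjectFieldSketch(std::atomic<void*>* field,
                                 void* expected,   // always a to-space reference
                                 void* new_value,
                                 void* (*Mark)(void*)) {
  void* current = field->load(std::memory_order_relaxed);
  void* to_space = Mark(current);
  if (to_space != current) {
    // Forward the field to the to-space copy (the effect requested by always_update_field).
    field->compare_exchange_strong(current, to_space);
  }
  // Equivalent of the LOCK CMPXCHG emitted by GenCAS.
  return field->compare_exchange_strong(expected, new_value);
}
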
@@ -2222,7 +2415,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); - __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + __ LockCmpxchg8b(field_addr); } else { LOG(FATAL) << "Unexpected CAS type " << type; } @@ -2230,7 +2423,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we // don't need scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); } @@ -2245,14 +2438,9 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); GenCAS(Primitive::kPrimNot, invoke, codegen_); } @@ -2671,9 +2859,9 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) } void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2703,9 +2891,9 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); X86Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2714,17 +2902,21 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); Register src = locations->InAt(0).AsRegister<Register>(); Location src_pos = locations->InAt(1); Register dest = locations->InAt(2).AsRegister<Register>(); Location dest_pos = locations->InAt(3); - Location length = locations->InAt(4); - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Location length_arg = locations->InAt(4); + Location length = length_arg; + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Location temp2_loc = locations->GetTemp(1); + Register temp2 = temp2_loc.AsRegister<Register>(); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); NearLabel conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -2740,7 +2932,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ cmpl(src, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -2748,7 +2940,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, &conditions_on_positions_validated); } __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant)); - __ j(kGreater, slow_path->GetEntryLabel()); + __ j(kGreater, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -2758,10 +2950,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { if (dest_pos.IsConstant()) { int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant)); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } else { __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>()); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } } @@ -2770,16 +2962,17 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. __ testl(src, src); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. 
__ testl(dest, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } - Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + Location temp3_loc = locations->GetTemp(2); + Register temp3 = temp3_loc.AsRegister<Register>(); if (length.IsStackSlot()) { __ movl(temp3, Address(ESP, length.GetStackIndex())); length = Location::RegisterLocation(temp3); @@ -2791,7 +2984,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { __ testl(length.AsRegister<Register>(), length.AsRegister<Register>()); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } // Validity checks: source. @@ -2799,7 +2992,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -2808,7 +3001,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); @@ -2817,72 +3010,159 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. + if (!optimizations.GetSourceIsNonPrimitiveArray()) { - // /* HeapReference<Class> */ temp1 = temp1->klass_ - __ movl(temp1, Address(src, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ movl(temp1, Address(temp1, component_offset)); - __ testl(temp1, temp1); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp1); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(temp1, temp1); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ movl(temp1, Address(src, class_offset)); + __ MaybeUnpoisonHeapReference(temp1); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); + __ testl(temp1, temp1); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp1); + } __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - if (!optimizations.GetDestinationIsNonPrimitiveArray()) { - // /* HeapReference<Class> */ temp1 = temp1->klass_ - __ movl(temp1, Address(dest, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // Bail out if the destination is not a non primitive array. 
- // /* HeapReference<Class> */ temp2 = temp1->component_type_ - __ movl(temp2, Address(temp1, component_offset)); - __ testl(temp2, temp2); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp2); - __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - // Re-poison the heap reference to make the compare instruction below - // compare two poisoned references. - __ PoisonHeapReference(temp1); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (length.Equals(Location::RegisterLocation(temp3))) { + // When Baker read barriers are enabled, register `temp3`, + // which in the present case contains the `length` parameter, + // will be overwritten below. Make the `length` location + // reference the original stack location; it will be moved + // back to `temp3` later if necessary. + DCHECK(length_arg.IsStackSlot()); + length = length_arg; + } + + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false); + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such a `temp1`. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(temp2, temp2); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + } + + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, src, class_offset, /* needs_null_check */ false); + // Note: if heap poisoning is on, we are comparing two unpoisoned references here. + __ cmpl(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + NearLabel do_copy; + __ j(kEqual, &do_copy); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. + __ cmpl(Address(temp1, super_offset), Immediate(0)); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + } } else { - // /* HeapReference<Class> */ temp1 = temp1->klass_ - __ movl(temp1, Address(dest, class_offset)); - } + // Non read barrier code. - // Note: if poisoning is on, we are here comparing two poisoned references. 
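// The re-poisoning noted just above is sound because heap-reference poisoning
// is an invertible encoding of the 32-bit reference: equality of two poisoned
// values implies equality of the originals, so the class identity comparison
// can run on either form as long as both sides use the same form. A minimal
// sketch, assuming a negation-style encoding purely for illustration (the
// real transform is whatever PoisonHeapReference emits):
#include <cstdint>

uint32_t Poison(uint32_t ref)   { return 0u - ref; }  // assumed encoding, an involution
uint32_t Unpoison(uint32_t ref) { return 0u - ref; }

// Since Poison is injective, Poison(a) == Poison(b) exactly when a == b,
// so no unpoisoning is needed before an equality check.
bool SameReference(uint32_t poisoned_a, uint32_t poisoned_b) {
  return poisoned_a == poisoned_b;
}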
- __ cmpl(temp1, Address(src, class_offset)); + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ movl(temp1, Address(dest, class_offset)); + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + __ MaybeUnpoisonHeapReference(temp1); + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + __ movl(temp2, Address(temp1, component_offset)); + __ testl(temp2, temp2); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp2); + __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + // Re-poison the heap reference to make the compare instruction below + // compare two poisoned references. + __ PoisonHeapReference(temp1); + } - if (optimizations.GetDestinationIsTypedObjectArray()) { - NearLabel do_copy; - __ j(kEqual, &do_copy); + // Note: if heap poisoning is on, we are comparing two poisoned references here. + __ cmpl(temp1, Address(src, class_offset)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + NearLabel do_copy; + __ j(kEqual, &do_copy); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); + __ MaybeUnpoisonHeapReference(temp1); + __ cmpl(Address(temp1, super_offset), Immediate(0)); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + } + } + } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { + DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); + // Bail out if the source is not a non primitive array. + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(temp1, temp1); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ movl(temp1, Address(src, class_offset)); __ MaybeUnpoisonHeapReference(temp1); // /* HeapReference<Class> */ temp1 = temp1->component_type_ __ movl(temp1, Address(temp1, component_offset)); + __ testl(temp1, temp1); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); __ MaybeUnpoisonHeapReference(temp1); - __ cmpl(Address(temp1, super_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_copy); - } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); } - } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { - DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); - // Bail out if the source is not a non primitive array. 
- // /* HeapReference<Class> */ temp1 = src->klass_ - __ movl(temp1, Address(src, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ movl(temp1, Address(temp1, component_offset)); - __ testl(temp1, temp1); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp1); __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - // Compute base source address, base destination address, and end source address. + // Compute the base source address in `temp1`. int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); DCHECK_EQ(element_size, 4); uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); @@ -2893,35 +3173,138 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); } - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp2, Address(dest, element_size * constant + offset)); - } else { - __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); - } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // If it is needed (in the case of the fast-path loop), the base + // destination address is computed later, as `temp2` is used for + // intermediate computations. - if (length.IsConstant()) { - int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp3, Address(temp1, element_size * constant)); + // Compute the end source address in `temp3`. + if (length.IsConstant()) { + int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp3, Address(temp1, element_size * constant)); + } else { + if (length.IsStackSlot()) { + // Location `length` is again pointing at a stack slot, as + // register `temp3` (which was containing the length parameter + // earlier) has been overwritten; restore it now + DCHECK(length.Equals(length_arg)); + __ movl(temp3, Address(ESP, length.GetStackIndex())); + length = Location::RegisterLocation(temp3); + } + __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); + } + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. + // for (size_t i = 0; i != length; ++i) { + // dest_array[dest_pos + i] = + // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i]))); + // } + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + NearLabel loop, done; + + // Don't enter copy loop if `length == 0`. + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + + // Given the numeric representation, it's enough to check the low bit of the rb_state. 
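// The byte-sized test emitted below works because the read-barrier state sits
// at a fixed position inside the 32-bit lock word and the gray state has the
// value 1, so probing the single lock-word byte that holds the state's low bit
// is enough to tell gray apart from white/black. A sketch of the offset and
// mask arithmetic (the concrete shift value is an assumption used only to make
// the example concrete):
#include <cstdint>

constexpr uint32_t kBitsPerByteSketch = 8;
constexpr uint32_t kRbStateShiftSketch = 28;  // illustrative value

constexpr uint32_t kGrayBytePosition = kRbStateShiftSketch / kBitsPerByteSketch;
constexpr uint32_t kGrayBitPosition  = kRbStateShiftSketch % kBitsPerByteSketch;
constexpr uint8_t  kGrayTestValue    = static_cast<uint8_t>(1u << kGrayBitPosition);

// Equivalent of testb(Address(src, monitor_offset + gray_byte_position), test_value):
// a non-zero result means the source object is gray and the slow-path copy runs.
bool IsGray(uint32_t lock_word) {
  const uint8_t byte =
      static_cast<uint8_t>(lock_word >> (kGrayBytePosition * kBitsPerByteSketch));
  return (byte & kGrayTestValue) != 0;
}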
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // goto slow_path; + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); + + // Load fence to prevent load-load reordering. + // Note that this is a no-op, thanks to the x86 memory model. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + // Slow path used to copy array when `src` is gray. + SlowPathCode* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + + // Set the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp2, Address(dest, element_size * constant + offset)); + } else { + __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + __ pushl(Address(temp1, 0)); + __ cfi().AdjustCFAOffset(4); + __ popl(Address(temp2, 0)); + __ cfi().AdjustCFAOffset(-4); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); } else { - __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); - } - - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - NearLabel loop, done; - __ cmpl(temp1, temp3); - __ j(kEqual, &done); - __ Bind(&loop); - __ pushl(Address(temp1, 0)); - __ cfi().AdjustCFAOffset(4); - __ popl(Address(temp2, 0)); - __ cfi().AdjustCFAOffset(-4); - __ addl(temp1, Immediate(element_size)); - __ addl(temp2, Immediate(element_size)); - __ cmpl(temp1, temp3); - __ j(kNotEqual, &loop); - __ Bind(&done); + // Non read barrier code. + + // Compute the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp2, Address(dest, element_size * constant + offset)); + } else { + __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); + } + + // Compute the end source address in `temp3`. + if (length.IsConstant()) { + int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp3, Address(temp1, element_size * constant)); + } else { + __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. 
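// For the gray case, the slow path registered above performs the per-element
// copy spelled out in the pseudocode comment: each reference read from the
// source is marked (obtaining its to-space address) before being stored into
// the destination. A sketch in plain C++; the identity helpers below are
// placeholders for the poisoning helpers and the ReadBarrier::Mark entry
// point, not the actual runtime API:
#include <cstdint>

static uint32_t MaybeUnpoisonSketch(uint32_t ref) { return ref; }  // placeholder
static uint32_t MaybePoisonSketch(uint32_t ref)   { return ref; }  // placeholder
static uint32_t MarkSketch(uint32_t ref)          { return ref; }  // placeholder for to-space lookup

void SlowPathCopy(const uint32_t* src_ptr, const uint32_t* end_ptr, uint32_t* dest_ptr) {
  while (src_ptr != end_ptr) {
    *dest_ptr++ = MaybePoisonSketch(MarkSketch(MaybeUnpoisonSketch(*src_ptr++)));
  }
}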
+ NearLabel loop, done; + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + __ Bind(&loop); + __ pushl(Address(temp1, 0)); + __ cfi().AdjustCFAOffset(4); + __ popl(Address(temp2, 0)); + __ cfi().AdjustCFAOffset(-4); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + __ Bind(&done); + } // We only need one card marking on the destination array. codegen_->MarkGCCard(temp1, @@ -2930,7 +3313,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { Register(kNoRegister), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index 08bd197400..3743cb1371 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 7e0d72930c..cdef22f6de 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -64,6 +64,65 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) { using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>; +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 
+class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { + public: + explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction) + : SlowPathCode(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + + CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>(); + + __ Bind(GetEntryLabel()); + NearLabel loop; + __ Bind(&loop); + __ movl(CpuRegister(TMP), Address(src_curr_addr, 0)); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + // TODO: Inline the mark bit check before calling the runtime? + // TMP = ReadBarrier::Mark(TMP); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP); + // This runtime call does not require a stack map. + x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + __ MaybePoisonHeapReference(CpuRegister(TMP)); + __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP)); + __ addl(src_curr_addr, Immediate(element_size)); + __ addl(dst_curr_addr, Immediate(element_size)); + __ cmpl(src_curr_addr, src_stop_addr); + __ j(kNotEqual, &loop); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64); +}; + +#undef __ + #define __ assembler-> static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -583,6 +642,7 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); return; } @@ -597,10 +657,7 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { - // See intrinsics.h. - if (kRoundIsPlusPointFive) { - CreateSSE41FPToIntLocations(arena_, invoke, codegen_); - } + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { @@ -610,47 +667,41 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { return; } - // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. 
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - NearLabel done, nan; + XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + NearLabel skip_incr, done; X86_64Assembler* assembler = GetAssembler(); - // Load 0.5 into inPlusPointFive. - __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f)); - - // Add in the input. - __ addss(inPlusPointFive, in); - - // And truncate to an integer. - __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); - - // Load maxInt into out. - codegen_->Load64BitValue(out, kPrimIntMax); - - // if inPlusPointFive >= maxInt goto done - __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax))); - __ j(kAboveEqual, &done); - - // if input == NaN goto nan - __ j(kUnordered, &nan); - - // output = float-to-int-truncate(input) - __ cvttss2si(out, inPlusPointFive); - __ jmp(&done); - __ Bind(&nan); - - // output = 0 - __ xorl(out, out); + // Since no direct x86 rounding instruction matches the required semantics, + // this intrinsic is implemented as follows: + // result = floor(in); + // if (in - result >= 0.5f) + // result = result + 1.0f; + __ movss(t2, in); + __ roundss(t1, in, Immediate(1)); + __ subss(t2, t1); + __ comiss(t2, codegen_->LiteralFloatAddress(0.5f)); + __ j(kBelow, &skip_incr); + __ addss(t1, codegen_->LiteralFloatAddress(1.0f)); + __ Bind(&skip_incr); + + // Final conversion to an integer. Unfortunately this also does not have a + // direct x86 instruction, since NaN should map to 0 and large positive + // values need to be clipped to the extreme value. + codegen_->Load32BitValue(out, kPrimIntMax); + __ cvtsi2ss(t2, out); + __ comiss(t1, t2); + __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered + __ movl(out, Immediate(0)); // does not change flags + __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out) + __ cvttss2si(out, t1); __ Bind(&done); } void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { - // See intrinsics.h. - if (kRoundIsPlusPointFive) { - CreateSSE41FPToIntLocations(arena_, invoke, codegen_); - } + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { @@ -660,39 +711,36 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { return; } - // Implement RoundDouble as t1 = floor(input + 0.5); convert to long. XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - NearLabel done, nan; + XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + NearLabel skip_incr, done; X86_64Assembler* assembler = GetAssembler(); - // Load 0.5 into inPlusPointFive. - __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5)); - - // Add in the input. - __ addsd(inPlusPointFive, in); - - // And truncate to an integer. - __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1)); - - // Load maxLong into out. 
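// Both new rounding sequences implement the contract described in the
// comments: floor the input, add one when the remaining fractional part is at
// least 0.5, then convert with NaN mapped to 0 and out-of-range values
// saturated. A plain C++ restatement of the intended semantics for the float
// case (a sketch of the behavior, not of the emitted instructions):
#include <cmath>
#include <cstdint>
#include <limits>

int32_t RoundFloatToInt(float in) {
  if (std::isnan(in)) {
    return 0;                                    // NaN maps to 0.
  }
  float result = std::floor(in);                 // roundss with round-down mode
  if (in - result >= 0.5f) {
    result += 1.0f;                              // conditional increment
  }
  if (result >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();  // clipped to the preloaded max
  }
  if (result <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();  // cvttss2si yields this naturally
  }
  return static_cast<int32_t>(result);
}
// The double variant below follows the same shape, with 0.5/1.0 as doubles and
// kPrimLongMax as the saturation value.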
+ // Since no direct x86 rounding instruction matches the required semantics, + // this intrinsic is implemented as follows: + // result = floor(in); + // if (in - result >= 0.5) + // result = result + 1.0f; + __ movsd(t2, in); + __ roundsd(t1, in, Immediate(1)); + __ subsd(t2, t1); + __ comisd(t2, codegen_->LiteralDoubleAddress(0.5)); + __ j(kBelow, &skip_incr); + __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f)); + __ Bind(&skip_incr); + + // Final conversion to an integer. Unfortunately this also does not have a + // direct x86 instruction, since NaN should map to 0 and large positive + // values need to be clipped to the extreme value. codegen_->Load64BitValue(out, kPrimLongMax); - - // if inPlusPointFive >= maxLong goto done - __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax))); - __ j(kAboveEqual, &done); - - // if input == NaN goto nan - __ j(kUnordered, &nan); - - // output = double-to-long-truncate(input) - __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true); - __ jmp(&done); - __ Bind(&nan); - - // output = 0 - __ xorl(out, out); + __ cvtsi2sd(t2, out, /* is64bit */ true); + __ comisd(t1, t2); + __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered + __ movl(out, Immediate(0)); // does not change flags, implicit zero extension to 64-bit + __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out) + __ cvttsd2si(out, t1, /* is64bit */ true); __ Bind(&done); } @@ -718,10 +766,8 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen, LocationSummary* locations = invoke->GetLocations(); DCHECK(locations->WillCall()); DCHECK(invoke->IsInvokeStaticOrDirect()); - X86_64Assembler* assembler = codegen->GetAssembler(); - __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64PointerSize>(entry), true)); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); } void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) { @@ -1064,9 +1110,9 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1074,9 +1120,9 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1085,18 +1131,23 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>(); Location src_pos = locations->InAt(1); CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>(); Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); - CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); - CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>(); + Location temp1_loc = locations->GetTemp(0); + CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>(); + Location temp2_loc = locations->GetTemp(1); + CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + Location temp3_loc = locations->GetTemp(2); + CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>(); + Location TMP_loc = Location::RegisterLocation(TMP); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); NearLabel conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -1112,7 +1163,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ cmpl(src, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -1120,7 +1171,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, &conditions_on_positions_validated); } __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant)); - __ j(kGreater, slow_path->GetEntryLabel()); + __ j(kGreater, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -1130,10 +1181,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (dest_pos.IsConstant()) { int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant)); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } else { __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>()); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } } @@ -1142,13 +1193,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. __ testl(src, src); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. 
__ testl(dest, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } // If the length is negative, bail out. @@ -1157,7 +1208,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>()); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } // Validity checks: source. @@ -1165,7 +1216,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -1174,7 +1225,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); @@ -1183,38 +1234,80 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - __ movl(temp1, Address(dest, class_offset)); - __ movl(temp2, Address(src, class_offset)); + bool did_unpoison = false; - if (!optimizations.GetDestinationIsNonPrimitiveArray() || - !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoison them - // both to make the identity check valid. - __ MaybeUnpoisonHeapReference(temp1); - __ MaybeUnpoisonHeapReference(temp2); - did_unpoison = true; + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false); + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such a `temp1`. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, src, class_offset, /* needs_null_check */ false); + // If heap poisoning is enabled, `temp1` and `temp2` have been + // unpoisoned by the the previous calls to + // GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ movl(temp1, Address(dest, class_offset)); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ movl(temp2, Address(src, class_offset)); + if (!optimizations.GetDestinationIsNonPrimitiveArray() || + !optimizations.GetSourceIsNonPrimitiveArray()) { + // One or two of the references need to be unpoisoned. Unpoison them + // both to make the identity check valid. + __ MaybeUnpoisonHeapReference(temp1); + __ MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } } if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. 
- // /* HeapReference<Class> */ TMP = temp1->component_type_ - __ movl(CpuRegister(TMP), Address(temp1, component_offset)); - __ testl(CpuRegister(TMP), CpuRegister(TMP)); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ TMP = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `TMP` has been unpoisoned by + // the the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ TMP = temp1->component_type_ + __ movl(CpuRegister(TMP), Address(temp1, component_offset)); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + } __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetSourceIsNonPrimitiveArray()) { // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ TMP = temp2->component_type_ - __ movl(CpuRegister(TMP), Address(temp2, component_offset)); - __ testl(CpuRegister(TMP), CpuRegister(TMP)); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ TMP = temp2->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `TMP` has been unpoisoned by + // the the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ TMP = temp2->component_type_ + __ movl(CpuRegister(TMP), Address(temp2, component_offset)); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + } __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } __ cmpl(temp1, temp2); @@ -1222,34 +1315,56 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { NearLabel do_copy; __ j(kEqual, &do_copy); - if (!did_unpoison) { + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. 
+ __ cmpl(Address(temp1, super_offset), Immediate(0)); + } else { + if (!did_unpoison) { + __ MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); __ MaybeUnpoisonHeapReference(temp1); + // No need to unpoison the following heap reference load, as + // we're comparing against null. + __ cmpl(Address(temp1, super_offset), Immediate(0)); } - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ movl(temp1, Address(temp1, component_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ movl(temp1, Address(temp1, super_offset)); - // No need to unpoison the result, we're comparing against null. - __ testl(temp1, temp1); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); __ Bind(&do_copy); } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ temp1 = src->klass_ - __ movl(temp1, Address(src, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ TMP = temp1->component_type_ - __ movl(CpuRegister(TMP), Address(temp1, component_offset)); - __ testl(CpuRegister(TMP), CpuRegister(TMP)); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + // /* HeapReference<Class> */ TMP = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ movl(temp1, Address(src, class_offset)); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ TMP = temp1->component_type_ + __ movl(CpuRegister(TMP), Address(temp1, component_offset)); + // No need to unpoison `TMP` now, as we're comparing against null. + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + } __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } // Compute base source address, base destination address, and end source address. @@ -1277,19 +1392,88 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0)); } - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. 
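// The super_offset comparison above encodes the "destination is Object[]"
// escape hatch: when the two classes differ, the copy is still allowed if the
// destination's component type is java.lang.Object, the only reference type
// whose super class is null. A compact sketch of the acceptance predicate,
// with a hypothetical stand-in for the fields the generated code reads:
struct ClassFields {
  ClassFields* component_type_;
  ClassFields* super_class_;
};

// "The classes are the same, or the destination is Object[]."
bool CopyNeedsNoElementChecks(const ClassFields* src_klass, const ClassFields* dest_klass) {
  if (src_klass == dest_klass) {
    return true;
  }
  const ClassFields* dest_component = dest_klass->component_type_;
  return dest_component != nullptr && dest_component->super_class_ == nullptr;
}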
- NearLabel loop, done; - __ cmpl(temp1, temp3); - __ j(kEqual, &done); - __ Bind(&loop); - __ movl(CpuRegister(TMP), Address(temp1, 0)); - __ movl(Address(temp2, 0), CpuRegister(TMP)); - __ addl(temp1, Immediate(element_size)); - __ addl(temp2, Immediate(element_size)); - __ cmpl(temp1, temp3); - __ j(kNotEqual, &loop); - __ Bind(&done); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + NearLabel loop, done; + + // Don't enter copy loop if `length == 0`. + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // goto slow_path; + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); + + // Load fence to prevent load-load reordering. + // Note that this is a no-op, thanks to the x86-64 memory model. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + // Slow path used to copy array when `src` is gray. + SlowPathCode* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + __ movl(CpuRegister(TMP), Address(temp1, 0)); + __ movl(Address(temp2, 0), CpuRegister(TMP)); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code. + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. 
+ NearLabel loop, done; + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + __ Bind(&loop); + __ movl(CpuRegister(TMP), Address(temp1, 0)); + __ movl(Address(temp2, 0), CpuRegister(TMP)); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + __ Bind(&done); + } // We only need one card marking on the destination array. codegen_->MarkGCCard(temp1, @@ -1298,12 +1482,12 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { CpuRegister(kNoRegister), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1324,8 +1508,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pStringCompareTo), - /* no_rip */ true)); + codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); __ Bind(slow_path->GetExitLabel()); } @@ -1385,14 +1568,27 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { __ cmpl(str, arg); __ j(kEqual, &return_true); - // Load length of receiver string. + // Load length and compression flag of receiver string. __ movl(rcx, Address(str, count_offset)); - // Check if lengths are equal, return false if they're not. + // Check if lengths and compressiond flags are equal, return false if they're not. + // Two identical strings will always have same compression style since + // compression style is decided on alloc. __ cmpl(rcx, Address(arg, count_offset)); __ j(kNotEqual, &return_false); + + if (mirror::kUseStringCompression) { + NearLabel string_uncompressed; + // Both string are compressed. + __ cmpl(rcx, Immediate(0)); + __ j(kGreaterEqual, &string_uncompressed); + // Divide string length by 2, rounding up, and continue as if uncompressed. + // Merge clearing the compression flag with +1 for rounding. + __ addl(rcx, Immediate(static_cast<int32_t>(0x80000001))); + __ shrl(rcx, Immediate(1)); + __ Bind(&string_uncompressed); + } // Return true if both strings are empty. __ jrcxz(&return_true); - // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction. __ leal(rsi, Address(str, value_offset)); __ leal(rdi, Address(arg, value_offset)); @@ -1401,7 +1597,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { __ addl(rcx, Immediate(3)); __ shrl(rcx, Immediate(2)); - // Assertions that must hold in order to compare strings 4 characters at a time. + // Assertions that must hold in order to compare strings 4 characters (uncompressed) + // or 8 characters (compressed) at a time. DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded"); @@ -1491,7 +1688,8 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ j(kAbove, slow_path->GetEntryLabel()); } - // From here down, we know that we are looking for a char that fits in 16 bits. 
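// The addl/shrl pair added to StringEquals above folds two steps into one: in
// this revision a set sign bit in `count` marks a compressed (one byte per
// character) string, so adding 0x80000001 clears that bit modulo 2^32 while
// also supplying the +1 needed to round up, and the shift then halves the byte
// length into an equivalent character count. A worked sketch (the sign-bit
// flag layout is read off the cmpl/jge tests above):
#include <cstdint>

constexpr uint32_t kCompressionFlagSketch = 0x80000000u;  // sign bit

// Equivalent of: addl(count, 0x80000001); shrl(count, 1).
constexpr uint32_t CompressedCountToCharCount(uint32_t count_with_flag) {
  // Flag dropped by unsigned wraparound, +1 added, then divide by two rounding up.
  return (count_with_flag + 0x80000001u) >> 1;
}

static_assert(CompressedCountToCharCount(kCompressionFlagSketch | 11u) == 6u,
              "11 compressed bytes compare as ceil(11 / 2) == 6 chars");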
+ // From here down, we know that we are looking for a char that fits in + // 16 bits (uncompressed) or 8 bits (compressed). // Location of reference to data array within the String object. int32_t value_offset = mirror::String::ValueOffset().Int32Value(); // Location of count within the String object. @@ -1499,6 +1697,12 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Load string length, i.e., the count field of the string. __ movl(string_length, Address(string_obj, count_offset)); + if (mirror::kUseStringCompression) { + // Use TMP to keep string_length_flagged. + __ movl(CpuRegister(TMP), string_length); + // Mask out first bit used as compression flag. + __ andl(string_length, Immediate(INT32_MAX)); + } // Do a length check. // TODO: Support jecxz. @@ -1509,7 +1713,6 @@ static void GenerateStringIndexOf(HInvoke* invoke, if (start_at_zero) { // Number of chars to scan is the same as the string length. __ movl(counter, string_length); - // Move to the start of the string. __ addq(string_obj, Immediate(value_offset)); } else { @@ -1524,19 +1727,44 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ cmpl(start_index, Immediate(0)); __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough. - // Move to the start of the string: string_obj + value_offset + 2 * start_index. - __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); - + if (mirror::kUseStringCompression) { + NearLabel modify_counter, offset_uncompressed_label; + __ cmpl(CpuRegister(TMP), Immediate(0)); + __ j(kGreaterEqual, &offset_uncompressed_label); + __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset)); + __ jmp(&modify_counter); + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ Bind(&offset_uncompressed_label); + __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + __ Bind(&modify_counter); + } else { + __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + } // Now update ecx, the work counter: it's gonna be string.length - start_index. __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit. __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); } - // Everything is set up for repne scasw: - // * Comparison address in RDI. - // * Counter in ECX. - __ repne_scasw(); - + if (mirror::kUseStringCompression) { + NearLabel uncompressed_string_comparison; + NearLabel comparison_done; + __ cmpl(CpuRegister(TMP), Immediate(0)); + __ j(kGreater, &uncompressed_string_comparison); + // Check if RAX (search_value) is ASCII. + __ cmpl(search_value, Immediate(127)); + __ j(kGreater, ¬_found_label); + // Comparing byte-per-byte. + __ repne_scasb(); + __ jmp(&comparison_done); + // Everything is set up for repne scasw: + // * Comparison address in RDI. + // * Counter in ECX. + __ Bind(&uncompressed_string_comparison); + __ repne_scasw(); + __ Bind(&comparison_done); + } else { + __ repne_scasw(); + } // Did we find a match? 
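// The indexOf changes above rely on the same flag convention: the original
// (flagged) count is parked in TMP so the code can later pick between a byte
// scan (repne scasb) and a 16-bit scan (repne scasw), and a compressed string
// can never contain a search character outside the ASCII range. A short
// sketch of that dispatch, with simple loops standing in for the repne scans:
#include <cstdint>

int32_t IndexOfChar(const void* chars, int32_t length, bool compressed, uint16_t search_value) {
  if (compressed) {
    if (search_value > 127) {
      return -1;                                    // early jump to not_found_label
    }
    const uint8_t* data = static_cast<const uint8_t*>(chars);
    for (int32_t i = 0; i < length; ++i) {          // repne scasb equivalent
      if (data[i] == static_cast<uint8_t>(search_value)) return i;
    }
  } else {
    const uint16_t* data = static_cast<const uint16_t*>(chars);
    for (int32_t i = 0; i < length; ++i) {          // repne scasw equivalent
      if (data[i] == search_value) return i;
    }
  }
  return -1;
}
// The generated code additionally biases the scan by start_index and converts
// the remaining-count register back into an index; that bookkeeping is left
// out of this sketch.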
__ j(kNotEqual, ¬_found_label); @@ -1577,7 +1805,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1597,11 +1825,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, - pAllocStringFromBytes), - /* no_rip */ true)); + codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1617,24 +1842,19 @@ void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* inv } void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { - X86_64Assembler* assembler = GetAssembler(); - // No need to emit code checking whether `locations->InAt(2)` is a null // pointer, as callers of the native method // // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) // // all include a null check on `data` before calling that method. - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, - pAllocStringFromChars), - /* no_rip */ true)); + codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1651,11 +1871,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invok codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, - pAllocStringFromString), - /* no_rip */ true)); + codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1699,32 +1916,54 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); DCHECK_EQ(char_size, 2u); - // Compute the address of the destination buffer. - __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); - - // Compute the address of the source string. - if (srcBegin.IsConstant()) { - // Compute the address of the source string by adding the number of chars from - // the source beginning to the value offset of a string. 
- __ leaq(CpuRegister(RSI), Address(obj, srcBegin_value * char_size + value_offset)); - } else { - __ leaq(CpuRegister(RSI), Address(obj, srcBegin.AsRegister<CpuRegister>(), - ScaleFactor::TIMES_2, value_offset)); - } - + NearLabel done; // Compute the number of chars (words) to move. __ movl(CpuRegister(RCX), srcEnd); if (srcBegin.IsConstant()) { - if (srcBegin_value != 0) { - __ subl(CpuRegister(RCX), Immediate(srcBegin_value)); - } + __ subl(CpuRegister(RCX), Immediate(srcBegin_value)); } else { DCHECK(srcBegin.IsRegister()); __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>()); } + if (mirror::kUseStringCompression) { + NearLabel copy_uncompressed, copy_loop; + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + // Location of count in string. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ cmpl(Address(obj, count_offset), Immediate(0)); + __ j(kGreaterEqual, ©_uncompressed); + // Compute the address of the source string by adding the number of chars from + // the source beginning to the value offset of a string. + __ leaq(CpuRegister(RSI), + CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset)); + // Start the loop to copy String's value to Array of Char. + __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); + + __ Bind(©_loop); + __ jrcxz(&done); + // Use TMP as temporary (convert byte from RSI to word). + // TODO: Selecting RAX as the temporary and using LODSB/STOSW. + __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0)); + __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP)); + __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size)); + __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size)); + // TODO: Add support for LOOP to X86_64Assembler. + __ subl(CpuRegister(RCX), Immediate(1)); + __ jmp(©_loop); + + __ Bind(©_uncompressed); + } + + __ leaq(CpuRegister(RSI), + CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset)); + // Compute the address of the destination buffer. + __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); // Do the move. __ rep_movsw(); + + __ Bind(&done); } static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { @@ -1903,10 +2142,9 @@ static void GenUnsafeGet(HInvoke* invoke, case Primitive::kPrimNot: { if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, temp, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1929,46 +2167,42 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, - HInvoke* invoke, - Primitive::Type type) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); - } + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(arena_, invoke); } @@ -2099,10 +2333,16 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, + Primitive::Type type, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -2113,7 +2353,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty locations->SetOut(Location::RequiresRegister()); if (type == Primitive::kPrimNot) { - // Need temp registers for card-marking. + // Need temporary registers for card-marking, and possibly for + // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
locations->AddTemp(Location::RequiresRegister()); } @@ -2128,14 +2369,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2152,16 +2388,37 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // Ensure `expected` is in RAX (required by the CMPXCHG instruction). DCHECK_EQ(expected.AsRegister(), RAX); CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), - locations->GetTemp(1).AsRegister<CpuRegister>(), - base, - value, - value_can_be_null); + codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); + + // The address of the field within the holding object. + Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + field_addr, + /* needs_null_check */ false, + /* always_update_field */ true, + &temp1, + &temp2); + } bool base_equals_value = (base.AsRegister() == value.AsRegister()); Register value_reg = value.AsRegister(); @@ -2170,7 +2427,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // If `base` and `value` are the same register location, move // `value_reg` to a temporary register. This way, poisoning // `value_reg` won't invalidate `base`. - value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister(); + value_reg = temp1.AsRegister(); __ movl(CpuRegister(value_reg), base); } @@ -2189,19 +2446,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ PoisonHeapReference(CpuRegister(value_reg)); } - // TODO: Add a read barrier for the reference stored in the object - // before attempting the CAS, similar to the one in the - // art::Unsafe_compareAndSwapObject JNI implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). 
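The GenerateReferenceLoadWithBakerReadBarrier call added to GenCAS above (with always_update_field set) guards against a concurrent-copying-collector hazard: if the field still holds a from-space pointer while the caller's expected value is the to-space copy of the same object, LOCK CMPXCHG compares two different pointer values and fails even though the field logically holds the expected reference. A hedged, ART-independent illustration of that failure shape; every name below is made up for the example:

#include <atomic>
#include <cassert>

struct Obj { int payload; };

int main() {
  Obj from_space_copy{42};
  Obj to_space_copy{42};                      // GC's moved copy of the same logical object
  std::atomic<Obj*> field{&from_space_copy};  // field not yet forwarded by the GC

  Obj* expected = &to_space_copy;             // mutator holds the to-space reference
  bool ok = field.compare_exchange_strong(expected, /*desired=*/nullptr);
  assert(!ok);  // spurious failure: same logical object, different pointers
  return 0;
}

Loading the field through the read barrier first (and, with always_update_field, writing the marked to-space pointer back into the field) removes this mismatch before the CAS is attempted.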
- DCHECK(!kEmitCompilerReadBarrier); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); + __ LockCmpxchgl(field_addr, CpuRegister(value_reg)); // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setcc(kZero, out); __ movzxb(out, out); @@ -2234,7 +2484,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setcc(kZero, out); __ movzxb(out, out); } @@ -2249,14 +2499,9 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); GenCAS(Primitive::kPrimNot, invoke, codegen_); } diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 155ff6548b..97404aa568 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index a0ded74d6d..eb2d18dd88 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -15,6 +15,7 @@ */ #include "licm.h" + #include "side_effects_analysis.h" namespace art { @@ -90,8 +91,7 @@ void LICM::Run() { } // Post order visit to visit inner loops before outer loops. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->IsLoopHeader()) { // Only visit the loop when we reach the header. 
continue; diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc new file mode 100644 index 0000000000..80cecd41dc --- /dev/null +++ b/compiler/optimizing/linear_order.cc @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linear_order.h" + +namespace art { + +static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) { + return first_loop == second_loop; +} + +static bool IsLoop(HLoopInformation* info) { + return info != nullptr; +} + +static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) { + return (inner != outer) + && (inner != nullptr) + && (outer != nullptr) + && inner->IsIn(*outer); +} + +// Helper method to update work list for linear order. +static void AddToListForLinearization(ArenaVector<HBasicBlock*>* worklist, HBasicBlock* block) { + HLoopInformation* block_loop = block->GetLoopInformation(); + auto insert_pos = worklist->rbegin(); // insert_pos.base() will be the actual position. + for (auto end = worklist->rend(); insert_pos != end; ++insert_pos) { + HBasicBlock* current = *insert_pos; + HLoopInformation* current_loop = current->GetLoopInformation(); + if (InSameLoop(block_loop, current_loop) + || !IsLoop(current_loop) + || IsInnerLoop(current_loop, block_loop)) { + // The block can be processed immediately. + break; + } + } + worklist->insert(insert_pos.base(), block); +} + +// Helper method to validate linear order. +static bool IsLinearOrderWellFormed(const HGraph* graph, ArenaVector<HBasicBlock*>* linear_order) { + for (HBasicBlock* header : graph->GetBlocks()) { + if (header == nullptr || !header->IsLoopHeader()) { + continue; + } + HLoopInformation* loop = header->GetLoopInformation(); + size_t num_blocks = loop->GetBlocks().NumSetBits(); + size_t found_blocks = 0u; + for (HBasicBlock* block : *linear_order) { + if (loop->Contains(*block)) { + found_blocks++; + if (found_blocks == 1u && block != header) { + // First block is not the header. + return false; + } else if (found_blocks == num_blocks && !loop->IsBackEdge(*block)) { + // Last block is not a back edge. + return false; + } + } else if (found_blocks != 0u && found_blocks != num_blocks) { + // Blocks are not adjacent. + return false; + } + } + DCHECK_EQ(found_blocks, num_blocks); + } + return true; +} + +void LinearizeGraph(const HGraph* graph, + ArenaAllocator* allocator, + ArenaVector<HBasicBlock*>* linear_order) { + DCHECK(linear_order->empty()); + // Create a reverse post ordering with the following properties: + // - Blocks in a loop are consecutive, + // - Back-edge is the last block before loop exits. + // + // (1): Record the number of forward predecessors for each block. This is to + // ensure the resulting order is reverse post order. We could use the + // current reverse post order in the graph, but it would require making + // order queries to a GrowableArray, which is not the best data structure + // for it. 
+ ArenaVector<uint32_t> forward_predecessors(graph->GetBlocks().size(), + allocator->Adapter(kArenaAllocLinearOrder)); + for (HBasicBlock* block : graph->GetReversePostOrder()) { + size_t number_of_forward_predecessors = block->GetPredecessors().size(); + if (block->IsLoopHeader()) { + number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); + } + forward_predecessors[block->GetBlockId()] = number_of_forward_predecessors; + } + // (2): Following a worklist approach, first start with the entry block, and + // iterate over the successors. When all non-back edge predecessors of a + // successor block are visited, the successor block is added in the worklist + // following an order that satisfies the requirements to build our linear graph. + linear_order->reserve(graph->GetReversePostOrder().size()); + ArenaVector<HBasicBlock*> worklist(allocator->Adapter(kArenaAllocLinearOrder)); + worklist.push_back(graph->GetEntryBlock()); + do { + HBasicBlock* current = worklist.back(); + worklist.pop_back(); + linear_order->push_back(current); + for (HBasicBlock* successor : current->GetSuccessors()) { + int block_id = successor->GetBlockId(); + size_t number_of_remaining_predecessors = forward_predecessors[block_id]; + if (number_of_remaining_predecessors == 1) { + AddToListForLinearization(&worklist, successor); + } + forward_predecessors[block_id] = number_of_remaining_predecessors - 1; + } + } while (!worklist.empty()); + + DCHECK(graph->HasIrreducibleLoops() || IsLinearOrderWellFormed(graph, linear_order)); +} + +} // namespace art diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h new file mode 100644 index 0000000000..7122d67be9 --- /dev/null +++ b/compiler/optimizing/linear_order.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_ +#define ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_ + +#include "nodes.h" + +namespace art { + +// Linearizes the 'graph' such that: +// (1): a block is always after its dominator, +// (2): blocks of loops are contiguous. +// +// Storage is obtained through 'allocator' and the linear order it computed +// into 'linear_order'. 
Once computed, iteration can be expressed as: +// +// for (HBasicBlock* block : linear_order) // linear order +// +// for (HBasicBlock* block : ReverseRange(linear_order)) // linear post order +// +void LinearizeGraph(const HGraph* graph, + ArenaAllocator* allocator, + ArenaVector<HBasicBlock*>* linear_order); + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_ diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index bd74368e17..37b58ded59 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -56,8 +56,7 @@ static void TestCode(const uint16_t* data, const char* expected) { liveness.Analyze(); std::ostringstream buffer; - for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph->GetBlocks()) { buffer << "Block " << block->GetBlockId() << std::endl; size_t ssa_values = liveness.GetNumberOfSsaValues(); BitVector* live_in = liveness.GetLiveInSet(*block); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 7347686830..b91e9e6868 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -168,7 +168,9 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { const int16_t declaring_class_def_index_; // declaring class's def's dex index. bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop // side effects because this location is stored - // into inside a loop. + // into inside a loop. This gives + // better info on whether a singleton's location + // value may be killed by loop side effects. DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -420,8 +422,26 @@ class HeapLocationCollector : public HGraphVisitor { void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; - if (instruction->GetBlock()->GetLoopInformation() != nullptr) { - location->SetValueKilledByLoopSideEffects(true); + if (location->GetReferenceInfo()->IsSingleton()) { + // A singleton's location value may be killed by loop side effects if it's + // defined before that loop, and it's stored into inside that loop. + HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + if (loop_info != nullptr) { + HInstruction* ref = location->GetReferenceInfo()->GetReference(); + DCHECK(ref->IsNewInstance()); + if (loop_info->IsDefinedOutOfTheLoop(ref)) { + // ref's location value may be killed by this loop's side effects. + location->SetValueKilledByLoopSideEffects(true); + } else { + // ref is defined inside this loop so this loop's side effects cannot + // kill its location value at the loop header since ref/its location doesn't + // exist yet at the loop header. + } + } + } else { + // For non-singletons, value_killed_by_loop_side_effects_ is inited to + // true. + DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true); } } @@ -810,9 +830,6 @@ class LSEVisitor : public HGraphVisitor { if (loop_info != nullptr) { // instruction is a store in the loop so the loop must does write. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); - // If it's a singleton, IsValueKilledByLoopSideEffects() must be true. 
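LinearizeGraph above is a worklist pass: step (1) counts forward (non-back-edge) predecessors per block, and step (2) releases a block once all of those predecessors have been emitted, with AddToListForLinearization choosing the insertion point so that a loop's blocks stay contiguous. A minimal standalone sketch of just the counting/worklist skeleton on a plain adjacency-list digraph; the loop-contiguity policy is omitted and the function is purely illustrative:

#include <vector>

// Emit nodes in reverse post order: a node becomes ready only once every one
// of its forward predecessors has been emitted.
std::vector<int> Linearize(const std::vector<std::vector<int>>& successors,
                           std::vector<int> forward_pred_count,  // per node
                           int entry) {
  std::vector<int> order;
  std::vector<int> worklist = {entry};
  while (!worklist.empty()) {
    int current = worklist.back();
    worklist.pop_back();
    order.push_back(current);
    for (int s : successors[current]) {
      if (--forward_pred_count[s] == 0) {
        worklist.push_back(s);  // ART inserts via AddToListForLinearization instead
      }
    }
  }
  return order;
}

In the real pass the plain push_back is replaced by a scan from the back of the worklist that places the block next to blocks of the same (or an enclosing) loop, which is the property IsLinearOrderWellFormed later checks.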
- DCHECK(!ref_info->IsSingleton() || - heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects()); if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); @@ -1029,8 +1046,8 @@ void LoadStoreElimination::Run() { return; } HeapLocationCollector heap_location_collector(graph_); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - heap_location_collector.VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + heap_location_collector.VisitBasicBlock(block); } if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { // Bail out if there are too many heap locations to deal with. @@ -1048,8 +1065,8 @@ void LoadStoreElimination::Run() { } heap_location_collector.BuildAliasingMatrix(); LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - lse_visitor.VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + lse_visitor.VisitBasicBlock(block); } lse_visitor.RemoveInstructions(); } diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 83596da41a..d157509758 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -27,12 +27,14 @@ LocationSummary::LocationSummary(HInstruction* instruction, : inputs_(instruction->InputCount(), instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), temps_(instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), - output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), + intrinsified_(intrinsified), + has_custom_slow_path_calling_convention_(false), + output_overlaps_(Location::kOutputOverlap), stack_mask_(nullptr), register_mask_(0), - live_registers_(), - intrinsified_(intrinsified) { + live_registers_(RegisterSet::Empty()), + custom_slow_path_caller_saves_(RegisterSet::Empty()) { instruction->SetLocations(this); if (NeedsSafepoint()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 7a78bfdc8d..da27928ef2 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -20,6 +20,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/bit_field.h" +#include "base/bit_utils.h" #include "base/bit_vector.h" #include "base/value_object.h" @@ -376,6 +377,10 @@ class Location : public ValueObject { return PolicyField::Decode(GetPayload()); } + bool RequiresRegisterKind() const { + return GetPolicy() == kRequiresRegister || GetPolicy() == kRequiresFpuRegister; + } + uintptr_t GetEncoding() const { return GetPayload(); } @@ -415,7 +420,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs); class RegisterSet : public ValueObject { public: - RegisterSet() : core_registers_(0), floating_point_registers_(0) {} + static RegisterSet Empty() { return RegisterSet(); } void Add(Location loc) { if (loc.IsRegister()) { @@ -448,7 +453,7 @@ class RegisterSet : public ValueObject { } size_t GetNumberOfRegisters() const { - return __builtin_popcount(core_registers_) + __builtin_popcount(floating_point_registers_); + return POPCOUNT(core_registers_) + POPCOUNT(floating_point_registers_); } uint32_t GetCoreRegisters() const { @@ -460,10 +465,10 @@ class RegisterSet : public ValueObject { } private: + RegisterSet() : core_registers_(0), 
floating_point_registers_(0) {} + uint32_t core_registers_; uint32_t floating_point_registers_; - - DISALLOW_COPY_AND_ASSIGN(RegisterSet); }; static constexpr bool kIntrinsified = true; @@ -480,13 +485,14 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { public: enum CallKind { kNoCall, + kCallOnMainAndSlowPath, kCallOnSlowPath, kCallOnMainOnly }; - LocationSummary(HInstruction* instruction, - CallKind call_kind = kNoCall, - bool intrinsified = false); + explicit LocationSummary(HInstruction* instruction, + CallKind call_kind = kNoCall, + bool intrinsified = false); void SetInAt(uint32_t at, Location location) { inputs_[at] = location; @@ -540,10 +546,44 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { Location Out() const { return output_; } - bool CanCall() const { return call_kind_ != kNoCall; } - bool WillCall() const { return call_kind_ == kCallOnMainOnly; } - bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; } - bool NeedsSafepoint() const { return CanCall(); } + bool CanCall() const { + return call_kind_ != kNoCall; + } + + bool WillCall() const { + return call_kind_ == kCallOnMainOnly || call_kind_ == kCallOnMainAndSlowPath; + } + + bool CallsOnSlowPath() const { + return call_kind_ == kCallOnSlowPath || call_kind_ == kCallOnMainAndSlowPath; + } + + bool OnlyCallsOnSlowPath() const { + return call_kind_ == kCallOnSlowPath; + } + + bool CallsOnMainAndSlowPath() const { + return call_kind_ == kCallOnMainAndSlowPath; + } + + bool NeedsSafepoint() const { + return CanCall(); + } + + void SetCustomSlowPathCallerSaves(const RegisterSet& caller_saves) { + DCHECK(OnlyCallsOnSlowPath()); + has_custom_slow_path_calling_convention_ = true; + custom_slow_path_caller_saves_ = caller_saves; + } + + bool HasCustomSlowPathCallingConvention() const { + return has_custom_slow_path_calling_convention_; + } + + const RegisterSet& GetCustomSlowPathCallerSaves() const { + DCHECK(HasCustomSlowPathCallingConvention()); + return custom_slow_path_caller_saves_; + } void SetStackBit(uint32_t index) { stack_mask_->SetBit(index); @@ -604,18 +644,18 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { return intrinsified_; } - void SetIntrinsified(bool intrinsified) { - intrinsified_ = intrinsified; - } - private: ArenaVector<Location> inputs_; ArenaVector<Location> temps_; + const CallKind call_kind_; + // Whether these are locations for an intrinsified call. + const bool intrinsified_; + // Whether the slow path has default or custom calling convention. + bool has_custom_slow_path_calling_convention_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. Location::OutputOverlap output_overlaps_; Location output_; - const CallKind call_kind_; // Mask of objects that live in the stack. BitVector* stack_mask_; @@ -626,11 +666,10 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { // Registers that are in use at this position. RegisterSet live_registers_; - // Whether these are locations for an intrinsified call. - bool intrinsified_; + // Custom slow path caller saves. Valid only if indicated by slow_path_calling_convention_. 
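Since LocationSummary now distinguishes four call kinds, it may help to tabulate what the predicates above return for each state; this is an editorial summary derived directly from the definitions:

  call_kind_              CanCall  WillCall  CallsOnSlowPath  OnlyCallsOnSlowPath
  kNoCall                 false    false     false            false
  kCallOnSlowPath         true     false     true             true
  kCallOnMainAndSlowPath  true     true      true             false
  kCallOnMainOnly         true     true      false            false

SetCustomSlowPathCallerSaves is only legal in the kCallOnSlowPath row (it DCHECKs OnlyCallsOnSlowPath), which matches how the Unsafe intrinsics earlier in this diff register RegisterSet::Empty() as the caller-save set for their Baker read barrier slow paths.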
+ RegisterSet custom_slow_path_caller_saves_; - ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint); - ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint); + friend class RegisterAllocatorTest; DISALLOW_COPY_AND_ASSIGN(LocationSummary); }; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc new file mode 100644 index 0000000000..51be1d1e91 --- /dev/null +++ b/compiler/optimizing/loop_optimization.cc @@ -0,0 +1,377 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loop_optimization.h" + +#include "linear_order.h" + +namespace art { + +// Remove the instruction from the graph. A bit more elaborate than the usual +// instruction removal, since there may be a cycle in the use structure. +static void RemoveFromCycle(HInstruction* instruction) { + instruction->RemoveAsUserOfAllInputs(); + instruction->RemoveEnvironmentUsers(); + instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false); +} + +// +// Class methods. +// + +HLoopOptimization::HLoopOptimization(HGraph* graph, + HInductionVarAnalysis* induction_analysis) + : HOptimization(graph, kLoopOptimizationPassName), + induction_range_(induction_analysis), + loop_allocator_(nullptr), + top_loop_(nullptr), + last_loop_(nullptr), + iset_(nullptr), + induction_simplication_count_(0) { +} + +void HLoopOptimization::Run() { + // Well-behaved loops only. + // TODO: make this less of a sledgehammer. + if (graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) { + return; + } + + // Phase-local allocator that draws from the global pool. Since the allocator + // itself resides on the stack, it is destructed on exiting Run(), which + // implies its underlying memory is released immediately. + ArenaAllocator allocator(graph_->GetArena()->GetArenaPool()); + loop_allocator_ = &allocator; + + // Perform loop optimizations. + LocalRun(); + + // Detach. + loop_allocator_ = nullptr; + last_loop_ = top_loop_ = nullptr; +} + +void HLoopOptimization::LocalRun() { + // Build the linear order using the phase-local allocator. This step enables building + // a loop hierarchy that properly reflects the outer-inner and previous-next relation. + ArenaVector<HBasicBlock*> linear_order(loop_allocator_->Adapter(kArenaAllocLinearOrder)); + LinearizeGraph(graph_, loop_allocator_, &linear_order); + + // Build the loop hierarchy. + for (HBasicBlock* block : linear_order) { + if (block->IsLoopHeader()) { + AddLoop(block->GetLoopInformation()); + } + } + + // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use + // a temporary set that stores instructions using the phase-local allocator. 
+ if (top_loop_ != nullptr) { + ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + iset_ = &iset; + TraverseLoopsInnerToOuter(top_loop_); + iset_ = nullptr; // detach + } +} + +void HLoopOptimization::AddLoop(HLoopInformation* loop_info) { + DCHECK(loop_info != nullptr); + LoopNode* node = new (loop_allocator_) LoopNode(loop_info); // phase-local allocator + if (last_loop_ == nullptr) { + // First loop. + DCHECK(top_loop_ == nullptr); + last_loop_ = top_loop_ = node; + } else if (loop_info->IsIn(*last_loop_->loop_info)) { + // Inner loop. + node->outer = last_loop_; + DCHECK(last_loop_->inner == nullptr); + last_loop_ = last_loop_->inner = node; + } else { + // Subsequent loop. + while (last_loop_->outer != nullptr && !loop_info->IsIn(*last_loop_->outer->loop_info)) { + last_loop_ = last_loop_->outer; + } + node->outer = last_loop_->outer; + node->previous = last_loop_; + DCHECK(last_loop_->next == nullptr); + last_loop_ = last_loop_->next = node; + } +} + +void HLoopOptimization::RemoveLoop(LoopNode* node) { + DCHECK(node != nullptr); + DCHECK(node->inner == nullptr); + if (node->previous != nullptr) { + // Within sequence. + node->previous->next = node->next; + if (node->next != nullptr) { + node->next->previous = node->previous; + } + } else { + // First of sequence. + if (node->outer != nullptr) { + node->outer->inner = node->next; + } else { + top_loop_ = node->next; + } + if (node->next != nullptr) { + node->next->outer = node->outer; + node->next->previous = nullptr; + } + } +} + +void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { + for ( ; node != nullptr; node = node->next) { + int current_induction_simplification_count = induction_simplication_count_; + if (node->inner != nullptr) { + TraverseLoopsInnerToOuter(node->inner); + } + // Visit loop after its inner loops have been visited. If the induction of any inner + // loop has been simplified, recompute the induction information of this loop first. + if (current_induction_simplification_count != induction_simplication_count_) { + induction_range_.ReVisit(node->loop_info); + } + SimplifyBlocks(node); + SimplifyInduction(node); + SimplifyBlocks(node); + if (node->inner == nullptr) { + RemoveIfEmptyInnerLoop(node); + } + } +} + +void HLoopOptimization::SimplifyInduction(LoopNode* node) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + // Scan the phis in the header to find opportunities to simplify an induction + // cycle that is only used outside the loop. Replace these uses, if any, with + // the last value and remove the induction cycle. + // Examples: for (int i = 0; x != null; i++) { .... no i .... } + // for (int i = 0; i < 10; i++, k++) { .... no k .... } return k; + for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + iset_->clear(); + int32_t use_count = 0; + if (IsPhiInduction(phi) && + IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) && + TryReplaceWithLastValue(phi, use_count, preheader)) { + for (HInstruction* i : *iset_) { + RemoveFromCycle(i); + } + induction_simplication_count_++; + } + } +} + +void HLoopOptimization::SimplifyBlocks(LoopNode* node) { + for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + // Remove instructions that are dead. 
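AddLoop above threads LoopNode objects into a small forest: inner points at the first nested loop and next at the following loop at the same depth, so inner-to-outer traversal is a simple recursion over next/inner. A self-contained sketch of that shape, including the bracket notation (one "[]" per loop, nested for inner loops) that the loop_optimization tests further down use to print it; the struct below is a stand-in, not the ART type:

#include <iostream>
#include <string>

struct Node {            // stand-in for HLoopOptimization::LoopNode
  Node* inner = nullptr;
  Node* next = nullptr;
};

// Same idea as LoopStructureRecurse in the tests: one "[...]" per loop in
// sequence, with inner loops printed inside the brackets.
std::string Structure(const Node* node) {
  std::string s;
  for (; node != nullptr; node = node->next) {
    s += "[" + Structure(node->inner) + "]";
  }
  return s;
}

int main() {
  Node inner2, inner1, outer, sibling;
  inner1.inner = &inner2;   // triple nest: outer -> inner1 -> inner2
  outer.inner = &inner1;
  outer.next = &sibling;    // followed by a second top-level loop
  std::cout << Structure(&outer) << "\n";  // prints [[[]]][]
  return 0;
}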
+ for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) { + HInstruction* instruction = i.Current(); + if (instruction->IsDeadAndRemovable()) { + block->RemoveInstruction(instruction); + } + } + // Remove trivial control flow blocks from the loop-body. + if (block->GetPredecessors().size() == 1 && + block->GetSuccessors().size() == 1 && + block->GetFirstInstruction()->IsGoto()) { + HBasicBlock* pred = block->GetSinglePredecessor(); + HBasicBlock* succ = block->GetSingleSuccessor(); + if (succ->GetPredecessors().size() == 1) { + pred->ReplaceSuccessor(block, succ); + block->ClearDominanceInformation(); + block->SetDominator(pred); // needed by next disconnect. + block->DisconnectAndDelete(); + pred->AddDominatedBlock(succ); + succ->SetDominator(pred); + } + } + } +} + +void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + // Ensure loop header logic is finite. + if (!induction_range_.IsFinite(node->loop_info)) { + return; + } + // Ensure there is only a single loop-body (besides the header). + HBasicBlock* body = nullptr; + for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { + if (it.Current() != header) { + if (body != nullptr) { + return; + } + body = it.Current(); + } + } + // Ensure there is only a single exit point. + if (header->GetSuccessors().size() != 2) { + return; + } + HBasicBlock* exit = (header->GetSuccessors()[0] == body) + ? header->GetSuccessors()[1] + : header->GetSuccessors()[0]; + // Ensure exit can only be reached by exiting loop. + if (exit->GetPredecessors().size() != 1) { + return; + } + // Detect an empty loop: no side effects other than plain iteration. Replace + // subsequent index uses, if any, with the last value and remove the loop. + iset_->clear(); + int32_t use_count = 0; + if (IsEmptyHeader(header) && + IsEmptyBody(body) && + IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) && + TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) { + body->DisconnectAndDelete(); + exit->RemovePredecessor(header); + header->RemoveSuccessor(exit); + header->ClearDominanceInformation(); + header->SetDominator(preheader); // needed by next disconnect. + header->DisconnectAndDelete(); + preheader->AddSuccessor(exit); + preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator + preheader->AddDominatedBlock(exit); + exit->SetDominator(preheader); + // Update hierarchy. + RemoveLoop(node); + } +} + +bool HLoopOptimization::IsPhiInduction(HPhi* phi) { + ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi); + if (set != nullptr) { + for (HInstruction* i : *set) { + // Check that, other than phi, instruction are removable with uses contained in the cycle. + // TODO: investigate what cases are no longer in the graph. + if (i != phi) { + if (!i->IsInBlock() || !i->IsRemovable()) { + return false; + } + for (const HUseListNode<HInstruction*>& use : i->GetUses()) { + if (set->find(use.GetUser()) == set->end()) { + return false; + } + } + } + } + DCHECK(iset_->empty()); + iset_->insert(set->begin(), set->end()); // copy + return true; + } + return false; +} + +// Find: phi: Phi(init, addsub) +// s: SuspendCheck +// c: Condition(phi, bound) +// i: If(c) +// TODO: Find a less pattern matching approach? 
+bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) { + DCHECK(iset_->empty()); + HInstruction* phi = block->GetFirstPhi(); + if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) { + HInstruction* s = block->GetFirstInstruction(); + if (s != nullptr && s->IsSuspendCheck()) { + HInstruction* c = s->GetNext(); + if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) { + HInstruction* i = c->GetNext(); + if (i != nullptr && i->IsIf() && i->InputAt(0) == c) { + iset_->insert(c); + iset_->insert(s); + return true; + } + } + } + } + return false; +} + +bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) { + if (block->GetFirstPhi() == nullptr) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) { + return false; + } + } + return true; + } + return false; +} + +bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info, + HInstruction* instruction, + /*out*/ int32_t* use_count) { + for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { + HInstruction* user = use.GetUser(); + if (iset_->find(user) == iset_->end()) { // not excluded? + HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation(); + if (other_loop_info != nullptr && other_loop_info->IsIn(*loop_info)) { + return false; + } + ++*use_count; + } + } + return true; +} + +void HLoopOptimization::ReplaceAllUses(HInstruction* instruction, HInstruction* replacement) { + const HUseList<HInstruction*>& uses = instruction->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end;) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + ++it; // increment before replacing + if (iset_->find(user) == iset_->end()) { // not excluded? + user->ReplaceInput(replacement, index); + induction_range_.Replace(user, instruction, replacement); // update induction + } + } + const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses(); + for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) { + HEnvironment* user = it->GetUser(); + size_t index = it->GetIndex(); + ++it; // increment before replacing + if (iset_->find(user->GetHolder()) == iset_->end()) { // not excluded? + user->RemoveAsUserOfInput(index); + user->SetRawEnvAt(index, replacement); + replacement->AddEnvUseAt(user, index); + } + } +} + +bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, + int32_t use_count, + HBasicBlock* block) { + // If true uses appear after the loop, replace these uses with the last value. Environment + // uses can consume this value too, since any first true use is outside the loop (although + // this may imply that de-opting may look "ahead" a bit on the phi value). If there are only + // environment uses, the value is dropped altogether, since the computations have no effect. 
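TryReplaceWithLastValue (whose body follows just below) and RemoveIfEmptyInnerLoop together turn a loop whose only observable effect is its induction into a closed-form value. A hedged source-level before/after of the intended effect; the rewritten form is what GenerateLastValue conceptually produces for this shape, not literal compiler output:

// Before: the loop does nothing except advance i, and i is only read
// after the loop.
int CountUp(int n) {
  int i = 0;
  for (; i < n; ++i) { /* empty body */ }
  return i;
}

// After (conceptually): the empty loop is removed and the use after the
// loop is replaced with the last value of the induction, max(n, 0).
int CountUpOptimized(int n) {
  return n > 0 ? n : 0;
}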
+ if (use_count > 0) { + if (!induction_range_.CanGenerateLastValue(instruction)) { + return false; + } + ReplaceAllUses(instruction, induction_range_.GenerateLastValue(instruction, graph_, block)); + } + return true; +} + +} // namespace art diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h new file mode 100644 index 0000000000..e18d17531e --- /dev/null +++ b/compiler/optimizing/loop_optimization.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ +#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ + +#include "induction_var_range.h" +#include "nodes.h" +#include "optimization.h" + +namespace art { + +/** + * Loop optimizations. Builds a loop hierarchy and applies optimizations to + * the detected nested loops, such as removal of dead induction and empty loops. + */ +class HLoopOptimization : public HOptimization { + public: + HLoopOptimization(HGraph* graph, HInductionVarAnalysis* induction_analysis); + + void Run() OVERRIDE; + + static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; + + private: + /** + * A single loop inside the loop hierarchy representation. + */ + struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> { + explicit LoopNode(HLoopInformation* lp_info) + : loop_info(lp_info), + outer(nullptr), + inner(nullptr), + previous(nullptr), + next(nullptr) {} + HLoopInformation* const loop_info; + LoopNode* outer; + LoopNode* inner; + LoopNode* previous; + LoopNode* next; + }; + + void LocalRun(); + + void AddLoop(HLoopInformation* loop_info); + void RemoveLoop(LoopNode* node); + + void TraverseLoopsInnerToOuter(LoopNode* node); + + void SimplifyInduction(LoopNode* node); + void SimplifyBlocks(LoopNode* node); + void RemoveIfEmptyInnerLoop(LoopNode* node); + + bool IsPhiInduction(HPhi* phi); + bool IsEmptyHeader(HBasicBlock* block); + bool IsEmptyBody(HBasicBlock* block); + + bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info, + HInstruction* instruction, + /*out*/ int32_t* use_count); + void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement); + bool TryReplaceWithLastValue(HInstruction* instruction, + int32_t use_count, + HBasicBlock* block); + + // Range information based on prior induction variable analysis. + InductionVarRange induction_range_; + + // Phase-local heap memory allocator for the loop optimizer. Storage obtained + // through this allocator is immediately released when the loop optimizer is done. + ArenaAllocator* loop_allocator_; + + // Entries into the loop hierarchy representation. The hierarchy resides + // in phase-local heap memory. + LoopNode* top_loop_; + LoopNode* last_loop_; + + // Temporary bookkeeping of a set of instructions. + // Contents reside in phase-local heap memory. + ArenaSet<HInstruction*>* iset_; + + // Counter that tracks how many induction cycles have been simplified. 
Useful + // to trigger incremental updates of induction variable analysis of outer loops + // when the induction of inner loops has changed. + int32_t induction_simplication_count_; + + friend class LoopOptimizationTest; + + DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc new file mode 100644 index 0000000000..7805a69a06 --- /dev/null +++ b/compiler/optimizing/loop_optimization_test.cc @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loop_optimization.h" +#include "optimizing_unit_test.h" + +namespace art { + +/** + * Fixture class for the loop optimization tests. These unit tests focus + * constructing the loop hierarchy. Actual optimizations are tested + * through the checker tests. + */ +class LoopOptimizationTest : public CommonCompilerTest { + public: + LoopOptimizationTest() + : pool_(), + allocator_(&pool_), + graph_(CreateGraph(&allocator_)), + iva_(new (&allocator_) HInductionVarAnalysis(graph_)), + loop_opt_(new (&allocator_) HLoopOptimization(graph_, iva_)) { + BuildGraph(); + } + + ~LoopOptimizationTest() { } + + /** Constructs bare minimum graph. */ + void BuildGraph() { + graph_->SetNumberOfVRegs(1); + entry_block_ = new (&allocator_) HBasicBlock(graph_); + return_block_ = new (&allocator_) HBasicBlock(graph_); + exit_block_ = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry_block_); + graph_->AddBlock(return_block_); + graph_->AddBlock(exit_block_); + graph_->SetEntryBlock(entry_block_); + graph_->SetExitBlock(exit_block_); + parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); + entry_block_->AddInstruction(parameter_); + return_block_->AddInstruction(new (&allocator_) HReturnVoid()); + exit_block_->AddInstruction(new (&allocator_) HExit()); + entry_block_->AddSuccessor(return_block_); + return_block_->AddSuccessor(exit_block_); + } + + /** Adds a loop nest at given position before successor. */ + HBasicBlock* AddLoop(HBasicBlock* position, HBasicBlock* successor) { + HBasicBlock* header = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* body = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(header); + graph_->AddBlock(body); + // Control flow. + position->ReplaceSuccessor(successor, header); + header->AddSuccessor(body); + header->AddSuccessor(successor); + header->AddInstruction(new (&allocator_) HIf(parameter_)); + body->AddSuccessor(header); + body->AddInstruction(new (&allocator_) HGoto()); + return header; + } + + /** Performs analysis. */ + void PerformAnalysis() { + graph_->BuildDominatorTree(); + iva_->Run(); + // Do not release the loop hierarchy. + loop_opt_->loop_allocator_ = &allocator_; + loop_opt_->LocalRun(); + } + + /** Constructs string representation of computed loop hierarchy. 
*/ + std::string LoopStructure() { + return LoopStructureRecurse(loop_opt_->top_loop_); + } + + // Helper method + std::string LoopStructureRecurse(HLoopOptimization::LoopNode* node) { + std::string s; + for ( ; node != nullptr; node = node->next) { + s.append("["); + s.append(LoopStructureRecurse(node->inner)); + s.append("]"); + } + return s; + } + + // General building fields. + ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; + HInductionVarAnalysis* iva_; + HLoopOptimization* loop_opt_; + + HBasicBlock* entry_block_; + HBasicBlock* return_block_; + HBasicBlock* exit_block_; + + HInstruction* parameter_; +}; + +// +// The actual tests. +// + +TEST_F(LoopOptimizationTest, NoLoops) { + PerformAnalysis(); + EXPECT_EQ("", LoopStructure()); +} + +TEST_F(LoopOptimizationTest, SingleLoop) { + AddLoop(entry_block_, return_block_); + PerformAnalysis(); + EXPECT_EQ("[]", LoopStructure()); +} + +TEST_F(LoopOptimizationTest, LoopNest10) { + HBasicBlock* b = entry_block_; + HBasicBlock* s = return_block_; + for (int i = 0; i < 10; i++) { + s = AddLoop(b, s); + b = s->GetSuccessors()[0]; + } + PerformAnalysis(); + EXPECT_EQ("[[[[[[[[[[]]]]]]]]]]", LoopStructure()); +} + +TEST_F(LoopOptimizationTest, LoopSequence10) { + HBasicBlock* b = entry_block_; + HBasicBlock* s = return_block_; + for (int i = 0; i < 10; i++) { + b = AddLoop(b, s); + s = b->GetSuccessors()[1]; + } + PerformAnalysis(); + EXPECT_EQ("[][][][][][][][][][]", LoopStructure()); +} + +TEST_F(LoopOptimizationTest, LoopSequenceOfNests) { + HBasicBlock* b = entry_block_; + HBasicBlock* s = return_block_; + for (int i = 0; i < 10; i++) { + b = AddLoop(b, s); + s = b->GetSuccessors()[1]; + HBasicBlock* bi = b->GetSuccessors()[0]; + HBasicBlock* si = b; + for (int j = 0; j < i; j++) { + si = AddLoop(bi, si); + bi = si->GetSuccessors()[0]; + } + } + PerformAnalysis(); + EXPECT_EQ("[]" + "[[]]" + "[[[]]]" + "[[[[]]]]" + "[[[[[]]]]]" + "[[[[[[]]]]]]" + "[[[[[[[]]]]]]]" + "[[[[[[[[]]]]]]]]" + "[[[[[[[[[]]]]]]]]]" + "[[[[[[[[[[]]]]]]]]]]", + LoopStructure()); +} + +TEST_F(LoopOptimizationTest, LoopNestWithSequence) { + HBasicBlock* b = entry_block_; + HBasicBlock* s = return_block_; + for (int i = 0; i < 10; i++) { + s = AddLoop(b, s); + b = s->GetSuccessors()[0]; + } + b = s; + s = b->GetSuccessors()[1]; + for (int i = 0; i < 9; i++) { + b = AddLoop(b, s); + s = b->GetSuccessors()[1]; + } + PerformAnalysis(); + EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure()); +} + +} // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 2808e1b5fc..45c7eb1a46 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -25,7 +25,7 @@ #include "base/stl_util.h" #include "intrinsics.h" #include "mirror/class-inl.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" namespace art { @@ -35,7 +35,7 @@ namespace art { // double). static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD == 0); -void HGraph::InitializeInexactObjectRTI(StackHandleScopeCollection* handles) { +void HGraph::InitializeInexactObjectRTI(VariableSizedHandleScope* handles) { ScopedObjectAccess soa(Thread::Current()); // Create the inexact Object reference type and store it in the HGraph. 
ClassLinker* linker = Runtime::Current()->GetClassLinker(); @@ -179,16 +179,16 @@ GraphAnalysisResult HGraph::BuildDominatorTree() { } void HGraph::ClearDominanceInformation() { - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - it.Current()->ClearDominanceInformation(); + for (HBasicBlock* block : GetReversePostOrder()) { + block->ClearDominanceInformation(); } reverse_post_order_.clear(); } void HGraph::ClearLoopInformation() { SetHasIrreducibleLoops(false); - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - it.Current()->SetLoopInformation(nullptr); + for (HBasicBlock* block : GetReversePostOrder()) { + block->SetLoopInformation(nullptr); } } @@ -275,8 +275,7 @@ void HGraph::ComputeDominanceInformation() { bool update_occurred = true; while (update_occurred) { update_occurred = false; - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { for (HBasicBlock* successor : block->GetSuccessors()) { update_occurred |= UpdateDominatorOfSuccessor(block, successor); } @@ -287,8 +286,7 @@ void HGraph::ComputeDominanceInformation() { // Make sure that there are no remaining blocks whose dominator information // needs to be updated. if (kIsDebugBuild) { - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { for (HBasicBlock* successor : block->GetSuccessors()) { DCHECK(!UpdateDominatorOfSuccessor(block, successor)); } @@ -297,8 +295,7 @@ void HGraph::ComputeDominanceInformation() { // Populate `dominated_blocks_` information after computing all dominators. // The potential presence of irreducible loops requires to do it after. - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { if (!block->IsEntryBlock()) { block->GetDominator()->AddDominatedBlock(block); } @@ -375,8 +372,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { void HGraph::ComputeTryBlockInformation() { // Iterate in reverse post order to propagate try membership information from // predecessors to their successors. - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { if (block->IsEntryBlock() || block->IsCatchBlock()) { // Catch blocks after simplification have only exceptional predecessors // and hence are never in tries. @@ -446,8 +442,7 @@ GraphAnalysisResult HGraph::AnalyzeLoops() const { // We iterate post order to ensure we visit inner loops before outer loops. // `PopulateRecursive` needs this guarantee to know whether a natural loop // contains an irreducible loop. 
- for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetPostOrder()) { if (block->IsLoopHeader()) { if (block->IsCatchBlock()) { // TODO: Dealing with exceptional back edges could be tricky because @@ -1134,8 +1129,8 @@ void HGraphVisitor::VisitInsertionOrder() { } void HGraphVisitor::VisitReversePostOrder() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); } } @@ -1986,10 +1981,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update the environments in this graph to have the invoke's environment // as parent. { - HReversePostOrderIterator it(*this); - it.Advance(); // Skip the entry block, we do not need to update the entry's suspend check. - for (; !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + // Skip the entry block, we do not need to update the entry's suspend check. + for (HBasicBlock* block : GetReversePostOrderSkipEntryBlock()) { for (HInstructionIterator instr_it(block->GetInstructions()); !instr_it.Done(); instr_it.Advance()) { @@ -2070,8 +2063,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Do a reverse post order of the blocks in the callee and do (1), (2), (3) // and (4) to the blocks that apply. - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); + for (HBasicBlock* current : GetReversePostOrder()) { if (current != exit_block_ && current != entry_block_ && current != first) { DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); @@ -2242,7 +2234,7 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { } static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { if (rti.IsValid()) { DCHECK(upper_bound_rti.IsSupertypeOf(rti)) << " upper_bound_rti: " << upper_bound_rti @@ -2295,7 +2287,7 @@ std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" << " is_valid=" << rhs.IsValid() - << " type=" << (!rhs.IsValid() ? "?" : PrettyClass(rhs.GetTypeHandle().Get())) + << " type=" << (!rhs.IsValid() ? "?" : mirror::Class::PrettyClass(rhs.GetTypeHandle().Get())) << " is_exact=" << rhs.IsExact() << " ]"; return os; @@ -2500,12 +2492,8 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { LoadKind load_kind = GetLoadKind(); if (HasAddress(load_kind)) { return GetAddress() == other_load_string->GetAddress(); - } else if (HasStringReference(load_kind)) { - return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile()); } else { - DCHECK(HasDexCacheReference(load_kind)) << load_kind; - // If the string indexes and dex files are the same, dex cache element offsets - // must also be the same, so we don't need to compare them. 
+ DCHECK(HasStringReference(load_kind)) << load_kind; return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile()); } } @@ -2535,8 +2523,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { return os << "BootImageAddress"; case HLoadString::LoadKind::kDexCacheAddress: return os << "DexCacheAddress"; - case HLoadString::LoadKind::kDexCachePcRelative: - return os << "DexCachePcRelative"; + case HLoadString::LoadKind::kBssEntry: + return os << "BssEntry"; case HLoadString::LoadKind::kDexCacheViaMethod: return os << "DexCacheViaMethod"; default: diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index dfa8276651..6a45149509 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -24,20 +24,22 @@ #include "base/arena_bit_vector.h" #include "base/arena_containers.h" #include "base/arena_object.h" +#include "base/array_ref.h" +#include "base/iteration_range.h" #include "base/stl_util.h" +#include "base/transform_array_ref.h" #include "dex_file.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" #include "handle_scope.h" #include "invoke_type.h" +#include "intrinsics_enum.h" #include "locations.h" #include "method_reference.h" #include "mirror/class.h" #include "offsets.h" #include "primitive.h" -#include "utils/array_ref.h" #include "utils/intrusive_forward_list.h" -#include "utils/transform_array_ref.h" namespace art { @@ -109,6 +111,9 @@ enum IfCondition { kCondBE, // <= kCondA, // > kCondAE, // >= + // First and last aliases. + kCondFirst = kCondEQ, + kCondLast = kCondAE, }; enum GraphAnalysisResult { @@ -171,7 +176,7 @@ class ReferenceTypeInfo : ValueObject { static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact); - static ReferenceTypeInfo Create(TypeHandle type_handle) SHARED_REQUIRES(Locks::mutator_lock_) { + static ReferenceTypeInfo Create(TypeHandle type_handle) REQUIRES_SHARED(Locks::mutator_lock_) { return Create(type_handle, type_handle->CannotBeAssignedFromOtherTypes()); } @@ -191,49 +196,49 @@ class ReferenceTypeInfo : ValueObject { bool IsExact() const { return is_exact_; } - bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsObjectClass() const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); return GetTypeHandle()->IsObjectClass(); } - bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsStringClass() const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); return GetTypeHandle()->IsStringClass(); } - bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsObjectArray() const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass(); } - bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsInterface() const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); return GetTypeHandle()->IsInterface(); } - bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsArrayClass() const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); return GetTypeHandle()->IsArrayClass(); } - bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsPrimitiveArrayClass() const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); return GetTypeHandle()->IsPrimitiveArray(); } - bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsNonPrimitiveArrayClass() const REQUIRES_SHARED(Locks::mutator_lock_) { 
DCHECK(IsValid()); return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray(); } - bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + bool CanArrayHold(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); if (!IsExact()) return false; if (!IsArrayClass()) return false; return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get()); } - bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); if (!IsExact()) return false; if (!IsArrayClass()) return false; @@ -244,13 +249,13 @@ class ReferenceTypeInfo : ValueObject { Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } - bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsSupertypeOf(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); DCHECK(rti.IsValid()); return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); } - bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); DCHECK(rti.IsValid()); return GetTypeHandle().Get() != rti.GetTypeHandle().Get() && @@ -260,7 +265,7 @@ class ReferenceTypeInfo : ValueObject { // Returns true if the type information provide the same amount of details. // Note that it does not mean that the instructions have the same actual type // (because the type can be the result of a merge). - bool IsEqual(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsEqual(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) { if (!IsValid() && !rti.IsValid()) { // Invalid types are equal. return true; @@ -332,7 +337,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { } // Acquires and stores RTI of inexact Object to be used when creating HNullConstant. - void InitializeInexactObjectRTI(StackHandleScopeCollection* handles); + void InitializeInexactObjectRTI(VariableSizedHandleScope* handles); ArenaAllocator* GetArena() const { return arena_; } const ArenaVector<HBasicBlock*>& GetBlocks() const { return blocks_; } @@ -456,10 +461,23 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return reverse_post_order_; } + ArrayRef<HBasicBlock* const> GetReversePostOrderSkipEntryBlock() { + DCHECK(GetReversePostOrder()[0] == entry_block_); + return ArrayRef<HBasicBlock* const>(GetReversePostOrder()).SubArray(1); + } + + IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetPostOrder() const { + return ReverseRange(GetReversePostOrder()); + } + const ArenaVector<HBasicBlock*>& GetLinearOrder() const { return linear_order_; } + IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetLinearPostOrder() const { + return ReverseRange(GetLinearOrder()); + } + bool HasBoundsChecks() const { return has_bounds_checks_; } @@ -575,7 +593,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // List of blocks to perform a reverse post order tree traversal. ArenaVector<HBasicBlock*> reverse_post_order_; - // List of blocks to perform a linear order tree traversal. + // List of blocks to perform a linear order tree traversal. Unlike the reverse + // post order, this order is not incrementally kept up-to-date. 
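GetPostOrder and GetLinearPostOrder above rely on ReverseRange, brought in through the new base/iteration_range.h include, to expose a container's reverse iterators to range-based for. A rough standalone approximation of that adapter (simplified; the real helper's definition is not part of this diff):

#include <iostream>
#include <vector>

template <typename Iter>
class IterationRange {
 public:
  IterationRange(Iter first, Iter last) : first_(first), last_(last) {}
  Iter begin() const { return first_; }
  Iter end() const { return last_; }
 private:
  Iter first_;
  Iter last_;
};

template <typename Container>
IterationRange<typename Container::const_reverse_iterator> ReverseRange(const Container& c) {
  return IterationRange<typename Container::const_reverse_iterator>(c.rbegin(), c.rend());
}

int main() {
  std::vector<int> reverse_post_order = {1, 2, 3};
  for (int block_id : ReverseRange(reverse_post_order)) {
    std::cout << block_id << " ";   // prints 3 2 1, i.e. post order
  }
  std::cout << "\n";
  return 0;
}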
ArenaVector<HBasicBlock*> linear_order_; HBasicBlock* entry_block_; @@ -827,7 +846,7 @@ static constexpr uint32_t kInvalidBlockId = static_cast<uint32_t>(-1); class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { public: - HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc) + explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc) : graph_(graph), predecessors_(graph->GetArena()->Adapter(kArenaAllocPredecessors)), successors_(graph->GetArena()->Adapter(kArenaAllocSuccessors)), @@ -1311,7 +1330,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #else #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ M(MipsComputeBaseMethodAddress, Instruction) \ - M(MipsDexCacheArraysBase, Instruction) + M(MipsDexCacheArraysBase, Instruction) \ + M(MipsPackedSwitch, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) @@ -1925,6 +1945,22 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return !HasEnvironmentUses() && GetUses().HasExactlyOneElement(); } + bool IsRemovable() const { + return + !HasSideEffects() && + !CanThrow() && + !IsSuspendCheck() && + !IsControlFlow() && + !IsNativeDebugInfo() && + !IsParameterValue() && + // If we added an explicit barrier then we should keep it. + !IsMemoryBarrier(); + } + + bool IsDeadAndRemovable() const { + return IsRemovable() && !HasUses(); + } + // Does this instruction strictly dominate `other_instruction`? // Returns false if this instruction and `other_instruction` are the same. // Aborts if this instruction and `other_instruction` are both phis. @@ -2074,10 +2110,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // to the current method. Such instructions are: // (1): Instructions that require an environment, as calling the runtime requires // to walk the stack and have the current method stored at a specific stack address. - // (2): Object literals like classes and strings, that are loaded from the dex cache - // fields of the current method. + // (2): HCurrentMethod, potentially used by HInvokeStaticOrDirect, HLoadString, or HLoadClass + // to access the dex cache. bool NeedsCurrentMethod() const { - return NeedsEnvironment() || IsLoadClass() || IsLoadString(); + return NeedsEnvironment() || IsCurrentMethod(); } // Returns whether the code generation of the instruction will require to have access @@ -3679,17 +3715,6 @@ class HNewInstance FINAL : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HNewInstance); }; -enum class Intrinsics { -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ - k ## Name, -#include "intrinsics_list.h" - kNone, - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS -}; -std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic); - enum IntrinsicNeedsEnvironmentOrCache { kNoEnvironmentOrCache, // Intrinsic does not require an environment or dex cache. kNeedsEnvironmentOrCache // Intrinsic requires an environment or requires a dex cache. 
@@ -3731,8 +3756,8 @@ class HInvoke : public HInstruction { uint32_t GetDexMethodIndex() const { return dex_method_index_; } const DexFile& GetDexFile() const { return GetEnvironment()->GetDexFile(); } - InvokeType GetOriginalInvokeType() const { - return GetPackedField<OriginalInvokeTypeField>(); + InvokeType GetInvokeType() const { + return GetPackedField<InvokeTypeField>(); } Intrinsics GetIntrinsic() const { @@ -3766,21 +3791,22 @@ class HInvoke : public HInstruction { bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; } + ArtMethod* GetResolvedMethod() const { return resolved_method_; } + DECLARE_ABSTRACT_INSTRUCTION(Invoke); protected: - static constexpr size_t kFieldOriginalInvokeType = kNumberOfGenericPackedBits; - static constexpr size_t kFieldOriginalInvokeTypeSize = + static constexpr size_t kFieldInvokeType = kNumberOfGenericPackedBits; + static constexpr size_t kFieldInvokeTypeSize = MinimumBitsToStore(static_cast<size_t>(kMaxInvokeType)); static constexpr size_t kFieldReturnType = - kFieldOriginalInvokeType + kFieldOriginalInvokeTypeSize; + kFieldInvokeType + kFieldInvokeTypeSize; static constexpr size_t kFieldReturnTypeSize = MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize; static constexpr size_t kNumberOfInvokePackedBits = kFlagCanThrow + 1; static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using OriginalInvokeTypeField = - BitField<InvokeType, kFieldOriginalInvokeType, kFieldOriginalInvokeTypeSize>; + using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>; using ReturnTypeField = BitField<Primitive::Type, kFieldReturnType, kFieldReturnTypeSize>; HInvoke(ArenaAllocator* arena, @@ -3789,23 +3815,26 @@ class HInvoke : public HInstruction { Primitive::Type return_type, uint32_t dex_pc, uint32_t dex_method_index, - InvokeType original_invoke_type) + ArtMethod* resolved_method, + InvokeType invoke_type) : HInstruction( SideEffects::AllExceptGCDependency(), dex_pc), // Assume write/read on all fields/arrays. 
number_of_arguments_(number_of_arguments), + resolved_method_(resolved_method), inputs_(number_of_arguments + number_of_other_inputs, arena->Adapter(kArenaAllocInvokeInputs)), dex_method_index_(dex_method_index), intrinsic_(Intrinsics::kNone), intrinsic_optimizations_(0) { SetPackedField<ReturnTypeField>(return_type); - SetPackedField<OriginalInvokeTypeField>(original_invoke_type); + SetPackedField<InvokeTypeField>(invoke_type); SetPackedFlag<kFlagCanThrow>(true); } void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); } uint32_t number_of_arguments_; + ArtMethod* const resolved_method_; ArenaVector<HUserRecord<HInstruction*>> inputs_; const uint32_t dex_method_index_; Intrinsics intrinsic_; @@ -3831,6 +3860,7 @@ class HInvokeUnresolved FINAL : public HInvoke { return_type, dex_pc, dex_method_index, + nullptr, invoke_type) { } @@ -3924,10 +3954,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { Primitive::Type return_type, uint32_t dex_pc, uint32_t method_index, - MethodReference target_method, + ArtMethod* resolved_method, DispatchInfo dispatch_info, - InvokeType original_invoke_type, - InvokeType optimized_invoke_type, + InvokeType invoke_type, + MethodReference target_method, ClinitCheckRequirement clinit_check_requirement) : HInvoke(arena, number_of_arguments, @@ -3939,10 +3969,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { return_type, dex_pc, method_index, - original_invoke_type), + resolved_method, + invoke_type), target_method_(target_method), dispatch_info_(dispatch_info) { - SetPackedField<OptimizedInvokeTypeField>(optimized_invoke_type); SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement); } @@ -4006,14 +4036,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); } bool HasSpecialInput() const { return GetNumberOfArguments() != InputCount(); } - InvokeType GetOptimizedInvokeType() const { - return GetPackedField<OptimizedInvokeTypeField>(); - } - - void SetOptimizedInvokeType(InvokeType invoke_type) { - SetPackedField<OptimizedInvokeTypeField>(invoke_type); - } - MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; } CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; } bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; } @@ -4035,12 +4057,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { } } bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; } - MethodReference GetTargetMethod() const { return target_method_; } - void SetTargetMethod(MethodReference method) { target_method_ = method; } - int32_t GetStringInitOffset() const { + QuickEntrypointEnum GetStringInitEntryPoint() const { DCHECK(IsStringInit()); - return dispatch_info_.method_load_data; + return static_cast<QuickEntrypointEnum>(dispatch_info_.method_load_data); } uint64_t GetMethodAddress() const { @@ -4064,7 +4084,11 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Is this instruction a call to a static method? 
bool IsStatic() const { - return GetOriginalInvokeType() == kStatic; + return GetInvokeType() == kStatic; + } + + MethodReference GetTargetMethod() const { + return target_method_; } // Remove the HClinitCheck or the replacement HLoadClass (set as last input by @@ -4106,26 +4130,18 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { void RemoveInputAt(size_t index); private: - static constexpr size_t kFieldOptimizedInvokeType = kNumberOfInvokePackedBits; - static constexpr size_t kFieldOptimizedInvokeTypeSize = - MinimumBitsToStore(static_cast<size_t>(kMaxInvokeType)); - static constexpr size_t kFieldClinitCheckRequirement = - kFieldOptimizedInvokeType + kFieldOptimizedInvokeTypeSize; + static constexpr size_t kFieldClinitCheckRequirement = kNumberOfInvokePackedBits; static constexpr size_t kFieldClinitCheckRequirementSize = MinimumBitsToStore(static_cast<size_t>(ClinitCheckRequirement::kLast)); static constexpr size_t kNumberOfInvokeStaticOrDirectPackedBits = kFieldClinitCheckRequirement + kFieldClinitCheckRequirementSize; static_assert(kNumberOfInvokeStaticOrDirectPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using OptimizedInvokeTypeField = - BitField<InvokeType, kFieldOptimizedInvokeType, kFieldOptimizedInvokeTypeSize>; using ClinitCheckRequirementField = BitField<ClinitCheckRequirement, kFieldClinitCheckRequirement, kFieldClinitCheckRequirementSize>; - // The target method may refer to different dex file or method index than the original - // invoke. This happens for sharpened calls and for calls where a method was redeclared - // in derived class to increase visibility. + // Cached values of the resolved method, to avoid needing the mutator lock. MethodReference target_method_; DispatchInfo dispatch_info_; @@ -4141,8 +4157,16 @@ class HInvokeVirtual FINAL : public HInvoke { Primitive::Type return_type, uint32_t dex_pc, uint32_t dex_method_index, + ArtMethod* resolved_method, uint32_t vtable_index) - : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index, kVirtual), + : HInvoke(arena, + number_of_arguments, + 0u, + return_type, + dex_pc, + dex_method_index, + resolved_method, + kVirtual), vtable_index_(vtable_index) {} bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { @@ -4155,6 +4179,7 @@ class HInvokeVirtual FINAL : public HInvoke { DECLARE_INSTRUCTION(InvokeVirtual); private: + // Cached value of the resolved method, to avoid needing the mutator lock. const uint32_t vtable_index_; DISALLOW_COPY_AND_ASSIGN(HInvokeVirtual); @@ -4167,8 +4192,16 @@ class HInvokeInterface FINAL : public HInvoke { Primitive::Type return_type, uint32_t dex_pc, uint32_t dex_method_index, + ArtMethod* resolved_method, uint32_t imt_index) - : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index, kInterface), + : HInvoke(arena, + number_of_arguments, + 0u, + return_type, + dex_pc, + dex_method_index, + resolved_method, + kInterface), imt_index_(imt_index) {} bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { @@ -4182,6 +4215,7 @@ class HInvokeInterface FINAL : public HInvoke { DECLARE_INSTRUCTION(InvokeInterface); private: + // Cached value of the resolved method, to avoid needing the mutator lock. 
const uint32_t imt_index_; DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); @@ -4363,7 +4397,7 @@ class HDiv FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc) - : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {} + : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} template <typename T> T ComputeIntegral(T x, T y) const { @@ -4398,11 +4432,6 @@ class HDiv FINAL : public HBinaryOperation { ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - static SideEffects SideEffectsForArchRuntimeCalls() { - // The generated code can use a runtime call. - return SideEffects::CanTriggerGC(); - } - DECLARE_INSTRUCTION(Div); private: @@ -4415,7 +4444,7 @@ class HRem FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc) - : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls(), dex_pc) {} + : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} template <typename T> T ComputeIntegral(T x, T y) const { @@ -4450,10 +4479,6 @@ class HRem FINAL : public HBinaryOperation { ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - static SideEffects SideEffectsForArchRuntimeCalls() { - return SideEffects::CanTriggerGC(); - } - DECLARE_INSTRUCTION(Rem); private: @@ -4906,9 +4931,7 @@ class HTypeConversion FINAL : public HExpression<1> { public: // Instantiate a type conversion of `input` to `result_type`. HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc) - : HExpression(result_type, - SideEffectsForArchRuntimeCalls(input->GetType(), result_type), - dex_pc) { + : HExpression(result_type, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); // Invariant: We should never generate a conversion to a Boolean value. DCHECK_NE(Primitive::kPrimBoolean, result_type); @@ -4927,18 +4950,6 @@ class HTypeConversion FINAL : public HExpression<1> { // containing the result. If the input cannot be converted, return nullptr. HConstant* TryStaticEvaluation() const; - static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type input_type, - Primitive::Type result_type) { - // Some architectures may not require the 'GC' side effects, but at this point - // in the compilation process we do not know what architecture we will - // generate code for, so we must be conservative. 
- if ((Primitive::IsFloatingPointType(input_type) && Primitive::IsIntegralType(result_type)) - || (input_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(result_type))) { - return SideEffects::CanTriggerGC(); - } - return SideEffects::None(); - } - DECLARE_INSTRUCTION(TypeConversion); private: @@ -5020,9 +5031,7 @@ class HInstanceFieldGet FINAL : public HExpression<1> { const DexFile& dex_file, Handle<mirror::DexCache> dex_cache, uint32_t dex_pc) - : HExpression(field_type, - SideEffects::FieldReadOfType(field_type, is_volatile), - dex_pc), + : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), field_info_(field_offset, field_type, is_volatile, @@ -5073,8 +5082,7 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { const DexFile& dex_file, Handle<mirror::DexCache> dex_cache, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), - dex_pc), + : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), field_info_(field_offset, field_type, is_volatile, @@ -5441,7 +5449,8 @@ class HLoadClass FINAL : public HInstruction { bool is_referrers_class, uint32_t dex_pc, bool needs_access_check, - bool is_in_dex_cache) + bool is_in_dex_cache, + bool is_in_boot_image) : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), @@ -5455,6 +5464,7 @@ class HLoadClass FINAL : public HInstruction { is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod); SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache); + SetPackedFlag<kFlagIsInBootImage>(is_in_boot_image); SetPackedFlag<kFlagGenerateClInitCheck>(false); } @@ -5545,6 +5555,7 @@ class HLoadClass FINAL : public HInstruction { bool IsReferrersClass() const { return GetLoadKind() == LoadKind::kReferrersClass; } bool NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); } bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); } + bool IsInBootImage() const { return GetPackedFlag<kFlagIsInBootImage>(); } bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); } void MarkInDexCache() { @@ -5554,6 +5565,10 @@ class HLoadClass FINAL : public HInstruction { SetSideEffects(SideEffects::None()); } + void MarkInBootImage() { + SetPackedFlag<kFlagIsInBootImage>(true); + } + void AddSpecialInput(HInstruction* special_input); using HInstruction::GetInputRecords; // Keep the const version visible. @@ -5571,9 +5586,10 @@ class HLoadClass FINAL : public HInstruction { private: static constexpr size_t kFlagNeedsAccessCheck = kNumberOfGenericPackedBits; static constexpr size_t kFlagIsInDexCache = kFlagNeedsAccessCheck + 1; + static constexpr size_t kFlagIsInBootImage = kFlagIsInDexCache + 1; // Whether this instruction must generate the initialization check. // Used for code generation. - static constexpr size_t kFlagGenerateClInitCheck = kFlagIsInDexCache + 1; + static constexpr size_t kFlagGenerateClInitCheck = kFlagIsInBootImage + 1; static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1; static constexpr size_t kFieldLoadKindSize = MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); @@ -5658,10 +5674,9 @@ class HLoadString FINAL : public HInstruction { // Used for strings outside the boot image referenced by JIT-compiled code. 
kDexCacheAddress, - // Load from resolved strings array in the dex cache using a PC-relative load. - // Used for strings outside boot image when we know that we can access - // the dex cache arrays using a PC-relative load. - kDexCachePcRelative, + // Load from an entry in the .bss section using a PC-relative load. + // Used for strings outside boot image when .bss is accessible with a PC-relative load. + kBssEntry, // Load from resolved strings array accessed through the class loaded from // the compiled method's own ArtMethod*. This is the default access type when @@ -5680,7 +5695,7 @@ class HLoadString FINAL : public HInstruction { string_index_(string_index) { SetPackedFlag<kFlagIsInDexCache>(false); SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod); - load_data_.ref.dex_file = &dex_file; + load_data_.dex_file_ = &dex_file; } void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) { @@ -5693,20 +5708,11 @@ class HLoadString FINAL : public HInstruction { const DexFile& dex_file, uint32_t string_index) { DCHECK(HasStringReference(load_kind)); - load_data_.ref.dex_file = &dex_file; + load_data_.dex_file_ = &dex_file; string_index_ = string_index; SetLoadKindInternal(load_kind); } - void SetLoadKindWithDexCacheReference(LoadKind load_kind, - const DexFile& dex_file, - uint32_t element_index) { - DCHECK(HasDexCacheReference(load_kind)); - load_data_.ref.dex_file = &dex_file; - load_data_.ref.dex_cache_element_index = element_index; - SetLoadKindInternal(load_kind); - } - LoadKind GetLoadKind() const { return GetPackedField<LoadKindField>(); } @@ -5718,8 +5724,6 @@ class HLoadString FINAL : public HInstruction { return string_index_; } - uint32_t GetDexCacheElementOffset() const; - uint64_t GetAddress() const { DCHECK(HasAddress(GetLoadKind())); return load_data_.address; @@ -5789,6 +5793,7 @@ class HLoadString FINAL : public HInstruction { static bool HasStringReference(LoadKind load_kind) { return load_kind == LoadKind::kBootImageLinkTimeAddress || load_kind == LoadKind::kBootImageLinkTimePcRelative || + load_kind == LoadKind::kBssEntry || load_kind == LoadKind::kDexCacheViaMethod; } @@ -5796,10 +5801,6 @@ class HLoadString FINAL : public HInstruction { return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress; } - static bool HasDexCacheReference(LoadKind load_kind) { - return load_kind == LoadKind::kDexCachePcRelative; - } - void SetLoadKindInternal(LoadKind load_kind); // The special input is the HCurrentMethod for kDexCacheViaMethod. @@ -5812,10 +5813,7 @@ class HLoadString FINAL : public HInstruction { uint32_t string_index_; union { - struct { - const DexFile* dex_file; // For string reference and dex cache reference. - uint32_t dex_cache_element_index; // Only for dex cache reference. - } ref; + const DexFile* dex_file_; // For string reference. uint64_t address; // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets. } load_data_; @@ -5825,15 +5823,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs); // Note: defined outside class to see operator<<(., HLoadString::LoadKind). inline const DexFile& HLoadString::GetDexFile() const { - DCHECK(HasStringReference(GetLoadKind()) || HasDexCacheReference(GetLoadKind())) - << GetLoadKind(); - return *load_data_.ref.dex_file; -} - -// Note: defined outside class to see operator<<(., HLoadString::LoadKind). 
-inline uint32_t HLoadString::GetDexCacheElementOffset() const { - DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind(); - return load_data_.ref.dex_cache_element_index; + DCHECK(HasStringReference(GetLoadKind())) << GetLoadKind(); + return *load_data_.dex_file_; } // Note: defined outside class to see operator<<(., HLoadString::LoadKind). @@ -5841,7 +5832,7 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kDexCachePcRelative || + GetLoadKind() == LoadKind::kBssEntry || GetLoadKind() == LoadKind::kBootImageLinkTimeAddress || GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, @@ -5895,9 +5886,7 @@ class HStaticFieldGet FINAL : public HExpression<1> { const DexFile& dex_file, Handle<mirror::DexCache> dex_cache, uint32_t dex_pc) - : HExpression(field_type, - SideEffects::FieldReadOfType(field_type, is_volatile), - dex_pc), + : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), field_info_(field_offset, field_type, is_volatile, @@ -5945,8 +5934,7 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { const DexFile& dex_file, Handle<mirror::DexCache> dex_cache, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), - dex_pc), + : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), field_info_(field_offset, field_type, is_volatile, @@ -6223,7 +6211,7 @@ class HInstanceOf FINAL : public HExpression<2> { class HBoundType FINAL : public HExpression<1> { public: - HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc) + explicit HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc) : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc), upper_bound_(ReferenceTypeInfo::CreateInvalid()) { SetPackedFlag<kFlagUpperCanBeNull>(true); @@ -6644,95 +6632,6 @@ class HGraphDelegateVisitor : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(HGraphDelegateVisitor); }; -class HInsertionOrderIterator : public ValueObject { - public: - explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {} - - bool Done() const { return index_ == graph_.GetBlocks().size(); } - HBasicBlock* Current() const { return graph_.GetBlocks()[index_]; } - void Advance() { ++index_; } - - private: - const HGraph& graph_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HInsertionOrderIterator); -}; - -class HReversePostOrderIterator : public ValueObject { - public: - explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) { - // Check that reverse post order of the graph has been built. - DCHECK(!graph.GetReversePostOrder().empty()); - } - - bool Done() const { return index_ == graph_.GetReversePostOrder().size(); } - HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_]; } - void Advance() { ++index_; } - - private: - const HGraph& graph_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HReversePostOrderIterator); -}; - -class HPostOrderIterator : public ValueObject { - public: - explicit HPostOrderIterator(const HGraph& graph) - : graph_(graph), index_(graph_.GetReversePostOrder().size()) { - // Check that reverse post order of the graph has been built. 
- DCHECK(!graph.GetReversePostOrder().empty()); - } - - bool Done() const { return index_ == 0; } - HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_ - 1u]; } - void Advance() { --index_; } - - private: - const HGraph& graph_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator); -}; - -class HLinearPostOrderIterator : public ValueObject { - public: - explicit HLinearPostOrderIterator(const HGraph& graph) - : order_(graph.GetLinearOrder()), index_(graph.GetLinearOrder().size()) {} - - bool Done() const { return index_ == 0; } - - HBasicBlock* Current() const { return order_[index_ - 1u]; } - - void Advance() { - --index_; - DCHECK_GE(index_, 0U); - } - - private: - const ArenaVector<HBasicBlock*>& order_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator); -}; - -class HLinearOrderIterator : public ValueObject { - public: - explicit HLinearOrderIterator(const HGraph& graph) - : order_(graph.GetLinearOrder()), index_(0) {} - - bool Done() const { return index_ == order_.size(); } - HBasicBlock* Current() const { return order_[index_]; } - void Advance() { ++index_; } - - private: - const ArenaVector<HBasicBlock*>& order_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator); -}; - // Iterator over the blocks that art part of the loop. Includes blocks part // of an inner loop. The order in which the blocks are iterated is on their // block id. diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h index de77245e17..36431c1fb9 100644 --- a/compiler/optimizing/nodes_mips.h +++ b/compiler/optimizing/nodes_mips.h @@ -66,6 +66,41 @@ class HMipsDexCacheArraysBase : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase); }; +// Mips version of HPackedSwitch that holds a pointer to the base method address. +class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { + public: + HMipsPackedSwitch(int32_t start_value, + int32_t num_entries, + HInstruction* input, + HMipsComputeBaseMethodAddress* method_base, + uint32_t dex_pc) + : HTemplateInstruction(SideEffects::None(), dex_pc), + start_value_(start_value), + num_entries_(num_entries) { + SetRawInputAt(0, input); + SetRawInputAt(1, method_base); + } + + bool IsControlFlow() const OVERRIDE { return true; } + + int32_t GetStartValue() const { return start_value_; } + + int32_t GetNumEntries() const { return num_entries_; } + + HBasicBlock* GetDefaultBlock() const { + // Last entry is the default block. + return GetBlock()->GetSuccessors()[num_entries_]; + } + + DECLARE_INSTRUCTION(MipsPackedSwitch); + + private: + const int32_t start_value_; + const int32_t num_entries_; + + DISALLOW_COPY_AND_ASSIGN(HMipsPackedSwitch); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 8bd8667f84..814202e97b 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -17,6 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ #define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ +// This `#include` should never be used by compilation, as this file (`nodes_shared.h`) is included +// in `nodes.h`. However it helps editing tools (e.g. YouCompleteMe) by giving them better context +// (defining `HInstruction` and co). 
+#include "nodes.h" + namespace art { class HMultiplyAccumulate FINAL : public HExpression<3> { @@ -117,10 +122,15 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { // This instruction computes an intermediate address pointing in the 'middle' of an object. The // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is // never used across anything that can trigger GC. +// The result of this instruction is not a pointer in the sense of `Primitive::kPrimNot`. So we +// represent it by the type `Primitive::kPrimInt`. class HIntermediateAddress FINAL : public HExpression<2> { public: HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) - : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) { + : HExpression(Primitive::kPrimInt, SideEffects::DependsOnGC(), dex_pc) { + DCHECK_EQ(Primitive::ComponentSize(Primitive::kPrimInt), + Primitive::ComponentSize(Primitive::kPrimNot)) + << "kPrimInt and kPrimNot have different sizes."; SetRawInputAt(0, base_address); SetRawInputAt(1, offset); } diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 2f59d4cd5b..0819fb01ac 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -37,7 +37,10 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> { virtual ~HOptimization() {} - // Return the name of the pass. + // Return the name of the pass. Pass names for a single HOptimization should be of form + // <optimization_name> or <optimization_name>$<pass_name> for common <optimization_name> prefix. + // Example: 'instruction_simplifier', 'instruction_simplifier$after_bce', + // 'instruction_simplifier$before_codegen'. const char* GetPassName() const { return pass_name_; } // Perform the analysis itself. diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index a6d234d739..013e110b87 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -19,6 +19,7 @@ #include "arch/instruction_set.h" #include "cfi_test.h" +#include "driver/compiler_options.h" #include "gtest/gtest.h" #include "optimizing/code_generator.h" #include "optimizing/optimizing_unit_test.h" @@ -51,7 +52,7 @@ class OptimizingCFITest : public CFITest { void SetUpFrame(InstructionSet isa) { // Setup simple context. std::string error; - isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error); graph_ = CreateGraph(&allocator_); // Generate simple frame with some spills. code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_); @@ -157,13 +158,28 @@ class OptimizingCFITest : public CFITest { TestImpl(isa, #isa, expected_asm, expected_cfi); \ } +// TODO(VIXL): Support this test for the VIXL backend. +#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND) TEST_ISA(kThumb2) +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 TEST_ISA(kArm64) +#endif +#ifdef ART_ENABLE_CODEGEN_x86 TEST_ISA(kX86) +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 TEST_ISA(kX86_64) +#endif +#ifdef ART_ENABLE_CODEGEN_mips TEST_ISA(kMips) +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 TEST_ISA(kMips64) +#endif +// TODO(VIXL): Support this test for the VIXL backend. 
+#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND) TEST_F(OptimizingCFITest, kThumb2Adjust) { std::vector<uint8_t> expected_asm( expected_asm_kThumb2_adjust, @@ -184,7 +200,9 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) { Finish(); Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi); } +#endif +#ifdef ART_ENABLE_CODEGEN_mips TEST_F(OptimizingCFITest, kMipsAdjust) { // One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. static constexpr size_t kNumNops = 1u + (1u << 15); @@ -212,7 +230,9 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { Finish(); Check(kMips, "kMips_adjust", expected_asm, expected_cfi); } +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 TEST_F(OptimizingCFITest, kMips64Adjust) { // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. static constexpr size_t kNumNops = 1u + (1u << 15); @@ -240,6 +260,7 @@ TEST_F(OptimizingCFITest, kMips64Adjust) { Finish(); Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); } +#endif #endif // ART_TARGET_ANDROID diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 05eb06333e..f735dc8cb3 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -1,10 +1,10 @@ static constexpr uint8_t expected_asm_kThumb2[] = { - 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x0B, 0xB0, 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, }; static constexpr uint8_t expected_cfi_kThumb2[] = { 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, - 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x0A, 0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E, 0x40, }; @@ -19,20 +19,19 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000006: .cfi_offset_extended: r81 at cfa-16 // 0x00000006: sub sp, sp, #44 // 0x00000008: .cfi_def_cfa_offset: 64 -// 0x00000008: str r0, [sp, #0] -// 0x0000000a: .cfi_remember_state -// 0x0000000a: add sp, sp, #44 -// 0x0000000c: .cfi_def_cfa_offset: 20 -// 0x0000000c: vpop.f32 {s16-s17} -// 0x00000010: .cfi_def_cfa_offset: 12 -// 0x00000010: .cfi_restore_extended: r80 -// 0x00000010: .cfi_restore_extended: r81 -// 0x00000010: pop {r5, r6, pc} -// 0x00000012: .cfi_restore_state -// 0x00000012: .cfi_def_cfa_offset: 64 +// 0x00000008: .cfi_remember_state +// 0x00000008: add sp, sp, #44 +// 0x0000000a: .cfi_def_cfa_offset: 20 +// 0x0000000a: vpop.f32 {s16-s17} +// 0x0000000e: .cfi_def_cfa_offset: 12 +// 0x0000000e: .cfi_restore_extended: r80 +// 0x0000000e: .cfi_restore_extended: r81 +// 0x0000000e: pop {r5, r6, pc} +// 0x00000010: .cfi_restore_state +// 0x00000010: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kArm64[] = { - 0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9, + 0xFF, 0x03, 0x01, 0xD1, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9, 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9, 0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; @@ -41,7 +40,7 @@ static constexpr uint8_t expected_cfi_kArm64[] = { 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: str x0, [sp, #-64]! 
+// 0x00000000: sub sp, sp, #0x40 (64) // 0x00000004: .cfi_def_cfa_offset: 64 // 0x00000004: str x20, [sp, #40] // 0x00000008: .cfi_offset: r20 at cfa-24 @@ -67,12 +66,12 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000024: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kX86[] = { - 0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D, + 0x56, 0x55, 0x83, 0xEC, 0x34, 0x83, 0xC4, 0x34, 0x5D, 0x5E, 0xC3, }; static constexpr uint8_t expected_cfi_kX86[] = { 0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E, - 0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E, + 0x40, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E, 0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40, }; // 0x00000000: push esi @@ -83,29 +82,28 @@ static constexpr uint8_t expected_cfi_kX86[] = { // 0x00000002: .cfi_offset: r5 at cfa-12 // 0x00000002: sub esp, 52 // 0x00000005: .cfi_def_cfa_offset: 64 -// 0x00000005: mov [esp], eax -// 0x00000008: .cfi_remember_state -// 0x00000008: add esp, 52 -// 0x0000000b: .cfi_def_cfa_offset: 12 -// 0x0000000b: pop ebp -// 0x0000000c: .cfi_def_cfa_offset: 8 -// 0x0000000c: .cfi_restore: r5 -// 0x0000000c: pop esi -// 0x0000000d: .cfi_def_cfa_offset: 4 -// 0x0000000d: .cfi_restore: r6 -// 0x0000000d: ret -// 0x0000000e: .cfi_restore_state -// 0x0000000e: .cfi_def_cfa_offset: 64 +// 0x00000005: .cfi_remember_state +// 0x00000005: add esp, 52 +// 0x00000008: .cfi_def_cfa_offset: 12 +// 0x00000008: pop ebp +// 0x0000000a: .cfi_def_cfa_offset: 8 +// 0x0000000a: .cfi_restore: r5 +// 0x0000000a: pop esi +// 0x0000000b: .cfi_def_cfa_offset: 4 +// 0x0000000b: .cfi_restore: r6 +// 0x0000000b: ret +// 0x0000000c: .cfi_restore_state +// 0x0000000c: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kX86_64[] = { 0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, - 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x48, 0x89, 0x3C, 0x24, + 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3, }; static constexpr uint8_t expected_cfi_kX86_64[] = { 0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E, - 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x44, 0x0A, 0x47, 0xDD, 0x47, + 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x0A, 0x47, 0xDD, 0x47, 0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6, 0x41, 0x0B, 0x0E, 0x40, }; @@ -121,35 +119,34 @@ static constexpr uint8_t expected_cfi_kX86_64[] = { // 0x0000000d: .cfi_offset: r30 at cfa-32 // 0x0000000d: movsd [rsp + 24], xmm12 // 0x00000014: .cfi_offset: r29 at cfa-40 -// 0x00000014: movq [rsp], rdi -// 0x00000018: .cfi_remember_state -// 0x00000018: movsd xmm12, [rsp + 24] -// 0x0000001f: .cfi_restore: r29 -// 0x0000001f: movsd xmm13, [rsp + 32] -// 0x00000026: .cfi_restore: r30 -// 0x00000026: addq rsp, 40 -// 0x0000002a: .cfi_def_cfa_offset: 24 -// 0x0000002a: pop rbx -// 0x0000002b: .cfi_def_cfa_offset: 16 -// 0x0000002b: .cfi_restore: r3 -// 0x0000002b: pop rbp -// 0x0000002c: .cfi_def_cfa_offset: 8 -// 0x0000002c: .cfi_restore: r6 -// 0x0000002c: ret -// 0x0000002d: .cfi_restore_state -// 0x0000002d: .cfi_def_cfa_offset: 64 +// 0x00000014: .cfi_remember_state +// 0x00000014: movsd xmm12, [rsp + 24] +// 0x0000001c: .cfi_restore: r29 +// 0x0000001c: movsd xmm13, [rsp + 32] +// 0x00000022: .cfi_restore: r30 +// 0x00000022: addq rsp, 40 +// 0x00000026: .cfi_def_cfa_offset: 24 +// 0x00000026: 
pop rbx +// 0x00000027: .cfi_def_cfa_offset: 16 +// 0x00000027: .cfi_restore: r3 +// 0x00000027: pop rbp +// 0x00000028: .cfi_def_cfa_offset: 8 +// 0x00000028: .cfi_restore: r6 +// 0x00000028: ret +// 0x00000029: .cfi_restore_state +// 0x00000029: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips[] = { 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF, 0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7, - 0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, + 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F, 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, - 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x00, 0xE0, 0x03, 0x40, 0x00, 0xBD, 0x27, }; static constexpr uint8_t expected_cfi_kMips[] = { 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, - 0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48, - 0x0B, 0x0E, 0x40, + 0x48, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, + 0x0E, 0x40, }; // 0x00000000: addiu r29, r29, -64 // 0x00000004: .cfi_def_cfa_offset: 64 @@ -161,34 +158,33 @@ static constexpr uint8_t expected_cfi_kMips[] = { // 0x00000010: .cfi_offset: r16 at cfa-12 // 0x00000010: sdc1 f22, +40(r29) // 0x00000014: sdc1 f20, +32(r29) -// 0x00000018: sw r4, +0(r29) -// 0x0000001c: .cfi_remember_state -// 0x0000001c: lw r31, +60(r29) -// 0x00000020: .cfi_restore: r31 -// 0x00000020: lw r17, +56(r29) -// 0x00000024: .cfi_restore: r17 -// 0x00000024: lw r16, +52(r29) -// 0x00000028: .cfi_restore: r16 -// 0x00000028: ldc1 f22, +40(r29) -// 0x0000002c: ldc1 f20, +32(r29) +// 0x00000018: .cfi_remember_state +// 0x00000018: lw r31, +60(r29) +// 0x0000001c: .cfi_restore: r31 +// 0x0000001c: lw r17, +56(r29) +// 0x00000020: .cfi_restore: r17 +// 0x00000020: lw r16, +52(r29) +// 0x00000024: .cfi_restore: r16 +// 0x00000024: ldc1 f22, +40(r29) +// 0x00000028: ldc1 f20, +32(r29) +// 0x0000002c: jr r31 // 0x00000030: addiu r29, r29, 64 // 0x00000034: .cfi_def_cfa_offset: 0 -// 0x00000034: jr r31 -// 0x00000038: nop -// 0x0000003c: .cfi_restore_state -// 0x0000003c: .cfi_def_cfa_offset: 64 +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64[] = { 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, - 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x18, 0x00, 0xBD, 0x67, + 0xE8, 0xFF, 0xBD, 0x67, 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, }; + static constexpr uint8_t expected_cfi_kMips64[] = { 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, - 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x44, 0x0A, 0x44, + 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, }; @@ -206,29 +202,28 @@ static constexpr uint8_t expected_cfi_kMips64[] = { // 0x00000018: .cfi_offset: r56 at cfa-40 // 0x00000018: daddiu r29, r29, -24 // 0x0000001c: .cfi_def_cfa_offset: 64 -// 0x0000001c: sd r4, +0(r29) -// 0x00000020: .cfi_remember_state -// 0x00000020: daddiu r29, r29, 24 -// 0x00000024: .cfi_def_cfa_offset: 40 -// 0x00000024: ldc1 f24, +0(r29) 
-// 0x00000028: .cfi_restore: r56 -// 0x00000028: ldc1 f25, +8(r29) -// 0x0000002c: .cfi_restore: r57 -// 0x0000002c: ld r16, +16(r29) -// 0x00000030: .cfi_restore: r16 -// 0x00000030: ld r17, +24(r29) -// 0x00000034: .cfi_restore: r17 -// 0x00000034: ld r31, +32(r29) -// 0x00000038: .cfi_restore: r31 -// 0x00000038: daddiu r29, r29, 40 -// 0x0000003c: .cfi_def_cfa_offset: 0 -// 0x0000003c: jr r31 -// 0x00000040: nop -// 0x00000044: .cfi_restore_state -// 0x00000044: .cfi_def_cfa_offset: 64 +// 0x0000001c: .cfi_remember_state +// 0x0000001c: daddiu r29, r29, 24 +// 0x00000020: .cfi_def_cfa_offset: 40 +// 0x00000020: ldc1 f24, +0(r29) +// 0x00000024: .cfi_restore: r56 +// 0x00000024: ldc1 f25, +8(r29) +// 0x00000028: .cfi_restore: r57 +// 0x00000028: ld r16, +16(r29) +// 0x0000002c: .cfi_restore: r16 +// 0x0000002c: ld r17, +24(r29) +// 0x00000030: .cfi_restore: r17 +// 0x00000030: ld r31, +32(r29) +// 0x00000034: .cfi_restore: r31 +// 0x00000034: daddiu r29, r29, 40 +// 0x00000038: .cfi_def_cfa_offset: 0 +// 0x00000038: jr r31 +// 0x0000003c: nop +// 0x00000040: .cfi_restore_state +// 0x00000040: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kThumb2_adjust[] = { - 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x00, 0x28, + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28, 0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, @@ -244,7 +239,7 @@ static constexpr uint8_t expected_asm_kThumb2_adjust[] = { }; static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, - 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x86, 0x0A, 0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E, 0x40, }; @@ -259,9 +254,9 @@ static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { // 0x00000006: .cfi_offset_extended: r81 at cfa-16 // 0x00000006: sub sp, sp, #44 // 0x00000008: .cfi_def_cfa_offset: 64 -// 0x00000008: str r0, [sp, #0] -// 0x0000000a: cmp r0, #0 -// 0x0000000c: beq +128 (0x00000090) +// 0x00000008: cmp r0, #0 +// 0x0000000a: beq +128 (0x00000090) +// 0x0000000c: ldr r0, [r0, #0] // 0x0000000e: ldr r0, [r0, #0] // 0x00000010: ldr r0, [r0, #0] // 0x00000012: ldr r0, [r0, #0] @@ -326,36 +321,34 @@ static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { // 0x00000088: ldr r0, [r0, #0] // 0x0000008a: ldr r0, [r0, #0] // 0x0000008c: ldr r0, [r0, #0] -// 0x0000008e: ldr r0, [r0, #0] -// 0x00000090: .cfi_remember_state -// 0x00000090: add sp, sp, #44 -// 0x00000092: .cfi_def_cfa_offset: 20 -// 0x00000092: vpop.f32 {s16-s17} -// 0x00000096: .cfi_def_cfa_offset: 12 -// 0x00000096: .cfi_restore_extended: r80 -// 0x00000096: .cfi_restore_extended: r81 -// 0x00000096: pop {r5, r6, pc} -// 0x00000098: .cfi_restore_state -// 0x00000098: .cfi_def_cfa_offset: 64 +// 0x0000008e: .cfi_remember_state +// 0x0000008e: add sp, sp, #44 +// 0x00000090: .cfi_def_cfa_offset: 20 +// 0x00000090: vpop.f32 {s16-s17} +// 0x00000094: .cfi_def_cfa_offset: 12 +// 0x00000094: .cfi_restore_extended: r80 +// 0x00000094: .cfi_restore_extended: r81 +// 0x00000094: pop {r5, r6, pc} +// 0x00000096: .cfi_restore_state +// 0x00000096: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips_adjust_head[] = { 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 
0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF, 0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7, - 0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27, + 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27, 0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C, 0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F, 0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27, }; static constexpr uint8_t expected_asm_kMips_adjust_tail[] = { 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F, - 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x40, 0x00, 0xBD, 0x27, - 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x09, 0x00, 0xE0, 0x03, + 0x40, 0x00, 0xBD, 0x27, }; static constexpr uint8_t expected_cfi_kMips_adjust[] = { 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, - 0x54, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A, - 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48, 0x0B, 0x0E, - 0x40, + 0x50, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A, + 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40, }; // 0x00000000: addiu r29, r29, -64 // 0x00000004: .cfi_def_cfa_offset: 64 @@ -367,42 +360,40 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x00000010: .cfi_offset: r16 at cfa-12 // 0x00000010: sdc1 f22, +40(r29) // 0x00000014: sdc1 f20, +32(r29) -// 0x00000018: sw r4, +0(r29) -// 0x0000001c: bne r0, r4, 0x00000040 ; +36 -// 0x00000020: addiu r29, r29, -4 -// 0x00000024: .cfi_def_cfa_offset: 68 -// 0x00000024: sw r31, +0(r29) -// 0x00000028: bltzal r0, 0x0000002c ; +4 -// 0x0000002c: lui r1, 0x20000 -// 0x00000030: ori r1, r1, 24 -// 0x00000034: addu r1, r1, r31 -// 0x00000038: lw r31, +0(r29) -// 0x0000003c: jr r1 -// 0x00000040: addiu r29, r29, 4 -// 0x00000044: .cfi_def_cfa_offset: 64 -// 0x00000044: nop +// 0x00000018: bne r0, r4, 0x00000040 ; +36 +// 0x0000001c: addiu r29, r29, -4 +// 0x00000020: .cfi_def_cfa_offset: 68 +// 0x00000020: sw r31, +0(r29) +// 0x00000024: bltzal r0, 0x0000002c ; +4 +// 0x00000028: lui r1, 0x20000 +// 0x0000002c: ori r1, r1, 24 +// 0x00000030: addu r1, r1, r31 +// 0x00000034: lw r31, +0(r29) +// 0x00000038: jr r1 +// 0x0000003c: addiu r29, r29, 4 +// 0x00000040: .cfi_def_cfa_offset: 64 +// 0x00000040: nop // ... 
-// 0x00020044: nop -// 0x00020048: .cfi_remember_state -// 0x00020048: lw r31, +60(r29) -// 0x0002004c: .cfi_restore: r31 -// 0x0002004c: lw r17, +56(r29) -// 0x00020050: .cfi_restore: r17 -// 0x00020050: lw r16, +52(r29) -// 0x00020054: .cfi_restore: r16 -// 0x00020054: ldc1 f22, +40(r29) -// 0x00020058: ldc1 f20, +32(r29) +// 0x00020040: nop +// 0x00020044: .cfi_remember_state +// 0x00020044: lw r31, +60(r29) +// 0x00020048: .cfi_restore: r31 +// 0x00020048: lw r17, +56(r29) +// 0x0002004c: .cfi_restore: r17 +// 0x0002004c: lw r16, +52(r29) +// 0x00020050: .cfi_restore: r16 +// 0x00020050: ldc1 f22, +40(r29) +// 0x00020054: ldc1 f20, +32(r29) +// 0x00020058: jr r31 // 0x0002005c: addiu r29, r29, 64 // 0x00020060: .cfi_def_cfa_offset: 0 -// 0x00020060: jr r31 -// 0x00020064: nop -// 0x00020068: .cfi_restore_state -// 0x00020068: .cfi_def_cfa_offset: 64 +// 0x00020060: .cfi_restore_state +// 0x00020060: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, - 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60, + 0xE8, 0xFF, 0xBD, 0x67, 0x02, 0x00, 0xA6, 0x60, 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, }; static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { @@ -412,7 +403,7 @@ static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { }; static constexpr uint8_t expected_cfi_kMips64_adjust[] = { 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, - 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x14, 0x00, + 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x10, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, }; @@ -430,29 +421,28 @@ static constexpr uint8_t expected_cfi_kMips64_adjust[] = { // 0x00000018: .cfi_offset: r56 at cfa-40 // 0x00000018: daddiu r29, r29, -24 // 0x0000001c: .cfi_def_cfa_offset: 64 -// 0x0000001c: sd r4, +0(r29) -// 0x00000020: bnec r5, r6, 0x0000002c ; +12 -// 0x00000024: auipc r1, 2 -// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080 -// 0x0000002c: nop +// 0x0000001c: bnec r5, r6, 0x0000002c ; +12 +// 0x00000020: auipc r1, 2 +// 0x00000024: jic r1, 12 ; b 0x00020030 ; +131080 +// 0x00000028: nop // ... 
-// 0x0002002c: nop -// 0x00020030: .cfi_remember_state -// 0x00020030: daddiu r29, r29, 24 -// 0x00020034: .cfi_def_cfa_offset: 40 -// 0x00020034: ldc1 f24, +0(r29) -// 0x00020038: .cfi_restore: r56 -// 0x00020038: ldc1 f25, +8(r29) -// 0x0002003c: .cfi_restore: r57 -// 0x0002003c: ld r16, +16(r29) -// 0x00020040: .cfi_restore: r16 -// 0x00020040: ld r17, +24(r29) -// 0x00020044: .cfi_restore: r17 -// 0x00020044: ld r31, +32(r29) -// 0x00020048: .cfi_restore: r31 -// 0x00020048: daddiu r29, r29, 40 -// 0x0002004c: .cfi_def_cfa_offset: 0 -// 0x0002004c: jr r31 -// 0x00020050: nop -// 0x00020054: .cfi_restore_state -// 0x00020054: .cfi_def_cfa_offset: 64 +// 0x00020028: nop +// 0x0002002c: .cfi_remember_state +// 0x0002002c: daddiu r29, r29, 24 +// 0x00020030: .cfi_def_cfa_offset: 40 +// 0x00020030: ldc1 f24, +0(r29) +// 0x00020034: .cfi_restore: r56 +// 0x00020034: ldc1 f25, +8(r29) +// 0x00020038: .cfi_restore: r57 +// 0x00020038: ld r16, +16(r29) +// 0x0002003c: .cfi_restore: r16 +// 0x0002003c: ld r17, +24(r29) +// 0x00020040: .cfi_restore: r17 +// 0x00020040: ld r31, +32(r29) +// 0x00020044: .cfi_restore: r31 +// 0x00020044: daddiu r29, r29, 40 +// 0x00020047: .cfi_def_cfa_offset: 0 +// 0x00020048: jr r31 +// 0x0002004c: nop +// 0x00020050: .cfi_restore_state +// 0x00020050: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index d5b0d77fe5..8c769270b1 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -18,6 +18,8 @@ #include <fstream> #include <memory> +#include <sstream> + #include <stdint.h> #ifdef ART_ENABLE_CODEGEN_arm @@ -46,6 +48,7 @@ #include "base/arena_containers.h" #include "base/dumpable.h" #include "base/macros.h" +#include "base/mutex.h" #include "base/timing_logger.h" #include "bounds_check_elimination.h" #include "builder.h" @@ -56,7 +59,6 @@ #include "dead_code_elimination.h" #include "debug/elf_debug_writer.h" #include "debug/method_debug_info.h" -#include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verification_results.h" #include "dex/verified_method.h" #include "driver/compiler_driver-inl.h" @@ -77,6 +79,7 @@ #include "jni/quick/jni_compiler.h" #include "licm.h" #include "load_store_elimination.h" +#include "loop_optimization.h" #include "nodes.h" #include "oat_quick_method_header.h" #include "prepare_for_register_allocation.h" @@ -95,6 +98,8 @@ namespace art { static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB; +static constexpr const char* kPassNameSeparator = "$"; + /** * Used by the code generator, to allocate the code in a vector. */ @@ -133,14 +138,18 @@ class PassObserver : public ValueObject { PassObserver(HGraph* graph, CodeGenerator* codegen, std::ostream* visualizer_output, - CompilerDriver* compiler_driver) + CompilerDriver* compiler_driver, + Mutex& dump_mutex) : graph_(graph), cached_method_name_(), timing_logger_enabled_(compiler_driver->GetDumpPasses()), timing_logger_(timing_logger_enabled_ ? 
GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), + visualizer_oss_(), + visualizer_output_(visualizer_output), visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()), - visualizer_(visualizer_output, graph, *codegen), + visualizer_(&visualizer_oss_, graph, *codegen), + visualizer_dump_mutex_(dump_mutex), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { if (!IsVerboseMethod(compiler_driver, GetMethodName())) { @@ -158,6 +167,10 @@ class PassObserver : public ValueObject { LOG(INFO) << "TIMINGS " << GetMethodName(); LOG(INFO) << Dumpable<TimingLogger>(timing_logger_); } + if (visualizer_enabled_) { + MutexLock mu(Thread::Current(), visualizer_dump_mutex_); + *visualizer_output_ << visualizer_oss_.str(); + } } void DumpDisassembly() const { @@ -171,13 +184,14 @@ class PassObserver : public ValueObject { const char* GetMethodName() { // PrettyMethod() is expensive, so we delay calling it until we actually have to. if (cached_method_name_.empty()) { - cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile()); + cached_method_name_ = graph_->GetDexFile().PrettyMethod(graph_->GetMethodIdx()); } return cached_method_name_.c_str(); } private: void StartPass(const char* pass_name) { + VLOG(compiler) << "Starting pass: " << pass_name; // Dump graph first, then start timer. if (visualizer_enabled_) { visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_); @@ -234,8 +248,11 @@ class PassObserver : public ValueObject { DisassemblyInformation disasm_info_; + std::ostringstream visualizer_oss_; + std::ostream* visualizer_output_; bool visualizer_enabled_; HGraphVisualizer visualizer_; + Mutex& visualizer_dump_mutex_; // Flag to be set by the compiler if the pass failed and the graph is not // expected to validate. 
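The PassObserver changes above replace direct writes to the shared .cfg file with a per-compilation buffer: each observer accumulates its CFG dump in a private std::ostringstream (visualizer_oss_) and copies it into the shared output stream under visualizer_dump_mutex_ only in its destructor, so concurrently compiling threads cannot interleave their dumps. A minimal sketch of that buffer-then-flush pattern, using plain standard-library names rather than the actual ART types:

    #include <mutex>
    #include <ostream>
    #include <sstream>

    // Sketch only: buffer per-object output locally and publish it to a shared
    // stream under a lock, so several writers cannot interleave their output.
    class BufferedDumper {
     public:
      BufferedDumper(std::ostream* shared_out, std::mutex* mu)
          : shared_out_(shared_out), mu_(mu) {}

      // Passes write here without taking the lock.
      std::ostream& stream() { return local_; }

      // The whole buffer is flushed atomically with respect to other dumpers.
      ~BufferedDumper() {
        std::lock_guard<std::mutex> lock(*mu_);
        *shared_out_ << local_.str();
      }

     private:
      std::ostringstream local_;
      std::ostream* const shared_out_;
      std::mutex* const mu_;
    };

This buffering is also why the single-threaded ('-j1') requirement for CFG dumping could be dropped further down in this change.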
@@ -266,7 +283,7 @@ class PassScope : public ValueObject { class OptimizingCompiler FINAL : public Compiler { public: explicit OptimizingCompiler(CompilerDriver* driver); - ~OptimizingCompiler(); + ~OptimizingCompiler() OVERRIDE; bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE; @@ -281,12 +298,17 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* JniCompile(uint32_t access_flags, uint32_t method_idx, - const DexFile& dex_file) const OVERRIDE { - return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file); + const DexFile& dex_file, + JniOptimizationFlags optimization_flags) const OVERRIDE { + return ArtQuickJniCompileMethod(GetCompilerDriver(), + access_flags, + method_idx, + dex_file, + optimization_flags); } uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); } @@ -303,19 +325,19 @@ class OptimizingCompiler FINAL : public Compiler { bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr) OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); - protected: - virtual void RunOptimizations(HGraph* graph, - CodeGenerator* codegen, - CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer, - StackHandleScopeCollection* handles) const; + private: + void RunOptimizations(HGraph* graph, + CodeGenerator* codegen, + CompilerDriver* driver, + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + VariableSizedHandleScope* handles) const; - virtual void RunOptimizations(HOptimization* optimizations[], - size_t length, - PassObserver* pass_observer) const; + void RunOptimizations(HOptimization* optimizations[], + size_t length, + PassObserver* pass_observer) const; private: // Create a 'CompiledMethod' for an optimized graph. @@ -350,7 +372,7 @@ class OptimizingCompiler FINAL : public Compiler { CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, - StackHandleScopeCollection* handles) const; + VariableSizedHandleScope* handles) const; void RunArchOptimizations(InstructionSet instruction_set, HGraph* graph, @@ -361,13 +383,16 @@ class OptimizingCompiler FINAL : public Compiler { std::unique_ptr<std::ostream> visualizer_output_; + mutable Mutex dump_mutex_; // To synchronize visualizer writing. + DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler); }; static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) - : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {} + : Compiler(driver, kMaximumCompilationTimeBeforeWarning), + dump_mutex_("Visualizer dump lock") {} void OptimizingCompiler::Init() { // Enable C1visualizer output. Must be done in Init() because the compiler @@ -375,9 +400,6 @@ void OptimizingCompiler::Init() { CompilerDriver* driver = GetCompilerDriver(); const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName(); if (!cfg_file_name.empty()) { - CHECK_EQ(driver->GetThreadCount(), 1U) - << "Graph visualizer requires the compiler to run single-threaded. 
" - << "Invoke the compiler with '-j1'."; std::ios_base::openmode cfg_file_mode = driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode)); @@ -420,6 +442,134 @@ static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { || instruction_set == kX86_64; } +// Strip pass name suffix to get optimization name. +static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) { + size_t pos = pass_name.find(kPassNameSeparator); + return pos == std::string::npos ? pass_name : pass_name.substr(0, pos); +} + +static HOptimization* BuildOptimization( + const std::string& pass_name, + ArenaAllocator* arena, + HGraph* graph, + OptimizingCompilerStats* stats, + CodeGenerator* codegen, + CompilerDriver* driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles, + SideEffectsAnalysis* most_recent_side_effects, + HInductionVarAnalysis* most_recent_induction) { + std::string opt_name = ConvertPassNameToOptimizationName(pass_name); + if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) { + CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); + return new (arena) BoundsCheckElimination(graph, + *most_recent_side_effects, + most_recent_induction); + } else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) { + CHECK(most_recent_side_effects != nullptr); + return new (arena) GVNOptimization(graph, *most_recent_side_effects, pass_name.c_str()); + } else if (opt_name == HConstantFolding::kConstantFoldingPassName) { + return new (arena) HConstantFolding(graph, pass_name.c_str()); + } else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) { + return new (arena) HDeadCodeElimination(graph, stats, pass_name.c_str()); + } else if (opt_name == HInliner::kInlinerPassName) { + size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_; + return new (arena) HInliner(graph, // outer_graph + graph, // outermost_graph + codegen, + dex_compilation_unit, // outer_compilation_unit + dex_compilation_unit, // outermost_compilation_unit + driver, + handles, + stats, + number_of_dex_registers, + /* depth */ 0); + } else if (opt_name == HSharpening::kSharpeningPassName) { + return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver); + } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) { + return new (arena) HSelectGenerator(graph, stats); + } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { + return new (arena) HInductionVarAnalysis(graph); + } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) { + return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str()); + } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) { + return new (arena) IntrinsicsRecognizer(graph, stats); + } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) { + CHECK(most_recent_side_effects != nullptr); + return new (arena) LICM(graph, *most_recent_side_effects, stats); + } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) { + CHECK(most_recent_side_effects != nullptr); + return new (arena) LoadStoreElimination(graph, *most_recent_side_effects); + } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { + return new (arena) SideEffectsAnalysis(graph); + } else if (opt_name == 
HLoopOptimization::kLoopOptimizationPassName) { + return new (arena) HLoopOptimization(graph, most_recent_induction); +#ifdef ART_ENABLE_CODEGEN_arm + } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) { + return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); + } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) { + return new (arena) arm::InstructionSimplifierArm(graph, stats); +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 + } else if (opt_name == arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName) { + return new (arena) arm64::InstructionSimplifierArm64(graph, stats); +#endif +#ifdef ART_ENABLE_CODEGEN_mips + } else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) { + return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats); + } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) { + return new (arena) mips::PcRelativeFixups(graph, codegen, stats); +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + } else if (opt_name == x86::PcRelativeFixups::kPcRelativeFixupsX86PassName) { + return new (arena) x86::PcRelativeFixups(graph, codegen, stats); + } else if (opt_name == x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName) { + return new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats); +#endif + } + return nullptr; +} + +static ArenaVector<HOptimization*> BuildOptimizations( + const std::vector<std::string>& pass_names, + ArenaAllocator* arena, + HGraph* graph, + OptimizingCompilerStats* stats, + CodeGenerator* codegen, + CompilerDriver* driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles) { + // A few HOptimization constructors require SideEffectsAnalysis or HInductionVarAnalysis + // instances. This method assumes that each of them expects the nearest instance preceding it + // in the pass name list. 
+ SideEffectsAnalysis* most_recent_side_effects = nullptr; + HInductionVarAnalysis* most_recent_induction = nullptr; + ArenaVector<HOptimization*> ret(arena->Adapter()); + for (const std::string& pass_name : pass_names) { + HOptimization* opt = BuildOptimization( + pass_name, + arena, + graph, + stats, + codegen, + driver, + dex_compilation_unit, + handles, + most_recent_side_effects, + most_recent_induction); + CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\""; + ret.push_back(opt); + + std::string opt_name = ConvertPassNameToOptimizationName(pass_name); + if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { + most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt); + } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { + most_recent_induction = down_cast<HInductionVarAnalysis*>(opt); + } + } + return ret; +} + void OptimizingCompiler::RunOptimizations(HOptimization* optimizations[], size_t length, PassObserver* pass_observer) const { @@ -434,7 +584,7 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph, CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, - StackHandleScopeCollection* handles) const { + VariableSizedHandleScope* handles) const { OptimizingCompilerStats* stats = compilation_stats_.get(); const CompilerOptions& compiler_options = driver->GetCompilerOptions(); bool should_inline = (compiler_options.GetInlineDepthLimit() > 0) @@ -444,11 +594,11 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph, } size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_; HInliner* inliner = new (graph->GetArena()) HInliner( - graph, - graph, + graph, // outer_graph + graph, // outermost_graph codegen, - dex_compilation_unit, - dex_compilation_unit, + dex_compilation_unit, // outer_compilation_unit + dex_compilation_unit, // outermost_compilation_unit driver, handles, stats, @@ -463,17 +613,24 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, HGraph* graph, CodeGenerator* codegen, PassObserver* pass_observer) const { + UNUSED(codegen); // To avoid compilation error when compiling for svelte OptimizingCompilerStats* stats = compilation_stats_.get(); ArenaAllocator* arena = graph->GetArena(); +#ifdef ART_USE_VIXL_ARM_BACKEND + UNUSED(arena); + UNUSED(pass_observer); + UNUSED(stats); +#endif switch (instruction_set) { -#ifdef ART_ENABLE_CODEGEN_arm +#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND) case kThumb2: case kArm: { - arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats); + arm::DexCacheArrayFixups* fixups = + new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); arm::InstructionSimplifierArm* simplifier = new (arena) arm::InstructionSimplifierArm(graph, stats); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); - GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch"); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch"); HOptimization* arm_optimizations[] = { simplifier, side_effects, @@ -489,7 +646,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, arm64::InstructionSimplifierArm64* simplifier = new (arena) arm64::InstructionSimplifierArm64(graph, stats); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); - GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, 
"GVN_after_arch"); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch"); HOptimization* arm64_optimizations[] = { simplifier, side_effects, @@ -518,7 +675,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, x86::PcRelativeFixups* pc_relative_fixups = new (arena) x86::PcRelativeFixups(graph, codegen, stats); x86::X86MemoryOperandGeneration* memory_gen = - new(arena) x86::X86MemoryOperandGeneration(graph, stats, codegen); + new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats); HOptimization* x86_optimizations[] = { pc_relative_fixups, memory_gen @@ -530,7 +687,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, #ifdef ART_ENABLE_CODEGEN_x86_64 case kX86_64: { x86::X86MemoryOperandGeneration* memory_gen = - new(arena) x86::X86MemoryOperandGeneration(graph, stats, codegen); + new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats); HOptimization* x86_64_optimizations[] = { memory_gen }; @@ -546,7 +703,8 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects. static void AllocateRegisters(HGraph* graph, CodeGenerator* codegen, - PassObserver* pass_observer) { + PassObserver* pass_observer, + RegisterAllocator::Strategy strategy) { { PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName, pass_observer); @@ -559,7 +717,7 @@ static void AllocateRegisters(HGraph* graph, } { PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer); - RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters(); + RegisterAllocator::Create(graph->GetArena(), codegen, liveness, strategy)->AllocateRegisters(); } } @@ -568,30 +726,48 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, - StackHandleScopeCollection* handles) const { + VariableSizedHandleScope* handles) const { OptimizingCompilerStats* stats = compilation_stats_.get(); ArenaAllocator* arena = graph->GetArena(); + if (driver->GetCompilerOptions().GetPassesToRun() != nullptr) { + ArenaVector<HOptimization*> optimizations = BuildOptimizations( + *driver->GetCompilerOptions().GetPassesToRun(), + arena, + graph, + stats, + codegen, + driver, + dex_compilation_unit, + handles); + RunOptimizations(&optimizations[0], optimizations.size(), pass_observer); + return; + } + HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( - graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + graph, stats, "dead_code_elimination$initial"); HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination( - graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName); + graph, stats, "dead_code_elimination$final"); HConstantFolding* fold1 = new (arena) HConstantFolding(graph); InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats); - HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); - HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce"); + HConstantFolding* fold2 = new (arena) HConstantFolding( + graph, "constant_folding$after_inlining"); + HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce"); 
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); LICM* licm = new (arena) LICM(graph, *side_effects, stats); LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects); HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction); + HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction); HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( - graph, stats, "instruction_simplifier_after_bce"); + graph, stats, "instruction_simplifier$after_inlining"); InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( - graph, stats, "instruction_simplifier_before_codegen"); - IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats); + graph, stats, "instruction_simplifier$after_bce"); + InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier$before_codegen"); + IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); HOptimization* optimizations1[] = { intrinsics, @@ -609,24 +785,25 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, // redundant suspend checks to recognize empty blocks. select_generator, fold2, // TODO: if we don't inline we can also skip fold2. + simplify2, side_effects, gvn, licm, induction, bce, + loop, fold3, // evaluates code generated by dynamic bce - simplify2, + simplify3, lse, dce2, // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. - simplify3, + simplify4, }; RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer); - AllocateRegisters(graph, codegen, pass_observer); } static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) { @@ -688,9 +865,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, // Always use the Thumb-2 assembler: some runtime functionality // (like implicit stack overflow checks) assume Thumb-2. - if (instruction_set == kArm) { - instruction_set = kThumb2; - } + DCHECK_NE(instruction_set, kArm); // Do not attempt to compile on architectures we do not support. 
if (!IsInstructionSetSupported(instruction_set)) { @@ -719,9 +894,10 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, return nullptr; } + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); DexCompilationUnit dex_compilation_unit( class_loader, - Runtime::Current()->GetClassLinker(), + class_linker, dex_file, code_item, class_def_idx, @@ -750,7 +926,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, ScopedObjectAccess soa(Thread::Current()); StackHandleScope<1> hs(soa.Self()); Handle<mirror::ClassLoader> loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(class_loader))); + soa.Decode<mirror::ClassLoader>(class_loader))); method = compiler_driver->ResolveMethod( soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type); } @@ -760,7 +936,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, if (method != nullptr) { graph->SetArtMethod(method); ScopedObjectAccess soa(Thread::Current()); - interpreter_metadata = method->GetQuickenedInfo(); + interpreter_metadata = method->GetQuickenedInfo(class_linker->GetImagePointerSize()); uint16_t type_index = method->GetDeclaringClass()->GetDexTypeIndex(); // Update the dex cache if the type is not in it yet. Note that under AOT, @@ -789,13 +965,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, PassObserver pass_observer(graph, codegen.get(), visualizer_output_.get(), - compiler_driver); + compiler_driver, + dump_mutex_); VLOG(compiler) << "Building " << pass_observer.GetMethodName(); { ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); + VariableSizedHandleScope handles(soa.Self()); // Do not hold `mutator_lock_` between optimizations. ScopedThreadSuspension sts(soa.Self(), kNative); @@ -841,6 +1018,10 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, &pass_observer, &handles); + RegisterAllocator::Strategy regalloc_strategy = + compiler_options.GetRegisterAllocationStrategy(); + AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy); + codegen->Compile(code_allocator); pass_observer.DumpDisassembly(); } @@ -886,7 +1067,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, if (kArenaAllocatorCountAllocations) { if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) { MemStats mem_stats(arena.GetMemStats()); - LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats); + LOG(INFO) << dex_file.PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats); } } } @@ -908,7 +1089,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, // instruction set is supported -- and has support for read // barriers, if they are enabled). This makes sure we're not // regressing. 
- std::string method_name = PrettyMethod(method_idx, dex_file); + std::string method_name = dex_file.PrettyMethod(method_idx); bool shouldCompile = method_name.find("$opt$") != std::string::npos; DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name; } @@ -973,7 +1154,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, if (kArenaAllocatorCountAllocations) { if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) { MemStats mem_stats(arena.GetMemStats()); - LOG(INFO) << PrettyMethod(method_idx, *dex_file) << " " << Dumpable<MemStats>(mem_stats); + LOG(INFO) << dex_file->PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats); } } } diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index dd5cb1c9bb..58d90176cd 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -22,7 +22,7 @@ #include "common_compiler_test.h" #include "dex_file.h" #include "dex_instruction.h" -#include "handle_scope-inl.h" +#include "handle_scope.h" #include "scoped_thread_state_change.h" #include "ssa_builder.h" #include "ssa_liveness_analysis.h" @@ -90,7 +90,7 @@ inline HGraph* CreateCFG(ArenaAllocator* allocator, { ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); + VariableSizedHandleScope handles(soa.Self()); HGraphBuilder builder(graph, *item, &handles, return_type); bool graph_built = (builder.BuildGraph() == kAnalysisSuccess); return graph_built ? graph : nullptr; diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index c6acc45581..82feb95a2f 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -83,6 +83,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: // Add a base register for PC-relative literals on R2. InitializePCRelativeBasePointer(); load_string->AddSpecialInput(base_); @@ -92,6 +93,25 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } + void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + if (switch_insn->GetNumEntries() <= + InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) { + return; + } + // We need to replace the HPackedSwitch with a HMipsPackedSwitch in order to + // address the constant area. + InitializePCRelativeBasePointer(); + HGraph* graph = GetGraph(); + HBasicBlock* block = switch_insn->GetBlock(); + HMipsPackedSwitch* mips_switch = new (graph->GetArena()) HMipsPackedSwitch( + switch_insn->GetStartValue(), + switch_insn->GetNumEntries(), + switch_insn->InputAt(0), + base_, + switch_insn->GetDexPc()); + block->ReplaceAndRemoveInstructionWith(switch_insn, mips_switch); + } + void HandleInvoke(HInvoke* invoke) { // If this is an invoke-static/-direct with PC-relative dex cache array // addressing, we need the PC-relative address base. @@ -115,7 +135,8 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { return; } - if (has_extra_input && !WillHaveCallFreeIntrinsicsCodeGen(invoke)) { + if (has_extra_input && + !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { InitializePCRelativeBasePointer(); // Add the extra parameter base_. 
invoke_static_or_direct->AddSpecialInput(base_); @@ -123,22 +144,6 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) { - if (invoke->GetIntrinsic() != Intrinsics::kNone) { - // This invoke may have intrinsic code generation defined. However, we must - // now also determine if this code generation is truly there and call-free - // (not unimplemented, no bail on instruction features, or call on slow path). - // This is done by actually calling the locations builder on the instruction - // and clearing out the locations once result is known. We assume this - // call only has creating locations as side effects! - IntrinsicLocationsBuilderMIPS builder(codegen_); - bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall(); - invoke->SetLocations(nullptr); - return success; - } - return false; - } - CodeGeneratorMIPS* codegen_; // The generated HMipsComputeBaseMethodAddress in the entry block needed as an diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h index 1e8b071bb3..5a7397bf9d 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.h +++ b/compiler/optimizing/pc_relative_fixups_mips.h @@ -32,6 +32,8 @@ class PcRelativeFixups : public HOptimization { : HOptimization(graph, "pc_relative_fixups_mips", stats), codegen_(codegen) {} + static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips"; + void Run() OVERRIDE; private: diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 921f3dfff6..b1fdb1792d 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -92,7 +92,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitLoadString(HLoadString* load_string) OVERRIDE { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kDexCachePcRelative) { + load_kind == HLoadString::LoadKind::kBssEntry) { InitializePCRelativeBasePointer(); load_string->AddSpecialInput(base_); } @@ -203,7 +203,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { bool base_added = false; if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache() && - !WillHaveCallFreeIntrinsicsCodeGen(invoke)) { + !IsCallFreeIntrinsic<IntrinsicLocationsBuilderX86>(invoke, codegen_)) { InitializePCRelativeBasePointer(); // Add the extra parameter base_. invoke_static_or_direct->AddSpecialInput(base_); @@ -227,6 +227,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMinDoubleDouble: case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathRoundFloat: if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); @@ -239,22 +240,6 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) { - if (invoke->GetIntrinsic() != Intrinsics::kNone) { - // This invoke may have intrinsic code generation defined. However, we must - // now also determine if this code generation is truly there and call-free - // (not unimplemented, no bail on instruction features, or call on slow path). 
- // This is done by actually calling the locations builder on the instruction - // and clearing out the locations once result is known. We assume this - // call only has creating locations as side effects! - IntrinsicLocationsBuilderX86 builder(codegen_); - bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall(); - invoke->SetLocations(nullptr); - return success; - } - return false; - } - CodeGeneratorX86* codegen_; // The generated HX86ComputeBaseMethodAddress in the entry block needed as an diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h index 03de2fcece..72fa71ea94 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.h +++ b/compiler/optimizing/pc_relative_fixups_x86.h @@ -29,9 +29,11 @@ namespace x86 { class PcRelativeFixups : public HOptimization { public: PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, "pc_relative_fixups_x86", stats), + : HOptimization(graph, kPcRelativeFixupsX86PassName, stats), codegen_(codegen) {} + static constexpr const char* kPcRelativeFixupsX86PassName = "pc_relative_fixups_x86"; + void Run() OVERRIDE; private: diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 8fb539661f..0db60882db 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -20,8 +20,7 @@ namespace art { void PrepareForRegisterAllocation::Run() { // Order does not matter. - for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { // No need to visit the phis. for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { @@ -44,7 +43,7 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { // Add a fake environment for String.charAt() inline info as we want // the exception to appear as being thrown from there. const DexFile& dex_file = check->GetEnvironment()->GetDexFile(); - DCHECK_STREQ(PrettyMethod(check->GetStringCharAtMethodIndex(), dex_file).c_str(), + DCHECK_STREQ(dex_file.PrettyMethod(check->GetStringCharAtMethodIndex()).c_str(), "char java.lang.String.charAt(int)"); ArenaAllocator* arena = GetGraph()->GetArena(); HEnvironment* environment = new (arena) HEnvironment(arena, @@ -129,6 +128,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) { // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. 
+ DCHECK(load_class->HasEnvironment()); load_class->SetMustGenerateClinitCheck(true); check->GetBlock()->RemoveInstruction(check); } @@ -136,7 +136,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass(); - bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse(); + const bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse(); // Change the entrypoint to kQuickAllocObject if either: // - the class is finalizable (only kQuickAllocObject handles finalizable classes), // - the class needs access checks (we do not know if it's finalizable), @@ -144,19 +144,25 @@ void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) { instruction->SetEntrypoint(kQuickAllocObject); instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0); - // The allocation entry point that deals with access checks does not work with inlined - // methods, so we need to check whether this allocation comes from an inlined method. - // We also need to make the same check as for moving clinit check, whether the HLoadClass - // has the clinit check responsibility or not (HLoadClass can throw anyway). - if (has_only_one_use && - !instruction->GetEnvironment()->IsFromInlinedInvoke() && - CanMoveClinitCheck(load_class, instruction)) { - // We can remove the load class from the graph. If it needed access checks, we delegate - // the access check to the allocation. - if (load_class->NeedsAccessCheck()) { - instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck); + if (has_only_one_use) { + // We've just removed the only use of the HLoadClass. Since we don't run DCE after this pass, + // do it manually if possible. + if (!load_class->CanThrow()) { + // If the load class can not throw, it has no side effects and can be removed if there is + // only one use. + load_class->GetBlock()->RemoveInstruction(load_class); + } else if (!instruction->GetEnvironment()->IsFromInlinedInvoke() && + CanMoveClinitCheck(load_class, instruction)) { + // The allocation entry point that deals with access checks does not work with inlined + // methods, so we need to check whether this allocation comes from an inlined method. + // We also need to make the same check as for moving clinit check, whether the HLoadClass + // has the clinit check responsibility or not (HLoadClass can throw anyway). + // If it needed access checks, we delegate the access check to the allocation. 
+ if (load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck); + } + load_class->GetBlock()->RemoveInstruction(load_class); } - load_class->GetBlock()->RemoveInstruction(load_class); } } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index e96ab1918c..d588deaace 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -20,14 +20,14 @@ #include "class_linker-inl.h" #include "mirror/class-inl.h" #include "mirror/dex_cache.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" namespace art { static inline mirror::DexCache* FindDexCacheWithHint(Thread* self, const DexFile& dex_file, Handle<mirror::DexCache> hint_dex_cache) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { if (LIKELY(hint_dex_cache->GetDexFile() == &dex_file)) { return hint_dex_cache.Get(); } else { @@ -35,7 +35,7 @@ static inline mirror::DexCache* FindDexCacheWithHint(Thread* self, } } -static inline ReferenceTypeInfo::TypeHandle GetRootHandle(StackHandleScopeCollection* handles, +static inline ReferenceTypeInfo::TypeHandle GetRootHandle(VariableSizedHandleScope* handles, ClassLinker::ClassRoot class_root, ReferenceTypeInfo::TypeHandle* cache) { if (!ReferenceTypeInfo::IsValidHandle(*cache)) { @@ -84,8 +84,8 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { void VisitNewArray(HNewArray* instr) OVERRIDE; void VisitParameterValue(HParameterValue* instr) OVERRIDE; void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); - void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact) - SHARED_REQUIRES(Locks::mutator_lock_); + void SetClassAsTypeInfo(HInstruction* instr, ObjPtr<mirror::Class> klass, bool is_exact) + REQUIRES_SHARED(Locks::mutator_lock_); void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE; @@ -109,7 +109,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, Handle<mirror::DexCache> hint_dex_cache, - StackHandleScopeCollection* handles, + VariableSizedHandleScope* handles, bool is_first_run, const char* name) : HOptimization(graph, name), @@ -123,8 +123,7 @@ void ReferenceTypePropagation::ValidateTypes() { // TODO: move this to the graph checker. if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) { HInstruction* instr = iti.Current(); if (instr->GetType() == Primitive::kPrimNot) { @@ -158,8 +157,8 @@ void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. // We take advantage of this order in `VisitBasicBlock`. 
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); } ProcessWorklist(); @@ -194,7 +193,7 @@ static bool ShouldCreateBoundType(HInstruction* position, ReferenceTypeInfo upper_bound, HInstruction* dominator_instr, HBasicBlock* dominator_block) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { // If the position where we should insert the bound type is not already a // a bound type then we need to create one. if (position == nullptr || !position->IsBoundType()) { @@ -427,7 +426,7 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { } void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, - mirror::Class* klass, + ObjPtr<mirror::Class> klass, bool is_exact) { if (instr->IsInvokeStaticOrDirect() && instr->AsInvokeStaticOrDirect()->IsStringInit()) { // Calls to String.<init> are replaced with a StringFactory. @@ -448,13 +447,13 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst mirror::Class* declaring_class = method->GetDeclaringClass(); DCHECK(declaring_class != nullptr); DCHECK(declaring_class->IsStringClass()) - << "Expected String class: " << PrettyDescriptor(declaring_class); + << "Expected String class: " << declaring_class->PrettyDescriptor(); DCHECK(method->IsConstructor()) - << "Expected String.<init>: " << PrettyMethod(method); + << "Expected String.<init>: " << method->PrettyMethod(); } instr->SetReferenceTypeInfo( ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true)); - } else if (IsAdmissible(klass)) { + } else if (IsAdmissible(klass.Ptr())) { ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass); is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes(); instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact)); @@ -487,7 +486,7 @@ static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx, Handle<mirror::DexCache> hint_dex_cache) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { mirror::DexCache* dex_cache = FindDexCacheWithHint(self, dex_file, hint_dex_cache); // Get type from dex cache assuming it was populated by the verifier. return dex_cache->GetResolvedType(type_idx); @@ -512,12 +511,13 @@ void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstructio } ScopedObjectAccess soa(Thread::Current()); - mirror::Class* klass = nullptr; + ObjPtr<mirror::Class> klass; // The field index is unknown only during tests. if (info.GetFieldIndex() != kUnknownFieldIndex) { ClassLinker* cl = Runtime::Current()->GetClassLinker(); - ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get()); + ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), + MakeObjPtr(info.GetDexCache().Get())); // TODO: There are certain cases where we can't resolve the field. // b/21914925 is open to keep track of a repro case for this issue. 
if (field != nullptr) { diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index edd83bf5de..4663471729 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -21,6 +21,7 @@ #include "driver/dex_compilation_unit.h" #include "handle_scope-inl.h" #include "nodes.h" +#include "obj_ptr.h" #include "optimization.h" #include "optimizing_compiler_stats.h" @@ -33,7 +34,7 @@ class ReferenceTypePropagation : public HOptimization { public: ReferenceTypePropagation(HGraph* graph, Handle<mirror::DexCache> hint_dex_cache, - StackHandleScopeCollection* handles, + VariableSizedHandleScope* handles, bool is_first_run, const char* name = kReferenceTypePropagationPassName); @@ -44,7 +45,7 @@ class ReferenceTypePropagation : public HOptimization { // Returns true if klass is admissible to the propagation: non-null and resolved. // For an array type, we also check if the component type is admissible. - static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) { + static bool IsAdmissible(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) { return klass != nullptr && klass->IsResolved() && (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType())); @@ -55,10 +56,15 @@ class ReferenceTypePropagation : public HOptimization { private: class HandleCache { public: - explicit HandleCache(StackHandleScopeCollection* handles) : handles_(handles) { } + explicit HandleCache(VariableSizedHandleScope* handles) : handles_(handles) { } template <typename T> - MutableHandle<T> NewHandle(T* object) SHARED_REQUIRES(Locks::mutator_lock_) { + MutableHandle<T> NewHandle(T* object) REQUIRES_SHARED(Locks::mutator_lock_) { + return handles_->NewHandle(object); + } + + template <typename T> + MutableHandle<T> NewHandle(ObjPtr<T> object) REQUIRES_SHARED(Locks::mutator_lock_) { return handles_->NewHandle(object); } @@ -68,7 +74,7 @@ class ReferenceTypePropagation : public HOptimization { ReferenceTypeInfo::TypeHandle GetThrowableClassHandle(); private: - StackHandleScopeCollection* handles_; + VariableSizedHandleScope* handles_; ReferenceTypeInfo::TypeHandle object_class_handle_; ReferenceTypeInfo::TypeHandle class_class_handle_; @@ -80,8 +86,8 @@ class ReferenceTypePropagation : public HOptimization { void VisitPhi(HPhi* phi); void VisitBasicBlock(HBasicBlock* block); - void UpdateBoundType(HBoundType* bound_type) SHARED_REQUIRES(Locks::mutator_lock_); - void UpdatePhi(HPhi* phi) SHARED_REQUIRES(Locks::mutator_lock_); + void UpdateBoundType(HBoundType* bound_type) REQUIRES_SHARED(Locks::mutator_lock_); + void UpdatePhi(HPhi* phi) REQUIRES_SHARED(Locks::mutator_lock_); void BoundTypeForIfNotNull(HBasicBlock* block); void BoundTypeForIfInstanceOf(HBasicBlock* block); void ProcessWorklist(); @@ -92,10 +98,10 @@ class ReferenceTypePropagation : public HOptimization { bool UpdateReferenceTypeInfo(HInstruction* instr); static void UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) - SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); void ValidateTypes(); diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index 7649b5093c..b061c871b0 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc 
+++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -35,7 +35,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest { ~ReferenceTypePropagationTest() { } - void SetupPropagation(StackHandleScopeCollection* handles) { + void SetupPropagation(VariableSizedHandleScope* handles) { graph_->InitializeInexactObjectRTI(handles); propagation_ = new (&allocator_) ReferenceTypePropagation(graph_, Handle<mirror::DexCache>(), @@ -46,7 +46,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest { // Relay method to merge type in reference type propagation. ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, - const ReferenceTypeInfo& b) SHARED_REQUIRES(Locks::mutator_lock_) { + const ReferenceTypeInfo& b) REQUIRES_SHARED(Locks::mutator_lock_) { return propagation_->MergeTypes(a, b); } @@ -56,12 +56,12 @@ class ReferenceTypePropagationTest : public CommonCompilerTest { } // Helper method to construct the Object type. - ReferenceTypeInfo ObjectType(bool is_exact = true) SHARED_REQUIRES(Locks::mutator_lock_) { + ReferenceTypeInfo ObjectType(bool is_exact = true) REQUIRES_SHARED(Locks::mutator_lock_) { return ReferenceTypeInfo::Create(propagation_->handle_cache_.GetObjectClassHandle(), is_exact); } // Helper method to construct the String type. - ReferenceTypeInfo StringType(bool is_exact = true) SHARED_REQUIRES(Locks::mutator_lock_) { + ReferenceTypeInfo StringType(bool is_exact = true) REQUIRES_SHARED(Locks::mutator_lock_) { return ReferenceTypeInfo::Create(propagation_->handle_cache_.GetStringClassHandle(), is_exact); } @@ -79,7 +79,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest { TEST_F(ReferenceTypePropagationTest, ProperSetup) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); + VariableSizedHandleScope handles(soa.Self()); SetupPropagation(&handles); EXPECT_TRUE(propagation_ != nullptr); @@ -88,7 +88,7 @@ TEST_F(ReferenceTypePropagationTest, ProperSetup) { TEST_F(ReferenceTypePropagationTest, MergeInvalidTypes) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); + VariableSizedHandleScope handles(soa.Self()); SetupPropagation(&handles); // Two invalid types. @@ -120,7 +120,7 @@ TEST_F(ReferenceTypePropagationTest, MergeInvalidTypes) { TEST_F(ReferenceTypePropagationTest, MergeValidTypes) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); + VariableSizedHandleScope handles(soa.Self()); SetupPropagation(&handles); // Same types. 
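The prepare_for_register_allocation.cc and reference_type_propagation.cc hunks above replace the old HReversePostOrderIterator Done()/Advance()/Current() pattern with range-based for loops over the graph's stored reverse post order. A short standalone sketch of that iteration style follows; Graph and Block are hypothetical stand-ins for illustration, not ART's HGraph/HBasicBlock.

#include <utility>
#include <vector>

struct Block {
  int id;
};

class Graph {
 public:
  explicit Graph(std::vector<Block*> reverse_post_order)
      : reverse_post_order_(std::move(reverse_post_order)) {}

  // Exposing the stored order lets callers write
  //   for (Block* block : graph.GetReversePostOrder()) { ... }
  // instead of driving a separate iterator object with Done()/Advance()/Current().
  const std::vector<Block*>& GetReversePostOrder() const { return reverse_post_order_; }

 private:
  // In reverse post order every block appears after all of its dominators, which is
  // why passes like reference type propagation visit blocks in this order.
  std::vector<Block*> reverse_post_order_;
};

// Example client mirroring the rewritten loops above.
void VisitAll(const Graph& graph) {
  for (Block* block : graph.GetReversePostOrder()) {
    (void)block;  // Per-block work goes here.
  }
}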
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 34502869e4..caf66474eb 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -17,6 +17,7 @@ #include "register_allocation_resolver.h" #include "code_generator.h" +#include "linear_order.h" #include "ssa_liveness_analysis.h" namespace art { @@ -28,8 +29,7 @@ RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator codegen_(codegen), liveness_(liveness) {} -void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, - size_t max_safepoint_live_fp_regs, +void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoints, size_t reserved_out_slots, size_t int_spill_slots, size_t long_spill_slots, @@ -43,10 +43,13 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, + double_spill_slots + catch_phi_spill_slots; + // Update safepoints and calculate the size of the spills. + UpdateSafepointLiveRegisters(); + size_t maximum_safepoint_spill_size = CalculateMaximumSafepointSpillSize(safepoints); + // Computes frame size and spill mask. codegen_->InitializeCodeGeneration(spill_slots, - max_safepoint_live_core_regs, - max_safepoint_live_fp_regs, + maximum_safepoint_spill_size, reserved_out_slots, // Includes slot(s) for the art method. codegen_->GetGraph()->GetLinearOrder()); @@ -135,13 +138,11 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, // Connect siblings and resolve inputs. for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - ConnectSiblings(instruction->GetLiveInterval(), - max_safepoint_live_core_regs + max_safepoint_live_fp_regs); + ConnectSiblings(instruction->GetLiveInterval()); } // Resolve non-linear control flow across branches. Order does not matter. - for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearOrder()) { if (block->IsCatchBlock() || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { // Instructions live at the top of catch blocks or irreducible loop header @@ -171,15 +172,14 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, } // Resolve phi inputs. Order does not matter. - for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); - if (current->IsCatchBlock()) { + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearOrder()) { + if (block->IsCatchBlock()) { // Catch phi values are set at runtime by the exception delivery mechanism. 
} else { - for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HInstruction* phi = inst_it.Current(); - for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) { - HBasicBlock* predecessor = current->GetPredecessors()[i]; + for (size_t i = 0, e = block->GetPredecessors().size(); i < e; ++i) { + HBasicBlock* predecessor = block->GetPredecessors()[i]; DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u); HInstruction* input = phi->InputAt(i); Location source = input->GetLiveInterval()->GetLocationAt( @@ -222,8 +222,73 @@ void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, } } -void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval, - size_t max_safepoint_live_regs) { +void RegisterAllocationResolver::UpdateSafepointLiveRegisters() { + for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + for (LiveInterval* current = instruction->GetLiveInterval(); + current != nullptr; + current = current->GetNextSibling()) { + if (!current->HasRegister()) { + continue; + } + Location source = current->ToLocation(); + for (SafepointPosition* safepoint_position = current->GetFirstSafepoint(); + safepoint_position != nullptr; + safepoint_position = safepoint_position->GetNext()) { + DCHECK(current->CoversSlow(safepoint_position->GetPosition())); + LocationSummary* locations = safepoint_position->GetLocations(); + switch (source.GetKind()) { + case Location::kRegister: + case Location::kFpuRegister: { + locations->AddLiveRegister(source); + break; + } + case Location::kRegisterPair: + case Location::kFpuRegisterPair: { + locations->AddLiveRegister(source.ToLow()); + locations->AddLiveRegister(source.ToHigh()); + break; + } + case Location::kStackSlot: // Fall-through + case Location::kDoubleStackSlot: // Fall-through + case Location::kConstant: { + // Nothing to do. + break; + } + default: { + LOG(FATAL) << "Unexpected location for object"; + } + } + } + } + } +} + +size_t RegisterAllocationResolver::CalculateMaximumSafepointSpillSize( + ArrayRef<HInstruction* const> safepoints) { + size_t core_register_spill_size = codegen_->GetWordSize(); + size_t fp_register_spill_size = codegen_->GetFloatingPointSpillSlotSize(); + size_t maximum_safepoint_spill_size = 0u; + for (HInstruction* instruction : safepoints) { + LocationSummary* locations = instruction->GetLocations(); + if (locations->OnlyCallsOnSlowPath()) { + size_t core_spills = + codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true); + size_t fp_spills = + codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false); + size_t spill_size = + core_register_spill_size * core_spills + fp_register_spill_size * fp_spills; + maximum_safepoint_spill_size = std::max(maximum_safepoint_spill_size, spill_size); + } else if (locations->CallsOnMainAndSlowPath()) { + // Nothing to spill on the slow path if the main path already clobbers caller-saves. 
+ DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true)); + DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false)); + } + } + return maximum_safepoint_spill_size; +} + +void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { LiveInterval* current = interval; if (current->HasSpillSlot() && current->HasRegister() @@ -306,48 +371,16 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval, safepoint_position = safepoint_position->GetNext()) { DCHECK(current->CoversSlow(safepoint_position->GetPosition())); - LocationSummary* locations = safepoint_position->GetLocations(); - if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { + if (current->GetType() == Primitive::kPrimNot) { DCHECK(interval->GetDefinedBy()->IsActualObject()) << interval->GetDefinedBy()->DebugName() << "@" << safepoint_position->GetInstruction()->DebugName(); - locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); - } - - switch (source.GetKind()) { - case Location::kRegister: { - locations->AddLiveRegister(source); - if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) { - DCHECK_LE(locations->GetNumberOfLiveRegisters(), - max_safepoint_live_regs); - } - if (current->GetType() == Primitive::kPrimNot) { - DCHECK(interval->GetDefinedBy()->IsActualObject()) - << interval->GetDefinedBy()->DebugName() - << "@" << safepoint_position->GetInstruction()->DebugName(); - locations->SetRegisterBit(source.reg()); - } - break; - } - case Location::kFpuRegister: { - locations->AddLiveRegister(source); - break; - } - - case Location::kRegisterPair: - case Location::kFpuRegisterPair: { - locations->AddLiveRegister(source.ToLow()); - locations->AddLiveRegister(source.ToHigh()); - break; - } - case Location::kStackSlot: // Fall-through - case Location::kDoubleStackSlot: // Fall-through - case Location::kConstant: { - // Nothing to do. - break; + LocationSummary* locations = safepoint_position->GetLocations(); + if (current->GetParent()->HasSpillSlot()) { + locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); } - default: { - LOG(FATAL) << "Unexpected location for object"; + if (source.GetKind() == Location::kRegister) { + locations->SetRegisterBit(source.reg()); } } } diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h index 6ceb9bc955..d48b1a0bb9 100644 --- a/compiler/optimizing/register_allocation_resolver.h +++ b/compiler/optimizing/register_allocation_resolver.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_ #include "base/arena_containers.h" +#include "base/array_ref.h" #include "base/value_object.h" #include "primitive.h" @@ -43,8 +44,7 @@ class RegisterAllocationResolver : ValueObject { CodeGenerator* codegen, const SsaLivenessAnalysis& liveness); - void Resolve(size_t max_safepoint_live_core_regs, - size_t max_safepoint_live_fp_regs, + void Resolve(ArrayRef<HInstruction* const> safepoints, size_t reserved_out_slots, // Includes slot(s) for the art method. size_t int_spill_slots, size_t long_spill_slots, @@ -54,10 +54,14 @@ class RegisterAllocationResolver : ValueObject { const ArenaVector<LiveInterval*>& temp_intervals); private: + // Update live registers of safepoint location summary. + void UpdateSafepointLiveRegisters(); + + // Calculate the maximum size of the spill area for safepoints. 
+ size_t CalculateMaximumSafepointSpillSize(ArrayRef<HInstruction* const> safepoints); + // Connect adjacent siblings within blocks, and resolve inputs along the way. - // Uses max_safepoint_live_regs to check that we did not underestimate the - // number of live registers at safepoints. - void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs); + void ConnectSiblings(LiveInterval* interval); // Connect siblings between block entries and exits. void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 2367ce1aeb..5b768d5d67 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -21,6 +21,7 @@ #include "base/bit_vector-inl.h" #include "code_generator.h" +#include "register_allocator_graph_color.h" #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" @@ -41,6 +42,8 @@ RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator, switch (strategy) { case kRegisterAllocatorLinearScan: return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis); + case kRegisterAllocatorGraphColor: + return new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis); default: LOG(FATAL) << "Invalid register allocation strategy: " << strategy; UNREACHABLE(); @@ -163,6 +166,19 @@ bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& inte } else { codegen.DumpFloatingPointRegister(message, current->GetRegister()); } + for (LiveInterval* interval : intervals) { + if (interval->HasRegister() + && interval->GetRegister() == current->GetRegister() + && interval->CoversSlow(j)) { + message << std::endl; + if (interval->GetDefinedBy() != nullptr) { + message << interval->GetDefinedBy()->GetKind() << " "; + } else { + message << "physical "; + } + interval->Dump(message); + } + } LOG(FATAL) << message.str(); } else { return false; diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 729eede66e..7e1fff8e2b 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -40,7 +40,8 @@ class SsaLivenessAnalysis; class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> { public: enum Strategy { - kRegisterAllocatorLinearScan + kRegisterAllocatorLinearScan, + kRegisterAllocatorGraphColor }; static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc new file mode 100644 index 0000000000..aa0d3710fa --- /dev/null +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -0,0 +1,2042 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "register_allocator_graph_color.h" + +#include "code_generator.h" +#include "linear_order.h" +#include "register_allocation_resolver.h" +#include "ssa_liveness_analysis.h" +#include "thread-inl.h" + +namespace art { + +// Highest number of registers that we support for any platform. This can be used for std::bitset, +// for example, which needs to know its size at compile time. +static constexpr size_t kMaxNumRegs = 32; + +// The maximum number of graph coloring attempts before triggering a DCHECK. +// This is meant to catch changes to the graph coloring algorithm that undermine its forward +// progress guarantees. Forward progress for the algorithm means splitting live intervals on +// every graph coloring attempt so that eventually the interference graph will be sparse enough +// to color. The main threat to forward progress is trying to split short intervals which cannot be +// split further; this could cause infinite looping because the interference graph would never +// change. This is avoided by prioritizing short intervals before long ones, so that long +// intervals are split when coloring fails. +static constexpr size_t kMaxGraphColoringAttemptsDebug = 100; + +// We always want to avoid spilling inside loops. +static constexpr size_t kLoopSpillWeightMultiplier = 10; + +// If we avoid moves in single jump blocks, we can avoid jumps to jumps. +static constexpr size_t kSingleJumpBlockWeightMultiplier = 2; + +// We avoid moves in blocks that dominate the exit block, since these blocks will +// be executed on every path through the method. +static constexpr size_t kDominatesExitBlockWeightMultiplier = 2; + +enum class CoalesceKind { + kAdjacentSibling, // Prevents moves at interval split points. + kFixedOutputSibling, // Prevents moves from a fixed output location. + kFixedInput, // Prevents moves into a fixed input location. + kNonlinearControlFlow, // Prevents moves between blocks. + kPhi, // Prevents phi resolution moves. + kFirstInput, // Prevents a single input move. + kAnyInput, // May lead to better instruction selection / smaller encodings. +}; + +std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) { + return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind); +} + +static size_t LoopDepthAt(HBasicBlock* block) { + HLoopInformation* loop_info = block->GetLoopInformation(); + size_t depth = 0; + while (loop_info != nullptr) { + ++depth; + loop_info = loop_info->GetPreHeader()->GetLoopInformation(); + } + return depth; +} + +// Return the runtime cost of inserting a move instruction at the specified location. +static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) { + HBasicBlock* block = liveness.GetBlockFromPosition(position / 2); + DCHECK(block != nullptr); + size_t cost = 1; + if (block->IsSingleJump()) { + cost *= kSingleJumpBlockWeightMultiplier; + } + if (block->Dominates(block->GetGraph()->GetExitBlock())) { + cost *= kDominatesExitBlockWeightMultiplier; + } + for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) { + cost *= kLoopSpillWeightMultiplier; + } + return cost; +} + +// In general, we estimate coalesce priority by whether it will definitely avoid a move, +// and by how likely it is to create an interference graph that's harder to color. 
+static size_t ComputeCoalescePriority(CoalesceKind kind, + size_t position, + const SsaLivenessAnalysis& liveness) { + if (kind == CoalesceKind::kAnyInput) { + // This type of coalescing can affect instruction selection, but not moves, so we + // give it the lowest priority. + return 0; + } else { + return CostForMoveAt(position, liveness); + } +} + +enum class CoalesceStage { + kWorklist, // Currently in the iterative coalescing worklist. + kActive, // Not in a worklist, but could be considered again during iterative coalescing. + kInactive, // No longer considered until last-chance coalescing. + kDefunct, // Either the two nodes interfere, or have already been coalesced. +}; + +std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) { + return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage); +} + +// Represents a coalesce opportunity between two nodes. +struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> { + CoalesceOpportunity(InterferenceNode* a, + InterferenceNode* b, + CoalesceKind kind, + size_t position, + const SsaLivenessAnalysis& liveness) + : node_a(a), + node_b(b), + stage(CoalesceStage::kWorklist), + priority(ComputeCoalescePriority(kind, position, liveness)) {} + + // Compare two coalesce opportunities based on their priority. + // Return true if lhs has a lower priority than that of rhs. + static bool CmpPriority(const CoalesceOpportunity* lhs, + const CoalesceOpportunity* rhs) { + return lhs->priority < rhs->priority; + } + + InterferenceNode* const node_a; + InterferenceNode* const node_b; + + // The current stage of this coalesce opportunity, indicating whether it is in a worklist, + // and whether it should still be considered. + CoalesceStage stage; + + // The priority of this coalesce opportunity, based on heuristics. + const size_t priority; +}; + +enum class NodeStage { + kInitial, // Uninitialized. + kPrecolored, // Marks fixed nodes. + kSafepoint, // Marks safepoint nodes. + kPrunable, // Marks uncolored nodes in the interference graph. + kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers. + kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers. + kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers. + kPruned // Marks nodes already pruned from the interference graph. +}; + +std::ostream& operator<<(std::ostream& os, const NodeStage& stage) { + return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage); +} + +// Returns the estimated cost of spilling a particular live interval. +static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) { + if (interval->HasRegister()) { + // Intervals with a fixed register cannot be spilled. + return std::numeric_limits<float>::min(); + } + + size_t length = interval->GetLength(); + if (length == 1) { + // Tiny intervals should have maximum priority, since they cannot be split any further. + return std::numeric_limits<float>::max(); + } + + size_t use_weight = 0; + if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) { + // Cost for spilling at a register definition point. + use_weight += CostForMoveAt(interval->GetStart() + 1, liveness); + } + + UsePosition* use = interval->GetFirstUse(); + while (use != nullptr && use->GetPosition() <= interval->GetStart()) { + // Skip uses before the start of this live interval. 
+ use = use->GetNext(); + } + + while (use != nullptr && use->GetPosition() <= interval->GetEnd()) { + if (use->GetUser() != nullptr && use->RequiresRegister()) { + // Cost for spilling at a register use point. + use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness); + } + use = use->GetNext(); + } + + // We divide by the length of the interval because we want to prioritize + // short intervals; we do not benefit much if we split them further. + return static_cast<float>(use_weight) / static_cast<float>(length); +} + +// Interference nodes make up the interference graph, which is the primary data structure in +// graph coloring register allocation. Each node represents a single live interval, and contains +// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory, +// pre-colored nodes never contain outgoing edges (only incoming ones). +// +// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed, +// but outgoing edges remain in order to later color the node based on the colors of its neighbors. +// +// Note that a pair interval is represented by a single node in the interference graph, which +// essentially requires two colors. One consequence of this is that the degree of a node is not +// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum +// number of colors with which a node could interfere. We model this by giving edges different +// weights (1 or 2) to control how much it increases the degree of adjacent nodes. +// For example, the edge between two single nodes will have weight 1. On the other hand, +// the edge between a single node and a pair node will have weight 2. This is because the pair +// node could block up to two colors for the single node, and because the single node could +// block an entire two-register aligned slot for the pair node. +// The degree is defined this way because we use it to decide whether a node is guaranteed a color, +// and thus whether it is safe to prune it from the interference graph early on. +class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { + public: + InterferenceNode(ArenaAllocator* allocator, + LiveInterval* interval, + const SsaLivenessAnalysis& liveness) + : stage(NodeStage::kInitial), + interval_(interval), + adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), + coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)), + out_degree_(interval->HasRegister() ? 
std::numeric_limits<size_t>::max() : 0), + alias_(this), + spill_weight_(ComputeSpillWeight(interval, liveness)), + requires_color_(interval->RequiresRegister()), + needs_spill_slot_(false) { + DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval"; + } + + void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) { + DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences"; + DCHECK_NE(this, other) << "Should not create self loops in the interference graph"; + DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another"; + DCHECK_NE(stage, NodeStage::kPruned); + DCHECK_NE(other->stage, NodeStage::kPruned); + if (guaranteed_not_interfering_yet) { + DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other) + == adjacent_nodes_.end()); + adjacent_nodes_.push_back(other); + out_degree_ += EdgeWeightWith(other); + } else { + auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); + if (it == adjacent_nodes_.end()) { + adjacent_nodes_.push_back(other); + out_degree_ += EdgeWeightWith(other); + } + } + } + + void RemoveInterference(InterferenceNode* other) { + DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node"; + DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning"; + auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); + if (it != adjacent_nodes_.end()) { + adjacent_nodes_.erase(it); + out_degree_ -= EdgeWeightWith(other); + } + } + + bool ContainsInterference(InterferenceNode* other) const { + DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences"; + DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences"; + auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); + return it != adjacent_nodes_.end(); + } + + LiveInterval* GetInterval() const { + return interval_; + } + + const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const { + return adjacent_nodes_; + } + + size_t GetOutDegree() const { + // Pre-colored nodes have infinite degree. + DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max()); + return out_degree_; + } + + void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) { + coalesce_opportunities_.push_back(opportunity); + } + + void ClearCoalesceOpportunities() { + coalesce_opportunities_.clear(); + } + + bool IsMoveRelated() const { + for (CoalesceOpportunity* opportunity : coalesce_opportunities_) { + if (opportunity->stage == CoalesceStage::kWorklist || + opportunity->stage == CoalesceStage::kActive) { + return true; + } + } + return false; + } + + // Return whether this node already has a color. + // Used to find fixed nodes in the interference graph before coloring. + bool IsPrecolored() const { + return interval_->HasRegister(); + } + + bool IsPair() const { + return interval_->HasHighInterval(); + } + + void SetAlias(InterferenceNode* rep) { + DCHECK_NE(rep->stage, NodeStage::kPruned); + DCHECK_EQ(this, alias_) << "Should only set a node's alias once"; + alias_ = rep; + } + + InterferenceNode* GetAlias() { + if (alias_ != this) { + // Recurse in order to flatten tree of alias pointers. 
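+ // (This is union-find path compression: after the call, alias_ points directly at the
+ // representative node of its coalesced group.)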
+ alias_ = alias_->GetAlias(); + } + return alias_; + } + + const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const { + return coalesce_opportunities_; + } + + float GetSpillWeight() const { + return spill_weight_; + } + + bool RequiresColor() const { + return requires_color_; + } + + // We give extra weight to edges adjacent to pair nodes. See the general comment on the + // interference graph above. + size_t EdgeWeightWith(const InterferenceNode* other) const { + return (IsPair() || other->IsPair()) ? 2 : 1; + } + + bool NeedsSpillSlot() const { + return needs_spill_slot_; + } + + void SetNeedsSpillSlot() { + needs_spill_slot_ = true; + } + + // The current stage of this node, indicating which worklist it belongs to. + NodeStage stage; + + private: + // The live interval that this node represents. + LiveInterval* const interval_; + + // All nodes interfering with this one. + // We use an unsorted vector as a set, since a tree or hash set is too heavy for the + // set sizes that we encounter. Using a vector leads to much better performance. + ArenaVector<InterferenceNode*> adjacent_nodes_; + + // Interference nodes that this node should be coalesced with to reduce moves. + ArenaVector<CoalesceOpportunity*> coalesce_opportunities_; + + // The maximum number of colors with which this node could interfere. This could be more than + // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes. + // We use "out" degree because incoming edges come from nodes already pruned from the graph, + // and do not affect the coloring of this node. + // Pre-colored nodes are treated as having infinite degree. + size_t out_degree_; + + // The node representing this node in the interference graph. + // Initially set to `this`, and only changed if this node is coalesced into another. + InterferenceNode* alias_; + + // The cost of splitting and spilling this interval to the stack. + // Nodes with a higher spill weight should be prioritized when assigning registers. + // This is essentially based on use density and location; short intervals with many uses inside + // deeply nested loops have a high spill weight. + const float spill_weight_; + + const bool requires_color_; + + bool needs_spill_slot_; + + DISALLOW_COPY_AND_ASSIGN(InterferenceNode); +}; + +// The order in which we color nodes is important. To guarantee forward progress, +// we prioritize intervals that require registers, and after that we prioritize +// short intervals. That way, if we fail to color a node, it either won't require a +// register, or it will be a long interval that can be split in order to make the +// interference graph sparser. +// To improve code quality, we prioritize intervals used frequently in deeply nested loops. +// (This metric is secondary to the forward progress requirements above.) +// TODO: May also want to consider: +// - Constants (since they can be rematerialized) +// - Allocated spill slots +static bool HasGreaterNodePriority(const InterferenceNode* lhs, + const InterferenceNode* rhs) { + // (1) Prioritize the node that requires a color. + if (lhs->RequiresColor() != rhs->RequiresColor()) { + return lhs->RequiresColor(); + } + + // (2) Prioritize the interval that has a higher spill weight. + return lhs->GetSpillWeight() > rhs->GetSpillWeight(); +} + +// A ColoringIteration holds the many data structures needed for a single graph coloring attempt, +// and provides methods for each phase of the attempt. 
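+// The phases, in order, are BuildInterferenceGraph, FindCoalesceOpportunities,
+// PruneInterferenceGraph, and ColorInterferenceGraph.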
+class ColoringIteration {
+ public:
+ ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
+ ArenaAllocator* allocator,
+ bool processing_core_regs,
+ size_t num_regs)
+ : register_allocator_(register_allocator),
+ allocator_(allocator),
+ processing_core_regs_(processing_core_regs),
+ num_regs_(num_regs),
+ interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
+ coalesce_worklist_(CoalesceOpportunity::CmpPriority,
+ allocator->Adapter(kArenaAllocRegisterAllocator)) {}
+
+ // Use the intervals collected from instructions to construct an
+ // interference graph mapping intervals to adjacency lists.
+ // Also, collect synthesized safepoint nodes, used to keep
+ // track of live intervals across safepoints.
+ // TODO: Should build safepoints elsewhere.
+ void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
+ const ArenaVector<InterferenceNode*>& physical_nodes);
+
+ // Add coalesce opportunities to interference nodes.
+ void FindCoalesceOpportunities();
+
+ // Prune nodes from the interference graph to be colored later. Build
+ // a stack (pruned_nodes) containing these intervals in an order determined
+ // by various heuristics.
+ void PruneInterferenceGraph();
+
+ // Process pruned_nodes_ to color the interference graph, spilling when
+ // necessary. Returns true if successful. Else, some intervals have been
+ // split, and the interference graph should be rebuilt for another attempt.
+ bool ColorInterferenceGraph();
+
+ // Return prunable nodes.
+ // The register allocator will need to access prunable nodes after coloring
+ // in order to tell the code generator which registers have been assigned.
+ const ArenaVector<InterferenceNode*>& GetPrunableNodes() const {
+ return prunable_nodes_;
+ }
+
+ private:
+ // Create a coalesce opportunity between two nodes.
+ void CreateCoalesceOpportunity(InterferenceNode* a,
+ InterferenceNode* b,
+ CoalesceKind kind,
+ size_t position);
+
+ // Add an edge in the interference graph, if valid.
+ // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
+ // when possible.
+ void AddPotentialInterference(InterferenceNode* from,
+ InterferenceNode* to,
+ bool guaranteed_not_interfering_yet,
+ bool both_directions = true);
+
+ // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
+ // may be pruned from the interference graph.
+ void FreezeMoves(InterferenceNode* node);
+
+ // Prune a node from the interference graph, updating worklists if necessary.
+ void PruneNode(InterferenceNode* node);
+
+ // Add coalesce opportunities associated with this node to the coalesce worklist.
+ void EnableCoalesceOpportunities(InterferenceNode* node);
+
+ // If needed, move `node` from the freeze worklist to the simplify worklist.
+ void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
+
+ // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
+ bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+ // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
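+ // (This is the Briggs-style conservative test; see the comment in UncoloredHeuristic's
+ // implementation below.)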
+ bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+ void Coalesce(CoalesceOpportunity* opportunity);
+
+ // Merge `from` into `into` in the interference graph.
+ void Combine(InterferenceNode* from, InterferenceNode* into);
+
+ // A reference to the register allocator instance,
+ // needed to split intervals and assign spill slots.
+ RegisterAllocatorGraphColor* register_allocator_;
+
+ // An arena allocator used for a single graph coloring attempt.
+ ArenaAllocator* allocator_;
+
+ const bool processing_core_regs_;
+
+ const size_t num_regs_;
+
+ // A map from live intervals to interference nodes.
+ ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
+
+ // Uncolored nodes that should be pruned from the interference graph.
+ ArenaVector<InterferenceNode*> prunable_nodes_;
+
+ // A stack of nodes pruned from the interference graph, waiting to be colored.
+ ArenaStdStack<InterferenceNode*> pruned_nodes_;
+
+ // A queue containing low degree, non-move-related nodes that can be pruned immediately.
+ ArenaDeque<InterferenceNode*> simplify_worklist_;
+
+ // A queue containing low degree, move-related nodes.
+ ArenaDeque<InterferenceNode*> freeze_worklist_;
+
+ // A queue containing high degree nodes.
+ // If we have to prune from the spill worklist, we cannot guarantee
+ // the pruned node a color, so we order the worklist by priority.
+ ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
+
+ // A queue containing coalesce opportunities.
+ // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
+ // inside of loops) are more important than others.
+ ArenaPriorityQueue<CoalesceOpportunity*,
+ decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
+
+ DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
+};
+
+static bool IsCoreInterval(LiveInterval* interval) {
+ return !Primitive::IsFloatingPointType(interval->GetType());
+}
+
+static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
+ return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
+}
+
+RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness,
+ bool iterative_move_coalescing)
+ : RegisterAllocator(allocator, codegen, liveness),
+ iterative_move_coalescing_(iterative_move_coalescing),
+ core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ num_int_spill_slots_(0),
+ num_double_spill_slots_(0),
+ num_float_spill_slots_(0),
+ num_long_spill_slots_(0),
+ catch_phi_spill_slot_counter_(0),
+ reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
+ reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
+ // Before we ask for blocked registers, set them up in the code generator.
+ codegen->SetupBlockedRegisters();
+
+ // Initialize physical core register live intervals and blocked registers.
+ // This includes globally blocked registers, such as the stack pointer.
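+ // (A register blocked by the code generator is modeled as a fixed interval covering the
+ // whole method, so every other interval interferes with it.)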
+ physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr); + for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { + LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt); + physical_core_nodes_[i] = + new (allocator_) InterferenceNode(allocator_, interval, liveness); + physical_core_nodes_[i]->stage = NodeStage::kPrecolored; + core_intervals_.push_back(interval); + if (codegen_->IsBlockedCoreRegister(i)) { + interval->AddRange(0, liveness.GetMaxLifetimePosition()); + } + } + // Initialize physical floating point register live intervals and blocked registers. + physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr); + for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { + LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat); + physical_fp_nodes_[i] = + new (allocator_) InterferenceNode(allocator_, interval, liveness); + physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; + fp_intervals_.push_back(interval); + if (codegen_->IsBlockedFloatingPointRegister(i)) { + interval->AddRange(0, liveness.GetMaxLifetimePosition()); + } + } +} + +void RegisterAllocatorGraphColor::AllocateRegisters() { + // (1) Collect and prepare live intervals. + ProcessInstructions(); + + for (bool processing_core_regs : {true, false}) { + ArenaVector<LiveInterval*>& intervals = processing_core_regs + ? core_intervals_ + : fp_intervals_; + size_t num_registers = processing_core_regs + ? codegen_->GetNumberOfCoreRegisters() + : codegen_->GetNumberOfFloatingPointRegisters(); + + size_t attempt = 0; + while (true) { + ++attempt; + DCHECK(attempt <= kMaxGraphColoringAttemptsDebug) + << "Exceeded debug max graph coloring register allocation attempts. " + << "This could indicate that the register allocator is not making forward progress, " + << "which could be caused by prioritizing the wrong live intervals. (Short intervals " + << "should be prioritized over long ones, because they cannot be split further.)"; + + // Many data structures are cleared between graph coloring attempts, so we reduce + // total memory usage by using a new arena allocator for each attempt. + ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool()); + ColoringIteration iteration(this, + &coloring_attempt_allocator, + processing_core_regs, + num_registers); + + // (2) Build the interference graph. Also gather safepoints. + ArenaVector<InterferenceNode*> safepoints( + coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator)); + ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs + ? physical_core_nodes_ + : physical_fp_nodes_; + iteration.BuildInterferenceGraph(intervals, physical_nodes); + + // (3) Add coalesce opportunities. + // If we have tried coloring the graph a suspiciously high number of times, give + // up on move coalescing, just in case the coalescing heuristics are not conservative. + // (This situation will be caught if DCHECKs are turned on.) + if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) { + iteration.FindCoalesceOpportunities(); + } + + // (4) Prune all uncolored nodes from interference graph. + iteration.PruneInterferenceGraph(); + + // (5) Color pruned nodes based on interferences. + bool successful = iteration.ColorInterferenceGraph(); + + // We manually clear coalesce opportunities for physical nodes, + // since they persist across coloring attempts. 
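+ // (Otherwise, opportunities created in step (3) of this attempt would still be attached to
+ // the physical nodes during the next coloring attempt.)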
+ for (InterferenceNode* node : physical_core_nodes_) {
+ node->ClearCoalesceOpportunities();
+ }
+ for (InterferenceNode* node : physical_fp_nodes_) {
+ node->ClearCoalesceOpportunities();
+ }
+
+ if (successful) {
+ // Assign spill slots.
+ AllocateSpillSlots(iteration.GetPrunableNodes());
+
+ // Tell the code generator which registers were allocated.
+ // We only look at prunable_nodes because we already told the code generator about
+ // fixed intervals while processing instructions. We also ignore the fixed intervals
+ // placed at the top of catch blocks.
+ for (InterferenceNode* node : iteration.GetPrunableNodes()) {
+ LiveInterval* interval = node->GetInterval();
+ if (interval->HasRegister()) {
+ Location low_reg = processing_core_regs
+ ? Location::RegisterLocation(interval->GetRegister())
+ : Location::FpuRegisterLocation(interval->GetRegister());
+ codegen_->AddAllocatedRegister(low_reg);
+ if (interval->HasHighInterval()) {
+ LiveInterval* high = interval->GetHighInterval();
+ DCHECK(high->HasRegister());
+ Location high_reg = processing_core_regs
+ ? Location::RegisterLocation(high->GetRegister())
+ : Location::FpuRegisterLocation(high->GetRegister());
+ codegen_->AddAllocatedRegister(high_reg);
+ }
+ } else {
+ DCHECK(!interval->HasHighInterval() || !interval->GetHighInterval()->HasRegister());
+ }
+ }
+
+ break;
+ }
+ } // while unsuccessful
+ } // for processing_core_regs
+
+ // (6) Resolve locations and deconstruct SSA form.
+ RegisterAllocationResolver(allocator_, codegen_, liveness_)
+ .Resolve(ArrayRef<HInstruction* const>(safepoints_),
+ reserved_art_method_slots_ + reserved_out_slots_,
+ num_int_spill_slots_,
+ num_long_spill_slots_,
+ num_float_spill_slots_,
+ num_double_spill_slots_,
+ catch_phi_spill_slot_counter_,
+ temp_intervals_);
+
+ if (kIsDebugBuild) {
+ Validate(/*log_fatal_on_failure*/ true);
+ }
+}
+
+bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
+ for (bool processing_core_regs : {true, false}) {
+ ArenaVector<LiveInterval*> intervals(
+ allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
+ for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ LiveInterval* interval = instruction->GetLiveInterval();
+ if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
+ intervals.push_back(instruction->GetLiveInterval());
+ }
+ }
+
+ ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
+ ? physical_core_nodes_
+ : physical_fp_nodes_;
+ for (InterferenceNode* fixed : physical_nodes) {
+ LiveInterval* interval = fixed->GetInterval();
+ if (interval->GetFirstRange() != nullptr) {
+ // Ideally we would check fixed ranges as well, but currently there are times when
+ // two fixed intervals for the same register will overlap. For example, a fixed input
+ // and a fixed output may sometimes share the same register, in which case there will be two
+ // fixed intervals for the same place.
+ } + } + + for (LiveInterval* temp : temp_intervals_) { + if (IsCoreInterval(temp) == processing_core_regs) { + intervals.push_back(temp); + } + } + + size_t spill_slots = num_int_spill_slots_ + + num_long_spill_slots_ + + num_float_spill_slots_ + + num_double_spill_slots_ + + catch_phi_spill_slot_counter_; + bool ok = ValidateIntervals(intervals, + spill_slots, + reserved_art_method_slots_ + reserved_out_slots_, + *codegen_, + allocator_, + processing_core_regs, + log_fatal_on_failure); + if (!ok) { + return false; + } + } // for processing_core_regs + + return true; +} + +void RegisterAllocatorGraphColor::ProcessInstructions() { + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { + // Note that we currently depend on this ordering, since some helper + // code is designed for linear scan register allocation. + for (HBackwardInstructionIterator instr_it(block->GetInstructions()); + !instr_it.Done(); + instr_it.Advance()) { + ProcessInstruction(instr_it.Current()); + } + + for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + ProcessInstruction(phi_it.Current()); + } + + if (block->IsCatchBlock() + || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { + // By blocking all registers at the top of each catch block or irreducible loop, we force + // intervals belonging to the live-in set of the catch/header block to be spilled. + // TODO(ngeoffray): Phis in this block could be allocated in register. + size_t position = block->GetLifetimeStart(); + BlockRegisters(position, position + 1); + } + } +} + +void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) { + LocationSummary* locations = instruction->GetLocations(); + if (locations == nullptr) { + return; + } + if (locations->NeedsSafepoint() && codegen_->IsLeafMethod()) { + // We do this here because we do not want the suspend check to artificially + // create live registers. + DCHECK(instruction->IsSuspendCheckEntry()); + DCHECK_EQ(locations->GetTempCount(), 0u); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + CheckForTempLiveIntervals(instruction); + CheckForSafepoint(instruction); + if (instruction->GetLocations()->WillCall()) { + // If a call will happen, create fixed intervals for caller-save registers. + // TODO: Note that it may be beneficial to later split intervals at this point, + // so that we allow last-minute moves from a caller-save register + // to a callee-save register. + BlockRegisters(instruction->GetLifetimePosition(), + instruction->GetLifetimePosition() + 1, + /*caller_save_only*/ true); + } + CheckForFixedInputs(instruction); + + LiveInterval* interval = instruction->GetLiveInterval(); + if (interval == nullptr) { + // Instructions lacking a valid output location do not have a live interval. + DCHECK(!locations->Out().IsValid()); + return; + } + + // Low intervals act as representatives for their corresponding high interval. + DCHECK(!interval->IsHighInterval()); + if (codegen_->NeedsTwoRegisters(interval->GetType())) { + interval->AddHighInterval(); + } + AddSafepointsFor(instruction); + CheckForFixedOutput(instruction); + AllocateSpillSlotForCatchPhi(instruction); + + ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval) + ? core_intervals_ + : fp_intervals_; + if (interval->HasSpillSlot() || instruction->IsConstant()) { + // Note that if an interval already has a spill slot, then its value currently resides + // in the stack (e.g., parameters). 
Thus we do not have to allocate a register until its first + // register use. This is also true for constants, which can be materialized at any point. + size_t first_register_use = interval->FirstRegisterUse(); + if (first_register_use != kNoLifetime) { + LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1); + intervals.push_back(split); + } else { + // We won't allocate a register for this value. + } + } else { + intervals.push_back(interval); + } +} + +void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) { + // We simply block physical registers where necessary. + // TODO: Ideally we would coalesce the physical register with the register + // allocated to the input value, but this can be tricky if, e.g., there + // could be multiple physical register uses of the same value at the + // same instruction. Furthermore, there's currently no distinction between + // fixed inputs to a call (which will be clobbered) and other fixed inputs (which + // may not be clobbered). + LocationSummary* locations = instruction->GetLocations(); + size_t position = instruction->GetLifetimePosition(); + for (size_t i = 0; i < locations->GetInputCount(); ++i) { + Location input = locations->InAt(i); + if (input.IsRegister() || input.IsFpuRegister()) { + BlockRegister(input, position, position + 1); + codegen_->AddAllocatedRegister(input); + } else if (input.IsPair()) { + BlockRegister(input.ToLow(), position, position + 1); + BlockRegister(input.ToHigh(), position, position + 1); + codegen_->AddAllocatedRegister(input.ToLow()); + codegen_->AddAllocatedRegister(input.ToHigh()); + } + } +} + +void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) { + // If an instruction has a fixed output location, we give the live interval a register and then + // proactively split it just after the definition point to avoid creating too many interferences + // with a fixed node. + LiveInterval* interval = instruction->GetLiveInterval(); + Location out = interval->GetDefinedBy()->GetLocations()->Out(); + size_t position = instruction->GetLifetimePosition(); + DCHECK_GE(interval->GetEnd() - position, 2u); + + if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { + out = instruction->GetLocations()->InAt(0); + } + + if (out.IsRegister() || out.IsFpuRegister()) { + interval->SetRegister(out.reg()); + codegen_->AddAllocatedRegister(out); + Split(interval, position + 1); + } else if (out.IsPair()) { + interval->SetRegister(out.low()); + interval->GetHighInterval()->SetRegister(out.high()); + codegen_->AddAllocatedRegister(out.ToLow()); + codegen_->AddAllocatedRegister(out.ToHigh()); + Split(interval, position + 1); + } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) { + interval->SetSpillSlot(out.GetStackIndex()); + } else { + DCHECK(out.IsUnallocated() || out.IsConstant()); + } +} + +void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) { + LiveInterval* interval = instruction->GetLiveInterval(); + for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { + HInstruction* safepoint = safepoints_[safepoint_index - 1u]; + size_t safepoint_position = safepoint->GetLifetimePosition(); + + // Test that safepoints_ are ordered in the optimal way. 
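+ // (That is, in order of decreasing lifetime position, so this backward walk visits them in
+ // increasing position and can stop at the first safepoint past the end of the interval.)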
+ DCHECK(safepoint_index == safepoints_.size() || + safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); + + if (safepoint_position == interval->GetStart()) { + // The safepoint is for this instruction, so the location of the instruction + // does not need to be saved. + DCHECK_EQ(safepoint_index, safepoints_.size()); + DCHECK_EQ(safepoint, instruction); + continue; + } else if (interval->IsDeadAt(safepoint_position)) { + break; + } else if (!interval->Covers(safepoint_position)) { + // Hole in the interval. + continue; + } + interval->AddSafepoint(safepoint); + } +} + +void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) { + LocationSummary* locations = instruction->GetLocations(); + size_t position = instruction->GetLifetimePosition(); + for (size_t i = 0; i < locations->GetTempCount(); ++i) { + Location temp = locations->GetTemp(i); + if (temp.IsRegister() || temp.IsFpuRegister()) { + BlockRegister(temp, position, position + 1); + codegen_->AddAllocatedRegister(temp); + } else { + DCHECK(temp.IsUnallocated()); + switch (temp.GetPolicy()) { + case Location::kRequiresRegister: { + LiveInterval* interval = + LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); + interval->AddTempUse(instruction, i); + core_intervals_.push_back(interval); + temp_intervals_.push_back(interval); + break; + } + + case Location::kRequiresFpuRegister: { + LiveInterval* interval = + LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); + interval->AddTempUse(instruction, i); + fp_intervals_.push_back(interval); + temp_intervals_.push_back(interval); + if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + interval->AddHighInterval(/*is_temp*/ true); + temp_intervals_.push_back(interval->GetHighInterval()); + } + break; + } + + default: + LOG(FATAL) << "Unexpected policy for temporary location " + << temp.GetPolicy(); + } + } + } +} + +void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) { + LocationSummary* locations = instruction->GetLocations(); + + if (locations->NeedsSafepoint()) { + safepoints_.push_back(instruction); + } +} + +LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) { + if (interval->GetStart() < position && position < interval->GetEnd()) { + return Split(interval, position); + } else { + return interval; + } +} + +void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) { + DCHECK(!interval->IsHighInterval()); + + // Split just after a register definition. + if (interval->IsParent() && interval->DefinitionRequiresRegister()) { + interval = TrySplit(interval, interval->GetStart() + 1); + } + + UsePosition* use = interval->GetFirstUse(); + while (use != nullptr && use->GetPosition() < interval->GetStart()) { + use = use->GetNext(); + } + + // Split around register uses. + size_t end = interval->GetEnd(); + while (use != nullptr && use->GetPosition() <= end) { + if (use->RequiresRegister()) { + size_t position = use->GetPosition(); + interval = TrySplit(interval, position - 1); + if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) { + // If we are at the very end of a basic block, we cannot split right + // at the use. Split just after instead. 
+ interval = TrySplit(interval, position + 1); + } else { + interval = TrySplit(interval, position); + } + } + use = use->GetNext(); + } +} + +void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) { + if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { + HPhi* phi = instruction->AsPhi(); + LiveInterval* interval = phi->GetLiveInterval(); + + HInstruction* previous_phi = phi->GetPrevious(); + DCHECK(previous_phi == nullptr || + previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) + << "Phis expected to be sorted by vreg number, " + << "so that equivalent phis are adjacent."; + + if (phi->IsVRegEquivalentOf(previous_phi)) { + // Assign the same spill slot. + DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot()); + interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); + } else { + interval->SetSpillSlot(catch_phi_spill_slot_counter_); + catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1; + } + } +} + +void RegisterAllocatorGraphColor::BlockRegister(Location location, + size_t start, + size_t end) { + DCHECK(location.IsRegister() || location.IsFpuRegister()); + int reg = location.reg(); + LiveInterval* interval = location.IsRegister() + ? physical_core_nodes_[reg]->GetInterval() + : physical_fp_nodes_[reg]->GetInterval(); + DCHECK(interval->GetRegister() == reg); + bool blocked_by_codegen = location.IsRegister() + ? codegen_->IsBlockedCoreRegister(reg) + : codegen_->IsBlockedFloatingPointRegister(reg); + if (blocked_by_codegen) { + // We've already blocked this register for the entire method. (And adding a + // range inside another range violates the preconditions of AddRange). + } else { + interval->AddRange(start, end); + } +} + +void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) { + for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { + if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) { + BlockRegister(Location::RegisterLocation(i), start, end); + } + } + for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { + if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) { + BlockRegister(Location::FpuRegisterLocation(i), start, end); + } + } +} + +void ColoringIteration::AddPotentialInterference(InterferenceNode* from, + InterferenceNode* to, + bool guaranteed_not_interfering_yet, + bool both_directions) { + if (from->IsPrecolored()) { + // We save space by ignoring outgoing edges from fixed nodes. + } else if (to->IsPrecolored()) { + // It is important that only a single node represents a given fixed register in the + // interference graph. We retrieve that node here. + const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint() + ? register_allocator_->physical_fp_nodes_ + : register_allocator_->physical_core_nodes_; + InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()]; + from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false); + DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); + DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node"; + + // If a node interferes with a fixed pair node, the weight of the edge may + // be inaccurate after using the alias of the pair node, because the alias of the pair node + // is a singular node. 
+ // We could make special pair fixed nodes, but that ends up being too conservative because
+ // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
+ // three rather than two.
+ // Instead, we explicitly add an interference with the high node of the fixed pair node.
+ // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
+ // can be unaligned on x86 complicates things.
+ if (to->IsPair()) {
+ InterferenceNode* high_node =
+ physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
+ DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
+ high_node->GetInterval()->GetRegister());
+ from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false);
+ }
+ } else {
+ // Standard interference between two uncolored nodes.
+ from->AddInterference(to, guaranteed_not_interfering_yet);
+ }
+
+ if (both_directions) {
+ AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
+ }
+}
+
+// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
+// is allowed to have the same register as the input interval.
+// TODO: Ideally we should just produce correct intervals in liveness analysis.
+// We would need to refactor the current live interval layout to do so, which is
+// no small task.
+static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
+ LiveInterval* output_interval = out_node->GetInterval();
+ HInstruction* defined_by = output_interval->GetDefinedBy();
+ if (defined_by == nullptr) {
+ // This must not be a definition point.
+ return false;
+ }
+
+ LocationSummary* locations = defined_by->GetLocations();
+ if (locations->OutputCanOverlapWithInputs()) {
+ // This instruction does not allow the output to reuse a register from an input.
+ return false;
+ }
+
+ LiveInterval* input_interval = in_node->GetInterval();
+ LiveInterval* next_sibling = input_interval->GetNextSibling();
+ size_t def_position = defined_by->GetLifetimePosition();
+ size_t use_position = def_position + 1;
+ if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
+ // The next sibling starts at the use position, so reusing the input register in the output
+ // would clobber the input before it's moved into the sibling interval location.
+ return false;
+ }
+
+ if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
+ // The input interval is live after the use position.
+ return false;
+ }
+
+ HInputsRef inputs = defined_by->GetInputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
+ DCHECK(input_interval->SameRegisterKind(*output_interval));
+ return true;
+ }
+ }
+
+ // The input interval was not an input for this instruction.
+ return false;
+}
+
+void ColoringIteration::BuildInterferenceGraph(
+ const ArenaVector<LiveInterval*>& intervals,
+ const ArenaVector<InterferenceNode*>& physical_nodes) {
+ DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
+ // Build the interference graph efficiently by ordering range endpoints
+ // by position and doing a linear sweep to find interferences. (That is, we
+ // jump from endpoint to endpoint, maintaining a set of intervals live at each
+ // point. If two nodes are ever in the live set at the same time, then they
+ // interfere with each other.)
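+ // For example, given ranges A=[2,10), B=[6,14), and C=[12,18), the sweep sees A and B live
+ // together at position 6 and B and C live together at position 12, so it adds edges A-B and
+ // B-C, but not A-C.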
+ // + // We order by both position and (secondarily) by whether the endpoint + // begins or ends a range; we want to process range endings before range + // beginnings at the same position because they should not conflict. + // + // For simplicity, we create a tuple for each endpoint, and then sort the tuples. + // Tuple contents: (position, is_range_beginning, node). + ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( + allocator_->Adapter(kArenaAllocRegisterAllocator)); + + // We reserve plenty of space to avoid excessive copying. + range_endpoints.reserve(4 * prunable_nodes_.size()); + + for (LiveInterval* parent : intervals) { + for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) { + LiveRange* range = sibling->GetFirstRange(); + if (range != nullptr) { + InterferenceNode* node = new (allocator_) InterferenceNode( + allocator_, sibling, register_allocator_->liveness_); + interval_node_map_.Insert(std::make_pair(sibling, node)); + + if (sibling->HasRegister()) { + // Fixed nodes should alias the canonical node for the corresponding register. + node->stage = NodeStage::kPrecolored; + InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()]; + node->SetAlias(physical_node); + DCHECK_EQ(node->GetInterval()->GetRegister(), + physical_node->GetInterval()->GetRegister()); + } else { + node->stage = NodeStage::kPrunable; + prunable_nodes_.push_back(node); + } + + while (range != nullptr) { + range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node)); + range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node)); + range = range->GetNext(); + } + } + } + } + + // Sort the endpoints. + // We explicitly ignore the third entry of each tuple (the node pointer) in order + // to maintain determinism. + std::sort(range_endpoints.begin(), range_endpoints.end(), + [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs, + const std::tuple<size_t, bool, InterferenceNode*>& rhs) { + return std::tie(std::get<0>(lhs), std::get<1>(lhs)) + < std::tie(std::get<0>(rhs), std::get<1>(rhs)); + }); + + // Nodes live at the current position in the linear sweep. + ArenaVector<InterferenceNode*> live( + allocator_->Adapter(kArenaAllocRegisterAllocator)); + + // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the + // live set. When we encounter the end of a range, we remove the corresponding node + // from the live set. Nodes interfere if they are in the live set at the same time. + for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) { + bool is_range_beginning; + InterferenceNode* node; + size_t position; + // Extract information from the tuple, including the node this tuple represents. + std::tie(position, is_range_beginning, node) = *it; + + if (is_range_beginning) { + bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart(); + for (InterferenceNode* conflicting : live) { + DCHECK_NE(node, conflicting); + if (CheckInputOutputCanOverlap(conflicting, node)) { + // We do not add an interference, because the instruction represented by `node` allows + // its output to share a register with an input, represented here by `conflicting`. + } else { + AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet); + } + } + DCHECK(std::find(live.begin(), live.end(), node) == live.end()); + live.push_back(node); + } else { + // End of range. 
+ auto live_it = std::find(live.begin(), live.end(), node); + DCHECK(live_it != live.end()); + live.erase(live_it); + } + } + DCHECK(live.empty()); +} + +void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a, + InterferenceNode* b, + CoalesceKind kind, + size_t position) { + DCHECK_EQ(a->IsPair(), b->IsPair()) + << "Nodes of different memory widths should never be coalesced"; + CoalesceOpportunity* opportunity = + new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_); + a->AddCoalesceOpportunity(opportunity); + b->AddCoalesceOpportunity(opportunity); + coalesce_worklist_.push(opportunity); +} + +// When looking for coalesce opportunities, we use the interval_node_map_ to find the node +// corresponding to an interval. Note that not all intervals are in this map, notably the parents +// of constants and stack arguments. (However, these interval should not be involved in coalesce +// opportunities anyway, because they're not going to be in registers.) +void ColoringIteration::FindCoalesceOpportunities() { + DCHECK(coalesce_worklist_.empty()); + + for (InterferenceNode* node : prunable_nodes_) { + LiveInterval* interval = node->GetInterval(); + + // Coalesce siblings. + LiveInterval* next_sibling = interval->GetNextSibling(); + if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { + auto it = interval_node_map_.Find(next_sibling); + if (it != interval_node_map_.end()) { + InterferenceNode* sibling_node = it->second; + CreateCoalesceOpportunity(node, + sibling_node, + CoalesceKind::kAdjacentSibling, + interval->GetEnd()); + } + } + + // Coalesce fixed outputs with this interval if this interval is an adjacent sibling. + LiveInterval* parent = interval->GetParent(); + if (parent->HasRegister() + && parent->GetNextSibling() == interval + && parent->GetEnd() == interval->GetStart()) { + auto it = interval_node_map_.Find(parent); + if (it != interval_node_map_.end()) { + InterferenceNode* parent_node = it->second; + CreateCoalesceOpportunity(node, + parent_node, + CoalesceKind::kFixedOutputSibling, + parent->GetEnd()); + } + } + + // Try to prevent moves across blocks. + // Note that this does not lead to many succeeding coalesce attempts, so could be removed + // if found to add to compile time. + const SsaLivenessAnalysis& liveness = register_allocator_->liveness_; + if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) { + // If the start of this interval is at a block boundary, we look at the + // location of the interval in blocks preceding the block this interval + // starts at. This can avoid a move between the two blocks. + HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + size_t position = predecessor->GetLifetimeEnd() - 1; + LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); + if (existing != nullptr) { + auto it = interval_node_map_.Find(existing); + if (it != interval_node_map_.end()) { + InterferenceNode* existing_node = it->second; + CreateCoalesceOpportunity(node, + existing_node, + CoalesceKind::kNonlinearControlFlow, + position); + } + } + } + } + + // Coalesce phi inputs with the corresponding output. 
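+ // (Each input reaches the phi through a move at the end of its predecessor block; giving the
+ // phi and that input sibling the same register removes the move.)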
+ HInstruction* defined_by = interval->GetDefinedBy(); + if (defined_by != nullptr && defined_by->IsPhi()) { + const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors(); + HInputsRef inputs = defined_by->GetInputs(); + + for (size_t i = 0, e = inputs.size(); i < e; ++i) { + // We want the sibling at the end of the appropriate predecessor block. + size_t position = predecessors[i]->GetLifetimeEnd() - 1; + LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); + + auto it = interval_node_map_.Find(input_interval); + if (it != interval_node_map_.end()) { + InterferenceNode* input_node = it->second; + CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); + } + } + } + + // Coalesce output with first input when policy is kSameAsFirstInput. + if (defined_by != nullptr) { + Location out = defined_by->GetLocations()->Out(); + if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { + LiveInterval* input_interval + = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); + // TODO: Could we consider lifetime holes here? + if (input_interval->GetEnd() == interval->GetStart()) { + auto it = interval_node_map_.Find(input_interval); + if (it != interval_node_map_.end()) { + InterferenceNode* input_node = it->second; + CreateCoalesceOpportunity(node, + input_node, + CoalesceKind::kFirstInput, + interval->GetStart()); + } + } + } + } + + // An interval that starts an instruction (that is, it is not split), may + // re-use the registers used by the inputs of that instruction, based on the + // location summary. + if (defined_by != nullptr) { + DCHECK(!interval->IsSplit()); + LocationSummary* locations = defined_by->GetLocations(); + if (!locations->OutputCanOverlapWithInputs()) { + HInputsRef inputs = defined_by->GetInputs(); + for (size_t i = 0; i < inputs.size(); ++i) { + size_t def_point = defined_by->GetLifetimePosition(); + // TODO: Getting the sibling at the def_point might not be quite what we want + // for fixed inputs, since the use will be *at* the def_point rather than after. + LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); + if (input_interval != nullptr && + input_interval->HasHighInterval() == interval->HasHighInterval()) { + auto it = interval_node_map_.Find(input_interval); + if (it != interval_node_map_.end()) { + InterferenceNode* input_node = it->second; + CreateCoalesceOpportunity(node, + input_node, + CoalesceKind::kAnyInput, + interval->GetStart()); + } + } + } + } + } + + // Try to prevent moves into fixed input locations. + UsePosition* use = interval->GetFirstUse(); + for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) { + // Skip past uses before the start of this interval. + } + for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) { + HInstruction* user = use->GetUser(); + if (user == nullptr) { + // User may be null for certain intervals, such as temp intervals. + continue; + } + LocationSummary* locations = user->GetLocations(); + Location input = locations->InAt(use->GetInputIndex()); + if (input.IsRegister() || input.IsFpuRegister()) { + // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes + // is currently not supported. + InterferenceNode* fixed_node = input.IsRegister() + ? 
register_allocator_->physical_core_nodes_[input.reg()] + : register_allocator_->physical_fp_nodes_[input.reg()]; + CreateCoalesceOpportunity(node, + fixed_node, + CoalesceKind::kFixedInput, + user->GetLifetimePosition()); + } + } + } // for node in prunable_nodes +} + +static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) { + return node->GetOutDegree() < num_regs; +} + +static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) { + return !IsLowDegreeNode(node, num_regs); +} + +void ColoringIteration::PruneInterferenceGraph() { + DCHECK(pruned_nodes_.empty() + && simplify_worklist_.empty() + && freeze_worklist_.empty() + && spill_worklist_.empty()); + // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes, + // and all others as high degree nodes. The distinction is important: low degree nodes are + // guaranteed a color, while high degree nodes are not. + + // Build worklists. Note that the coalesce worklist has already been + // filled by FindCoalesceOpportunities(). + for (InterferenceNode* node : prunable_nodes_) { + DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned"; + if (IsLowDegreeNode(node, num_regs_)) { + if (node->GetCoalesceOpportunities().empty()) { + // Simplify Worklist. + node->stage = NodeStage::kSimplifyWorklist; + simplify_worklist_.push_back(node); + } else { + // Freeze Worklist. + node->stage = NodeStage::kFreezeWorklist; + freeze_worklist_.push_back(node); + } + } else { + // Spill worklist. + node->stage = NodeStage::kSpillWorklist; + spill_worklist_.push(node); + } + } + + // Prune graph. + // Note that we do not remove a node from its current worklist if it moves to another, so it may + // be in multiple worklists at once; the node's `phase` says which worklist it is really in. + while (true) { + if (!simplify_worklist_.empty()) { + // Prune low-degree nodes. + // TODO: pop_back() should work as well, but it didn't; we get a + // failed check while pruning. We should look into this. + InterferenceNode* node = simplify_worklist_.front(); + simplify_worklist_.pop_front(); + DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list"; + DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree"; + DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related"; + PruneNode(node); + } else if (!coalesce_worklist_.empty()) { + // Coalesce. + CoalesceOpportunity* opportunity = coalesce_worklist_.top(); + coalesce_worklist_.pop(); + if (opportunity->stage == CoalesceStage::kWorklist) { + Coalesce(opportunity); + } + } else if (!freeze_worklist_.empty()) { + // Freeze moves and prune a low-degree move-related node. + InterferenceNode* node = freeze_worklist_.front(); + freeze_worklist_.pop_front(); + if (node->stage == NodeStage::kFreezeWorklist) { + DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree"; + DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related"; + FreezeMoves(node); + PruneNode(node); + } + } else if (!spill_worklist_.empty()) { + // We spill the lowest-priority node, because pruning a node earlier + // gives it a higher chance of being spilled. 
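+ // (spill_worklist_ is ordered by HasGreaterNodePriority, so top() yields the node least
+ // likely to need a register: one that does not require a color, or one with the lowest
+ // spill weight.)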
+ InterferenceNode* node = spill_worklist_.top(); + spill_worklist_.pop(); + if (node->stage == NodeStage::kSpillWorklist) { + DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree"; + FreezeMoves(node); + PruneNode(node); + } + } else { + // Pruning complete. + break; + } + } + DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size()); +} + +void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) { + for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { + if (opportunity->stage == CoalesceStage::kActive) { + opportunity->stage = CoalesceStage::kWorklist; + coalesce_worklist_.push(opportunity); + } + } +} + +void ColoringIteration::PruneNode(InterferenceNode* node) { + DCHECK_NE(node->stage, NodeStage::kPruned); + DCHECK(!node->IsPrecolored()); + node->stage = NodeStage::kPruned; + pruned_nodes_.push(node); + + for (InterferenceNode* adj : node->GetAdjacentNodes()) { + DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; + + if (adj->IsPrecolored()) { + // No effect on pre-colored nodes; they're never pruned. + } else { + // Remove the interference. + bool was_high_degree = IsHighDegreeNode(adj, num_regs_); + DCHECK(adj->ContainsInterference(node)) + << "Missing reflexive interference from non-fixed node"; + adj->RemoveInterference(node); + + // Handle transitions from high degree to low degree. + if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) { + EnableCoalesceOpportunities(adj); + for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) { + EnableCoalesceOpportunities(adj_adj); + } + + DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist); + if (adj->IsMoveRelated()) { + adj->stage = NodeStage::kFreezeWorklist; + freeze_worklist_.push_back(adj); + } else { + adj->stage = NodeStage::kSimplifyWorklist; + simplify_worklist_.push_back(adj); + } + } + } + } +} + +void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) { + if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) { + DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist); + node->stage = NodeStage::kSimplifyWorklist; + simplify_worklist_.push_back(node); + } +} + +void ColoringIteration::FreezeMoves(InterferenceNode* node) { + for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { + if (opportunity->stage == CoalesceStage::kDefunct) { + // Constrained moves should remain constrained, since they will not be considered + // during last-chance coalescing. + } else { + opportunity->stage = CoalesceStage::kInactive; + } + InterferenceNode* other = opportunity->node_a->GetAlias() == node + ? opportunity->node_b->GetAlias() + : opportunity->node_a->GetAlias(); + if (other != node && other->stage == NodeStage::kFreezeWorklist) { + DCHECK(IsLowDegreeNode(node, num_regs_)); + CheckTransitionFromFreezeWorklist(other); + } + } +} + +bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from, + InterferenceNode* into) { + if (!into->IsPrecolored()) { + // The uncolored heuristic will cover this case. + return false; + } + if (from->IsPair() || into->IsPair()) { + // TODO: Merging from a pair node is currently not supported, since fixed pair nodes + // are currently represented as two single fixed nodes in the graph, and `into` is + // only one of them. (We may lose the implicit connections to the second one in a merge.) + return false; + } + + // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`. 
+ // Reasons an adjacent node `adj` can be "ok": + // (1) If `adj` is low degree, interference with `into` will not affect its existing + // colorable guarantee. (Notice that coalescing cannot increase its degree.) + // (2) If `adj` is pre-colored, it already interferes with `into`. See (3). + // (3) If there's already an interference with `into`, coalescing will not add interferences. + for (InterferenceNode* adj : from->GetAdjacentNodes()) { + if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) { + // Ok. + } else { + return false; + } + } + return true; +} + +bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from, + InterferenceNode* into) { + if (into->IsPrecolored()) { + // The pre-colored heuristic will handle this case. + return false; + } + + // Arbitrary cap to improve compile time. Tests show that this has negligible affect + // on generated code. + if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) { + return false; + } + + // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors + // of high degree. (Low degree neighbors can be ignored, because they will eventually be + // pruned from the interference graph in the simplify stage.) + size_t high_degree_interferences = 0; + for (InterferenceNode* adj : from->GetAdjacentNodes()) { + if (IsHighDegreeNode(adj, num_regs_)) { + high_degree_interferences += from->EdgeWeightWith(adj); + } + } + for (InterferenceNode* adj : into->GetAdjacentNodes()) { + if (IsHighDegreeNode(adj, num_regs_)) { + if (from->ContainsInterference(adj)) { + // We've already counted this adjacent node. + // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that + // we should not have counted it at all. (This extends the textbook Briggs coalescing test, + // but remains conservative.) + if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) { + high_degree_interferences -= from->EdgeWeightWith(adj); + } + } else { + high_degree_interferences += into->EdgeWeightWith(adj); + } + } + } + + return high_degree_interferences < num_regs_; +} + +void ColoringIteration::Combine(InterferenceNode* from, + InterferenceNode* into) { + from->SetAlias(into); + + // Add interferences. + for (InterferenceNode* adj : from->GetAdjacentNodes()) { + bool was_low_degree = IsLowDegreeNode(adj, num_regs_); + AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false); + if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) { + // This is a (temporary) transition to a high degree node. Its degree will decrease again + // when we prune `from`, but it's best to be consistent about the current worklist. + adj->stage = NodeStage::kSpillWorklist; + spill_worklist_.push(adj); + } + } + + // Add coalesce opportunities. + for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) { + if (opportunity->stage != CoalesceStage::kDefunct) { + into->AddCoalesceOpportunity(opportunity); + } + } + EnableCoalesceOpportunities(from); + + // Prune and update worklists. + PruneNode(from); + if (IsLowDegreeNode(into, num_regs_)) { + // Coalesce(...) takes care of checking for a transition to the simplify worklist. + DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist); + } else if (into->stage == NodeStage::kFreezeWorklist) { + // This is a transition to a high degree node. 
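+    // `into` has absorbed the interferences of `from`, so it is no longer guaranteed a color
+    // and must be treated as a spill candidate.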
+    into->stage = NodeStage::kSpillWorklist;
+    spill_worklist_.push(into);
+  } else {
+    DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
+  }
+}
+
+void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
+  InterferenceNode* from = opportunity->node_a->GetAlias();
+  InterferenceNode* into = opportunity->node_b->GetAlias();
+  DCHECK_NE(from->stage, NodeStage::kPruned);
+  DCHECK_NE(into->stage, NodeStage::kPruned);
+
+  if (from->IsPrecolored()) {
+    // If we have one pre-colored node, make sure it's the `into` node.
+    std::swap(from, into);
+  }
+
+  if (from == into) {
+    // These nodes have already been coalesced.
+    opportunity->stage = CoalesceStage::kDefunct;
+    CheckTransitionFromFreezeWorklist(from);
+  } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
+    // These nodes interfere.
+    opportunity->stage = CoalesceStage::kDefunct;
+    CheckTransitionFromFreezeWorklist(from);
+    CheckTransitionFromFreezeWorklist(into);
+  } else if (PrecoloredHeuristic(from, into)
+          || UncoloredHeuristic(from, into)) {
+    // We can coalesce these nodes.
+    opportunity->stage = CoalesceStage::kDefunct;
+    Combine(from, into);
+    CheckTransitionFromFreezeWorklist(into);
+  } else {
+    // We cannot coalesce, but we may be able to later.
+    opportunity->stage = CoalesceStage::kActive;
+  }
+}
+
+// Build a mask with a bit set for each register assigned to some
+// interval in `intervals`.
+template <typename Container>
+static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
+  std::bitset<kMaxNumRegs> conflict_mask;
+  for (InterferenceNode* adjacent : intervals) {
+    LiveInterval* conflicting = adjacent->GetInterval();
+    if (conflicting->HasRegister()) {
+      conflict_mask.set(conflicting->GetRegister());
+      if (conflicting->HasHighInterval()) {
+        DCHECK(conflicting->GetHighInterval()->HasRegister());
+        conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
+      }
+    } else {
+      DCHECK(!conflicting->HasHighInterval()
+          || !conflicting->GetHighInterval()->HasRegister());
+    }
+  }
+  return conflict_mask;
+}
+
+bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
+  return processing_core_regs
+      ? !codegen_->IsCoreCalleeSaveRegister(reg)
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+static bool RegisterIsAligned(size_t reg) {
+  return reg % 2 == 0;
+}
+
+static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
+  // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
+  // Note that CTZ is undefined if all bits are 0, so we special-case it.
+  return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
+}
+
+bool ColoringIteration::ColorInterferenceGraph() {
+  DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
+  ArenaVector<LiveInterval*> colored_intervals(
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
+  bool successful = true;
+
+  while (!pruned_nodes_.empty()) {
+    InterferenceNode* node = pruned_nodes_.top();
+    pruned_nodes_.pop();
+    LiveInterval* interval = node->GetInterval();
+    size_t reg = 0;
+
+    InterferenceNode* alias = node->GetAlias();
+    if (alias != node) {
+      // This node was coalesced with another.
+ LiveInterval* alias_interval = alias->GetInterval(); + if (alias_interval->HasRegister()) { + reg = alias_interval->GetRegister(); + DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg]) + << "This node conflicts with the register it was coalesced with"; + } else { + DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " " + << "Move coalescing was not conservative, causing a node to be coalesced " + << "with another node that could not be colored"; + if (interval->RequiresRegister()) { + successful = false; + } + } + } else { + // Search for free register(s). + std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); + if (interval->HasHighInterval()) { + // Note that the graph coloring allocator assumes that pair intervals are aligned here, + // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we + // change the alignment requirements here, we will have to update the algorithm (e.g., + // be more conservative about the weight of edges adjacent to pair nodes.) + while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { + reg += 2; + } + + // Try to use a caller-save register first. + for (size_t i = 0; i < num_regs_ - 1; i += 2) { + bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_); + bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_); + if (!conflict_mask[i] && !conflict_mask[i + 1]) { + if (low_caller_save && high_caller_save) { + reg = i; + break; + } else if (low_caller_save || high_caller_save) { + reg = i; + // Keep looking to try to get both parts in caller-save registers. + } + } + } + } else { + // Not a pair interval. + reg = FindFirstZeroInConflictMask(conflict_mask); + + // Try to use caller-save registers first. + for (size_t i = 0; i < num_regs_; ++i) { + if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) { + reg = i; + break; + } + } + } + + // Last-chance coalescing. + for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { + if (opportunity->stage == CoalesceStage::kDefunct) { + continue; + } + LiveInterval* other_interval = opportunity->node_a->GetAlias() == node + ? opportunity->node_b->GetAlias()->GetInterval() + : opportunity->node_a->GetAlias()->GetInterval(); + if (other_interval->HasRegister()) { + size_t coalesce_register = other_interval->GetRegister(); + if (interval->HasHighInterval()) { + if (!conflict_mask[coalesce_register] && + !conflict_mask[coalesce_register + 1] && + RegisterIsAligned(coalesce_register)) { + reg = coalesce_register; + break; + } + } else if (!conflict_mask[coalesce_register]) { + reg = coalesce_register; + break; + } + } + } + } + + if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) { + // Assign register. + DCHECK(!interval->HasRegister()); + interval->SetRegister(reg); + colored_intervals.push_back(interval); + if (interval->HasHighInterval()) { + DCHECK(!interval->GetHighInterval()->HasRegister()); + interval->GetHighInterval()->SetRegister(reg + 1); + colored_intervals.push_back(interval->GetHighInterval()); + } + } else if (interval->RequiresRegister()) { + // The interference graph is too dense to color. Make it sparser by + // splitting this live interval. + successful = false; + register_allocator_->SplitAtRegisterUses(interval); + // We continue coloring, because there may be additional intervals that cannot + // be colored, and that we should split. + } else { + // Spill. 
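+      // The interval does not require a register, so it can live entirely on the stack.
+      // The actual slot is chosen later, in AllocateSpillSlots().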
+ node->SetNeedsSpillSlot(); + } + } + + // If unsuccessful, reset all register assignments. + if (!successful) { + for (LiveInterval* interval : colored_intervals) { + interval->ClearRegister(); + } + } + + return successful; +} + +void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) { + // The register allocation resolver will organize the stack based on value type, + // so we assign stack slots for each value type separately. + ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); + ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); + ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); + ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); + + // The set of parent intervals already handled. + ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator)); + + // Find nodes that need spill slots. + for (InterferenceNode* node : nodes) { + if (!node->NeedsSpillSlot()) { + continue; + } + + LiveInterval* parent = node->GetInterval()->GetParent(); + if (seen.find(parent) != seen.end()) { + // We've already handled this interval. + // This can happen if multiple siblings of the same interval request a stack slot. + continue; + } + seen.insert(parent); + + HInstruction* defined_by = parent->GetDefinedBy(); + if (parent->HasSpillSlot()) { + // We already have a spill slot for this value that we can reuse. + } else if (defined_by->IsParameterValue()) { + // Parameters already have a stack slot. + parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); + } else if (defined_by->IsCurrentMethod()) { + // The current method is always at stack slot 0. + parent->SetSpillSlot(0); + } else if (defined_by->IsConstant()) { + // Constants don't need a spill slot. + } else { + // We need to find a spill slot for this interval. Place it in the correct + // worklist to be processed later. + switch (node->GetInterval()->GetType()) { + case Primitive::kPrimDouble: + double_intervals.push_back(parent); + break; + case Primitive::kPrimLong: + long_intervals.push_back(parent); + break; + case Primitive::kPrimFloat: + float_intervals.push_back(parent); + break; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimShort: + int_intervals.push_back(parent); + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); + UNREACHABLE(); + } + } + } + + // Color spill slots for each value type. + ColorSpillSlots(&double_intervals, &num_double_spill_slots_); + ColorSpillSlots(&long_intervals, &num_long_spill_slots_); + ColorSpillSlots(&float_intervals, &num_float_spill_slots_); + ColorSpillSlots(&int_intervals, &num_int_spill_slots_); +} + +void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals, + size_t* num_stack_slots_used) { + // We cannot use the original interference graph here because spill slots are assigned to + // all of the siblings of an interval, whereas an interference node represents only a single + // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints + // by position, and assigning the lowest spill slot available when we encounter an interval + // beginning. We ignore lifetime holes for simplicity. 
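+  // For example, with single-slot intervals A=[2,10), B=[4,6) and C=[8,14), the sweep sees
+  // begin(A,2), begin(B,4), end(B,6), begin(C,8), end(A,10), end(C,14): A takes slot 0,
+  // B takes slot 1, B's end frees slot 1, and C then reuses slot 1. Since endpoints are
+  // sorted by (position, is_beginning), an interval end at position p is processed before
+  // an interval beginning at p, so a slot freed at p can be reused immediately.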
+ ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( + allocator_->Adapter(kArenaAllocRegisterAllocator)); + + for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) { + LiveInterval* parent_interval = *it; + DCHECK(parent_interval->IsParent()); + DCHECK(!parent_interval->HasSpillSlot()); + size_t start = parent_interval->GetStart(); + size_t end = parent_interval->GetLastSibling()->GetEnd(); + DCHECK_LT(start, end); + interval_endpoints.push_back(std::make_tuple(start, true, parent_interval)); + interval_endpoints.push_back(std::make_tuple(end, false, parent_interval)); + } + + // Sort by position. + // We explicitly ignore the third entry of each tuple (the interval pointer) in order + // to maintain determinism. + std::sort(interval_endpoints.begin(), interval_endpoints.end(), + [] (const std::tuple<size_t, bool, LiveInterval*>& lhs, + const std::tuple<size_t, bool, LiveInterval*>& rhs) { + return std::tie(std::get<0>(lhs), std::get<1>(lhs)) + < std::tie(std::get<0>(rhs), std::get<1>(rhs)); + }); + + ArenaBitVector taken(allocator_, 0, true); + for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) { + // Extract information from the current tuple. + LiveInterval* parent_interval; + bool is_interval_beginning; + size_t position; + std::tie(position, is_interval_beginning, parent_interval) = *it; + + bool needs_two_slots = parent_interval->NeedsTwoSpillSlots(); + + if (is_interval_beginning) { + DCHECK(!parent_interval->HasSpillSlot()); + DCHECK_EQ(position, parent_interval->GetStart()); + + // Find a free stack slot. + size_t slot = 0; + for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) { + // Skip taken slots. + } + parent_interval->SetSpillSlot(slot); + + *num_stack_slots_used = std::max(*num_stack_slots_used, + needs_two_slots ? slot + 1 : slot + 2); + if (needs_two_slots && *num_stack_slots_used % 2 != 0) { + // The parallel move resolver requires that there be an even number of spill slots + // allocated for pair value types. + ++(*num_stack_slots_used); + } + + taken.SetBit(slot); + if (needs_two_slots) { + taken.SetBit(slot + 1); + } + } else { + DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd()); + DCHECK(parent_interval->HasSpillSlot()); + + // Free up the stack slot used by this interval. + size_t slot = parent_interval->GetSpillSlot(); + DCHECK(taken.IsBitSet(slot)); + DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1)); + taken.ClearBit(slot); + if (needs_two_slots) { + taken.ClearBit(slot + 1); + } + } + } + DCHECK_EQ(taken.NumSetBits(), 0u); +} + +} // namespace art diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h new file mode 100644 index 0000000000..548687f784 --- /dev/null +++ b/compiler/optimizing/register_allocator_graph_color.h @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ +#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ + +#include "arch/instruction_set.h" +#include "base/arena_containers.h" +#include "base/arena_object.h" +#include "base/macros.h" +#include "primitive.h" +#include "register_allocator.h" + +namespace art { + +class CodeGenerator; +class HBasicBlock; +class HGraph; +class HInstruction; +class HParallelMove; +class Location; +class SsaLivenessAnalysis; +class InterferenceNode; +struct CoalesceOpportunity; +enum class CoalesceKind; + +/** + * A graph coloring register allocator. + * + * The algorithm proceeds as follows: + * (1) Build an interference graph, where nodes represent live intervals, and edges represent + * interferences between two intervals. Coloring this graph with k colors is isomorphic to + * finding a valid register assignment with k registers. + * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are + * guaranteed a color. (No matter how we color their adjacent nodes, we can give them a + * different color.) As we prune nodes from the graph, more nodes may drop below degree k, + * enabling further pruning. The key is to maintain the pruning order in a stack, so that we + * can color the nodes in the reverse order. + * When there are no more nodes with degree less than k, we start pruning alternate nodes based + * on heuristics. Since these nodes are not guaranteed a color, we are careful to + * prioritize nodes that require a register. We also prioritize short intervals, because + * short intervals cannot be split very much if coloring fails (see below). "Prioritizing" + * a node amounts to pruning it later, since it will have fewer interferences if we prune other + * nodes first. + * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign + * a node a color, we do one of two things: + * - If the node requires a register, we consider the current coloring attempt a failure. + * However, we split the node's live interval in order to make the interference graph + * sparser, so that future coloring attempts may succeed. + * - If the node does not require a register, we simply assign it a location on the stack. + * + * If iterative move coalescing is enabled, the algorithm also attempts to conservatively + * combine nodes in the graph that would prefer to have the same color. (For example, the output + * of a phi instruction would prefer to have the same register as at least one of its inputs.) + * There are several additional steps involved with this: + * - We look for coalesce opportunities by examining each live interval, a step similar to that + * used by linear scan when looking for register hints. + * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist + * of low degree nodes that have associated coalesce opportunities. Only when we run out of + * coalesce opportunities do we start pruning coalesce-associated nodes. + * - When pruning a node, if any nodes transition from high degree to low degree, we add + * associated coalesce opportunities to the worklist, since these opportunities may now succeed. + * - Whether two nodes can be combined is decided by two different heuristics--one used when + * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node. + * It is vital that we only combine two nodes if the node that remains is guaranteed to receive + * a color. 
This is because additionally spilling is more costly than failing to coalesce. + * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around + * to be used as last-chance register hints when coloring. If nothing else, we try to use + * caller-save registers before callee-save registers. + * + * A good reference for graph coloring register allocation is + * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition). + */ +class RegisterAllocatorGraphColor : public RegisterAllocator { + public: + RegisterAllocatorGraphColor(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& analysis, + bool iterative_move_coalescing = true); + ~RegisterAllocatorGraphColor() OVERRIDE {} + + void AllocateRegisters() OVERRIDE; + + bool Validate(bool log_fatal_on_failure); + + private: + // Collect all intervals and prepare for register allocation. + void ProcessInstructions(); + void ProcessInstruction(HInstruction* instruction); + + // If any inputs require specific registers, block those registers + // at the position of this instruction. + void CheckForFixedInputs(HInstruction* instruction); + + // If the output of an instruction requires a specific register, split + // the interval and assign the register to the first part. + void CheckForFixedOutput(HInstruction* instruction); + + // Add all applicable safepoints to a live interval. + // Currently depends on instruction processing order. + void AddSafepointsFor(HInstruction* instruction); + + // Collect all live intervals associated with the temporary locations + // needed by an instruction. + void CheckForTempLiveIntervals(HInstruction* instruction); + + // If a safe point is needed, add a synthesized interval to later record + // the number of live registers at this point. + void CheckForSafepoint(HInstruction* instruction); + + // Split an interval, but only if `position` is inside of `interval`. + // Return either the new interval, or the original interval if not split. + static LiveInterval* TrySplit(LiveInterval* interval, size_t position); + + // To ensure every graph can be colored, split live intervals + // at their register defs and uses. This creates short intervals with low + // degree in the interference graph, which are prioritized during graph + // coloring. + void SplitAtRegisterUses(LiveInterval* interval); + + // If the given instruction is a catch phi, give it a spill slot. + void AllocateSpillSlotForCatchPhi(HInstruction* instruction); + + // Ensure that the given register cannot be allocated for a given range. + void BlockRegister(Location location, size_t start, size_t end); + void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); + + bool IsCallerSave(size_t reg, bool processing_core_regs); + + // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not + // assigned the same stack slot. + void ColorSpillSlots(ArenaVector<LiveInterval*>* nodes, + size_t* num_stack_slots_used); + + // Provide stack slots to nodes that need them. + void AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes); + + // Whether iterative move coalescing should be performed. Iterative move coalescing + // improves code quality, but increases compile time. + const bool iterative_move_coalescing_; + + // Live intervals, split by kind (core and floating point). + // These should not contain high intervals, as those are represented by + // the corresponding low interval throughout register allocation. 
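+  // Core and floating-point intervals are colored in separate passes, since the two
+  // register files never compete for the same registers.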
+ ArenaVector<LiveInterval*> core_intervals_; + ArenaVector<LiveInterval*> fp_intervals_; + + // Intervals for temporaries, saved for special handling in the resolution phase. + ArenaVector<LiveInterval*> temp_intervals_; + + // Safepoints, saved for special handling while processing instructions. + ArenaVector<HInstruction*> safepoints_; + + // Interference nodes representing specific registers. These are "pre-colored" nodes + // in the interference graph. + ArenaVector<InterferenceNode*> physical_core_nodes_; + ArenaVector<InterferenceNode*> physical_fp_nodes_; + + // Allocated stack slot counters. + size_t num_int_spill_slots_; + size_t num_double_spill_slots_; + size_t num_float_spill_slots_; + size_t num_long_spill_slots_; + size_t catch_phi_spill_slot_counter_; + + // Number of stack slots needed for the pointer to the current method. + // This is 1 for 32-bit architectures, and 2 for 64-bit architectures. + const size_t reserved_art_method_slots_; + + // Number of stack slots needed for outgoing arguments. + const size_t reserved_out_slots_; + + friend class ColoringIteration; + + DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index 768ed2d26a..1a391ce9bb 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -22,6 +22,7 @@ #include "base/bit_vector-inl.h" #include "base/enums.h" #include "code_generator.h" +#include "linear_order.h" #include "register_allocation_resolver.h" #include "ssa_liveness_analysis.h" @@ -63,9 +64,7 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocat registers_array_(nullptr), blocked_core_registers_(codegen->GetBlockedCoreRegisters()), blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()), - reserved_out_slots_(0), - maximum_number_of_live_core_registers_(0), - maximum_number_of_live_fp_registers_(0) { + reserved_out_slots_(0) { temp_intervals_.reserve(4); int_spill_slots_.reserve(kDefaultNumberOfSpillSlots); long_spill_slots_.reserve(kDefaultNumberOfSpillSlots); @@ -92,8 +91,7 @@ static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval void RegisterAllocatorLinearScan::AllocateRegisters() { AllocateRegistersInternal(); RegisterAllocationResolver(allocator_, codegen_, liveness_) - .Resolve(maximum_number_of_live_core_registers_, - maximum_number_of_live_fp_registers_, + .Resolve(ArrayRef<HInstruction* const>(safepoints_), reserved_out_slots_, int_spill_slots_.size(), long_spill_slots_.size(), @@ -111,8 +109,7 @@ void RegisterAllocatorLinearScan::AllocateRegisters() { // Since only parallel moves have been inserted during the register allocation, // these checks are mostly for making sure these moves have been added correctly. 
size_t current_liveness = 0; - for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearOrder()) { for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HInstruction* instruction = inst_it.Current(); DCHECK_LE(current_liveness, instruction->GetLifetimePosition()); @@ -166,8 +163,7 @@ void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool void RegisterAllocatorLinearScan::AllocateRegistersInternal() { // Iterate post-order, to ensure the list is sorted, and the last added interval // is the one with the lowest start position. - for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done(); back_it.Advance()) { ProcessInstruction(back_it.Current()); @@ -283,20 +279,6 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) return; } safepoints_.push_back(instruction); - if (locations->OnlyCallsOnSlowPath()) { - // We add a synthesized range at this position to record the live registers - // at this position. Ideally, we could just update the safepoints when locations - // are updated, but we currently need to know the full stack size before updating - // locations (because of parameters and the fact that we don't have a frame pointer). - // And knowing the full stack size requires to know the maximum number of live - // registers at calls in slow paths. - // By adding the following interval in the algorithm, we can compute this - // maximum before updating locations. - LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction); - interval->AddRange(position, position + 1); - AddSorted(&unhandled_core_intervals_, interval); - AddSorted(&unhandled_fp_intervals_, interval); - } } if (locations->WillCall()) { @@ -569,20 +551,6 @@ void RegisterAllocatorLinearScan::LinearScan() { }); inactive_.erase(inactive_kept_end, inactive_to_handle_end); - if (current->IsSlowPathSafepoint()) { - // Synthesized interval to record the maximum number of live registers - // at safepoints. No need to allocate a register for it. - if (processing_core_registers_) { - maximum_number_of_live_core_registers_ = - std::max(maximum_number_of_live_core_registers_, active_.size()); - } else { - maximum_number_of_live_fp_registers_ = - std::max(maximum_number_of_live_fp_registers_, active_.size()); - } - DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart()); - continue; - } - if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) { DCHECK(!current->HasRegister()); // Allocating the low part was unsucessful. The splitted interval for the high part @@ -685,7 +653,7 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) { // the next intersection with `current`. for (LiveInterval* inactive : inactive_) { // Temp/Slow-path-safepoint interval has no holes. - DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); + DCHECK(!inactive->IsTemp()); if (!current->IsSplit() && !inactive->IsFixed()) { // Neither current nor inactive are fixed. 
// Thanks to SSA, a non-split interval starting in a hole of an @@ -933,7 +901,7 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) { // start of current. for (LiveInterval* inactive : inactive_) { // Temp/Slow-path-safepoint interval has no holes. - DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); + DCHECK(!inactive->IsTemp()); if (!current->IsSplit() && !inactive->IsFixed()) { // Neither current nor inactive are fixed. // Thanks to SSA, a non-split interval starting in a hole of an @@ -1085,12 +1053,6 @@ void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, L if (current->StartsAfter(interval) && !current->IsHighInterval()) { insert_at = i; break; - } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { - // Ensure the slow path interval is the last to be processed at its location: we want the - // interval to know all live registers at this location. - DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current)); - insert_at = i; - break; } } diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h index b6e4f92e42..b3834f45e4 100644 --- a/compiler/optimizing/register_allocator_linear_scan.h +++ b/compiler/optimizing/register_allocator_linear_scan.h @@ -43,6 +43,7 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { RegisterAllocatorLinearScan(ArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& analysis); + ~RegisterAllocatorLinearScan() OVERRIDE {} void AllocateRegisters() OVERRIDE; @@ -170,12 +171,6 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { // Slots reserved for out arguments. size_t reserved_out_slots_; - // The maximum live core registers at safepoints. - size_t maximum_number_of_live_core_registers_; - - // The maximum live FP registers at safepoints. - size_t maximum_number_of_live_fp_registers_; - ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil); ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index cbb7b2f1c5..55ea99e592 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -31,12 +31,29 @@ namespace art { +using Strategy = RegisterAllocator::Strategy; + // Note: the register allocator tests rely on the fact that constants have live // intervals and registers get allocated to them. -class RegisterAllocatorTest : public CommonCompilerTest {}; +class RegisterAllocatorTest : public CommonCompilerTest { + protected: + // These functions need to access private variables of LocationSummary, so we declare it + // as a member of RegisterAllocatorTest, which we make a friend class. + static void SameAsFirstInputHint(Strategy strategy); + static void ExpectedInRegisterHint(Strategy strategy); +}; + +// This macro should include all register allocation strategies that should be tested. 
+#define TEST_ALL_STRATEGIES(test_name)\ +TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\ + test_name(Strategy::kRegisterAllocatorLinearScan);\ +}\ +TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ + test_name(Strategy::kRegisterAllocatorGraphColor);\ +} -static bool Check(const uint16_t* data) { +static bool Check(const uint16_t* data, Strategy strategy) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = CreateCFG(&allocator, data); @@ -45,7 +62,8 @@ static bool Check(const uint16_t* data) { x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); return register_allocator->Validate(false); } @@ -143,7 +161,7 @@ TEST_F(RegisterAllocatorTest, ValidateIntervals) { } } -TEST_F(RegisterAllocatorTest, CFG1) { +static void CFG1(Strategy strategy) { /* * Test the following snippet: * return 0; @@ -160,10 +178,12 @@ TEST_F(RegisterAllocatorTest, CFG1) { Instruction::CONST_4 | 0 | 0, Instruction::RETURN); - ASSERT_TRUE(Check(data)); + ASSERT_TRUE(Check(data, strategy)); } -TEST_F(RegisterAllocatorTest, Loop1) { +TEST_ALL_STRATEGIES(CFG1); + +static void Loop1(Strategy strategy) { /* * Test the following snippet: * int a = 0; @@ -199,10 +219,12 @@ TEST_F(RegisterAllocatorTest, Loop1) { Instruction::CONST_4 | 5 << 12 | 1 << 8, Instruction::RETURN | 1 << 8); - ASSERT_TRUE(Check(data)); + ASSERT_TRUE(Check(data, strategy)); } -TEST_F(RegisterAllocatorTest, Loop2) { +TEST_ALL_STRATEGIES(Loop1); + +static void Loop2(Strategy strategy) { /* * Test the following snippet: * int a = 0; @@ -248,10 +270,12 @@ TEST_F(RegisterAllocatorTest, Loop2) { Instruction::ADD_INT, 1 << 8 | 0, Instruction::RETURN | 1 << 8); - ASSERT_TRUE(Check(data)); + ASSERT_TRUE(Check(data, strategy)); } -TEST_F(RegisterAllocatorTest, Loop3) { +TEST_ALL_STRATEGIES(Loop2); + +static void Loop3(Strategy strategy) { /* * Test the following snippet: * int a = 0 @@ -296,7 +320,8 @@ TEST_F(RegisterAllocatorTest, Loop3) { x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_TRUE(register_allocator->Validate(false)); @@ -314,6 +339,8 @@ TEST_F(RegisterAllocatorTest, Loop3) { ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister()); } +TEST_ALL_STRATEGIES(Loop3); + TEST_F(RegisterAllocatorTest, FirstRegisterUse) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -354,7 +381,7 @@ TEST_F(RegisterAllocatorTest, FirstRegisterUse) { ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition()); } -TEST_F(RegisterAllocatorTest, DeadPhi) { +static void DeadPhi(Strategy strategy) { /* Test for a dead loop phi taking as back-edge input a phi that also has * this loop phi as input. Walking backwards in SsaDeadPhiElimination * does not solve the problem because the loop phi will be visited last. 
@@ -385,15 +412,19 @@ TEST_F(RegisterAllocatorTest, DeadPhi) { x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_TRUE(register_allocator->Validate(false)); } +TEST_ALL_STRATEGIES(DeadPhi); + /** * Test that the TryAllocateFreeReg method works in the presence of inactive intervals * that share the same register. It should split the interval it is currently * allocating for at the minimum lifetime position between the two inactive intervals. + * This test only applies to the linear scan allocator. */ TEST_F(RegisterAllocatorTest, FreeUntil) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( @@ -507,15 +538,15 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, graph->GetDexFile(), dex_cache, 0); -*input2 = new (allocator) HInstanceFieldGet(parameter, - Primitive::kPrimInt, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - dex_cache, - 0); + *input2 = new (allocator) HInstanceFieldGet(parameter, + Primitive::kPrimInt, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + dex_cache, + 0); then->AddInstruction(*input1); else_->AddInstruction(*input2); join->AddInstruction(new (allocator) HExit()); @@ -527,7 +558,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, return graph; } -TEST_F(RegisterAllocatorTest, PhiHint) { +static void PhiHint(Strategy strategy) { ArenaPool pool; ArenaAllocator allocator(&pool); HPhi *phi; @@ -543,7 +574,7 @@ TEST_F(RegisterAllocatorTest, PhiHint) { // Check that the register allocator is deterministic. RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0); @@ -563,7 +594,7 @@ TEST_F(RegisterAllocatorTest, PhiHint) { // the same register. phi->GetLocations()->UpdateOut(Location::RegisterLocation(2)); RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2); @@ -583,7 +614,7 @@ TEST_F(RegisterAllocatorTest, PhiHint) { // the same register. input1->GetLocations()->UpdateOut(Location::RegisterLocation(2)); RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2); @@ -603,7 +634,7 @@ TEST_F(RegisterAllocatorTest, PhiHint) { // the same register. 
input2->GetLocations()->UpdateOut(Location::RegisterLocation(2)); RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2); @@ -612,6 +643,12 @@ TEST_F(RegisterAllocatorTest, PhiHint) { } } +// TODO: Enable this test for graph coloring register allocation when iterative move +// coalescing is merged. +TEST_F(RegisterAllocatorTest, PhiHint_LinearScan) { + PhiHint(Strategy::kRegisterAllocatorLinearScan); +} + static HGraph* BuildFieldReturn(ArenaAllocator* allocator, HInstruction** field, HInstruction** ret) { @@ -650,7 +687,7 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, return graph; } -TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) { +void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) { ArenaPool pool; ArenaAllocator allocator(&pool); HInstruction *field, *ret; @@ -664,7 +701,7 @@ TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) { liveness.Analyze(); RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); // Sanity check that in normal conditions, the register should be hinted to 0 (EAX). @@ -684,13 +721,19 @@ TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) { ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2); RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2); } } +// TODO: Enable this test for graph coloring register allocation when iterative move +// coalescing is merged. +TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint_LinearScan) { + ExpectedInRegisterHint(Strategy::kRegisterAllocatorLinearScan); +} + static HGraph* BuildTwoSubs(ArenaAllocator* allocator, HInstruction** first_sub, HInstruction** second_sub) { @@ -720,7 +763,7 @@ static HGraph* BuildTwoSubs(ArenaAllocator* allocator, return graph; } -TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) { +void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { ArenaPool pool; ArenaAllocator allocator(&pool); HInstruction *first_sub, *second_sub; @@ -734,7 +777,7 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) { liveness.Analyze(); RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); // Sanity check that in normal conditions, the registers are the same. @@ -757,7 +800,7 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) { ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2); @@ -765,6 +808,12 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) { } } +// TODO: Enable this test for graph coloring register allocation when iterative move +// coalescing is merged. 
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint_LinearScan) { + SameAsFirstInputHint(Strategy::kRegisterAllocatorLinearScan); +} + static HGraph* BuildDiv(ArenaAllocator* allocator, HInstruction** div) { HGraph* graph = CreateGraph(allocator); @@ -791,7 +840,7 @@ static HGraph* BuildDiv(ArenaAllocator* allocator, return graph; } -TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { +static void ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) { ArenaPool pool; ArenaAllocator allocator(&pool); HInstruction *div; @@ -805,7 +854,7 @@ TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { liveness.Analyze(); RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness); + RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); register_allocator->AllocateRegisters(); // div on x86 requires its first input in eax and the output be the same as the first input. @@ -813,9 +862,16 @@ TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { } } +// TODO: Enable this test for graph coloring register allocation when iterative move +// coalescing is merged. +TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint_LinearScan) { + ExpectedExactInRegisterAndSameOutputHint(Strategy::kRegisterAllocatorLinearScan); +} + // Test a bug in the register allocator, where allocating a blocked // register would lead to spilling an inactive interval at the wrong // position. +// This test only applies to the linear scan allocator. TEST_F(RegisterAllocatorTest, SpillInactive) { ArenaPool pool; diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index e409035d9d..46d0d0eb65 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -76,8 +76,7 @@ void HSelectGenerator::Run() { // Iterate in post order in the unlikely case that removing one occurrence of // the selection pattern empties a branch block of another occurrence. // Otherwise the order does not matter. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->EndsWithIf()) continue; // Find elements of the diamond pattern. diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index b73f73893c..fd1db592bb 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -20,6 +20,7 @@ #include "base/enums.h" #include "class_linker.h" #include "code_generator.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "utils/dex_cache_arrays_layout-inl.h" #include "driver/compiler_driver.h" @@ -30,7 +31,7 @@ #include "mirror/string.h" #include "nodes.h" #include "runtime.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" namespace art { @@ -60,44 +61,28 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { return; } - // TODO: Avoid CompilerDriver. 
- InvokeType original_invoke_type = invoke->GetOriginalInvokeType(); - InvokeType optimized_invoke_type = original_invoke_type; - MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex()); - int vtable_idx; - uintptr_t direct_code, direct_method; - bool success = compiler_driver_->ComputeInvokeInfo( - &compilation_unit_, - invoke->GetDexPc(), - false /* update_stats: already updated in builder */, - true /* enable_devirtualization */, - &optimized_invoke_type, - &target_method, - &vtable_idx, - &direct_code, - &direct_method); - if (!success) { - // TODO: try using kDexCachePcRelative. It's always a valid method load - // kind as long as it's supported by the codegen - return; - } - invoke->SetOptimizedInvokeType(optimized_invoke_type); - invoke->SetTargetMethod(target_method); + HGraph* outer_graph = codegen_->GetGraph(); + ArtMethod* compiling_method = graph_->GetArtMethod(); HInvokeStaticOrDirect::MethodLoadKind method_load_kind; HInvokeStaticOrDirect::CodePtrLocation code_ptr_location; uint64_t method_load_data = 0u; uint64_t direct_code_ptr = 0u; - HGraph* outer_graph = codegen_->GetGraph(); - if (target_method.dex_file == &outer_graph->GetDexFile() && - target_method.dex_method_index == outer_graph->GetMethodIdx()) { + if (invoke->GetResolvedMethod() == outer_graph->GetArtMethod()) { + DCHECK(outer_graph->GetArtMethod() != nullptr); method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf; } else { - bool use_pc_relative_instructions = - ((direct_method == 0u || direct_code == static_cast<uintptr_t>(-1))) && - ContainsElement(compiler_driver_->GetDexFilesForOatFile(), target_method.dex_file); + uintptr_t direct_code, direct_method; + { + ScopedObjectAccess soa(Thread::Current()); + compiler_driver_->GetCodeAndMethodForDirectCall( + (compiling_method == nullptr) ? nullptr : compiling_method->GetDeclaringClass(), + invoke->GetResolvedMethod(), + &direct_code, + &direct_method); + } if (direct_method != 0u) { // Should we use a direct pointer to the method? // Note: For JIT, kDirectAddressWithFixup doesn't make sense at all and while // kDirectAddress would be fine for image methods, we don't support it at the moment. @@ -109,13 +94,12 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup; } } else { // Use dex cache. - DCHECK_EQ(target_method.dex_file, &graph_->GetDexFile()); - if (use_pc_relative_instructions) { // Can we use PC-relative access to the dex cache arrays? - DCHECK(!Runtime::Current()->UseJitCompilation()); + if (!Runtime::Current()->UseJitCompilation()) { + // Use PC-relative access to the dex cache arrays. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative; DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()), &graph_->GetDexFile()); - method_load_data = layout.MethodOffset(target_method.dex_method_index); + method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex()); } else { // We must go through the ArtMethod's pointer to resolved methods. 
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; } @@ -124,10 +108,11 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { // Note: For JIT, kCallPCRelative and kCallDirectWithFixup don't make sense at all and // while kCallDirect would be fine for image methods, we don't support it at the moment. DCHECK(!Runtime::Current()->UseJitCompilation()); + const DexFile* dex_file_of_callee = invoke->GetTargetMethod().dex_file; if (direct_code != static_cast<uintptr_t>(-1)) { // Is the code pointer known now? code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect; direct_code_ptr = direct_code; - } else if (use_pc_relative_instructions) { + } else if (ContainsElement(compiler_driver_->GetDexFilesForOatFile(), dex_file_of_callee)) { // Use PC-relative calls for invokes within a multi-dex oat file. code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative; } else { // The direct pointer will be known at link time. @@ -150,31 +135,22 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { method_load_kind, code_ptr_location, method_load_data, direct_code_ptr }; HInvokeStaticOrDirect::DispatchInfo dispatch_info = - codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, - invoke->GetTargetMethod()); + codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke); invoke->SetDispatchInfo(dispatch_info); } void HSharpening::ProcessLoadClass(HLoadClass* load_class) { - if (load_class->NeedsAccessCheck()) { - // We need to call the runtime anyway, so we simply get the class as that call's return value. - return; - } - if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) { - // Loading from the ArtMethod* is the most efficient retrieval. - // TODO: This may not actually be true for all architectures and - // locations of target classes. The additional register pressure - // for using the ArtMethod* should be considered. - return; - } - - DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod); + DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod || + load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) + << load_class->GetLoadKind(); DCHECK(!load_class->IsInDexCache()) << "HLoadClass should not be optimized before sharpening."; + DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening."; const DexFile& dex_file = load_class->GetDexFile(); uint32_t type_index = load_class->GetTypeIndex(); bool is_in_dex_cache = false; + bool is_in_boot_image = false; HLoadClass::LoadKind desired_load_kind; uint64_t address = 0u; // Class or dex cache element address. { @@ -186,50 +162,46 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) { ? compilation_unit_.GetDexCache() : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); mirror::Class* klass = dex_cache->GetResolvedType(type_index); - - if (compiler_driver_->IsBootImage()) { + if (codegen_->GetCompilerOptions().IsBootImage()) { // Compiling boot image. Check if the class is a boot image class. DCHECK(!runtime->UseJitCompilation()); if (!compiler_driver_->GetSupportBootImageFixup()) { - // MIPS/MIPS64 or compiler_driver_test. Do not sharpen. + // MIPS64 or compiler_driver_test. Do not sharpen. 
desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + } else if ((klass != nullptr) && compiler_driver_->IsImageClass( + dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) { + is_in_boot_image = true; + is_in_dex_cache = true; + desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic() + ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative + : HLoadClass::LoadKind::kBootImageLinkTimeAddress; } else { - if (klass != nullptr && - compiler_driver_->IsImageClass( - dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) { - is_in_dex_cache = true; - desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic() - ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative - : HLoadClass::LoadKind::kBootImageLinkTimeAddress; - } else { - // Not a boot image class. We must go through the dex cache. - DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)); - desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative; - } - } - } else if (runtime->UseJitCompilation()) { - // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. - // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); - is_in_dex_cache = (klass != nullptr); - if (klass != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(klass)) { - // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787 - desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; - address = reinterpret_cast64<uint64_t>(klass); - } else { - // Note: If the class is not in the dex cache or isn't initialized, the - // instruction needs environment and will not be inlined across dex files. - // Within a dex file, the slow-path helper loads the correct class and - // inlined frames are used correctly for OOM stack trace. - // TODO: Write a test for this. Bug: 29416588 - desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress; - void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index]; - address = reinterpret_cast64<uint64_t>(dex_cache_element_address); + // Not a boot image class. We must go through the dex cache. + DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)); + desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative; } } else { - // AOT app compilation. Check if the class is in the boot image. - if ((klass != nullptr) && - runtime->GetHeap()->ObjectIsInBootImageSpace(klass) && - !codegen_->GetCompilerOptions().GetCompilePic()) { + is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass); + if (runtime->UseJitCompilation()) { + // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. + // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); + is_in_dex_cache = (klass != nullptr); + if (is_in_boot_image) { + // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787 + desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; + address = reinterpret_cast64<uint64_t>(klass); + } else { + // Note: If the class is not in the dex cache or isn't initialized, the + // instruction needs environment and will not be inlined across dex files. + // Within a dex file, the slow-path helper loads the correct class and + // inlined frames are used correctly for OOM stack trace. + // TODO: Write a test for this. 
Bug: 29416588 + desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress; + void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index]; + address = reinterpret_cast64<uint64_t>(dex_cache_element_address); + } + // AOT app compilation. Check if the class is in the boot image. + } else if (is_in_boot_image && !codegen_->GetCompilerOptions().GetCompilePic()) { desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; address = reinterpret_cast64<uint64_t>(klass); } else { @@ -243,6 +215,24 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) { } } } + + if (is_in_boot_image) { + load_class->MarkInBootImage(); + } + + if (load_class->NeedsAccessCheck()) { + // We need to call the runtime anyway, so we simply get the class as that call's return value. + return; + } + + if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) { + // Loading from the ArtMethod* is the most efficient retrieval in code size. + // TODO: This may not actually be true for all architectures and + // locations of target classes. The additional register pressure + // for using the ArtMethod* should be considered. + return; + } + if (is_in_dex_cache) { load_class->MarkInDexCache(); } @@ -279,8 +269,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { const DexFile& dex_file = load_string->GetDexFile(); uint32_t string_index = load_string->GetStringIndex(); - bool is_in_dex_cache = false; - HLoadString::LoadKind desired_load_kind; + HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; uint64_t address = 0u; // String or dex cache element address. { Runtime* runtime = Runtime::Current(); @@ -291,38 +280,27 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { ? compilation_unit_.GetDexCache() : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); - if (compiler_driver_->IsBootImage()) { + if (codegen_->GetCompilerOptions().IsBootImage()) { // Compiling boot image. Resolve the string and allocate it if needed. DCHECK(!runtime->UseJitCompilation()); mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache); CHECK(string != nullptr); - if (!compiler_driver_->GetSupportBootImageFixup()) { - // MIPS/MIPS64 or compiler_driver_test. Do not sharpen. - desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; - } else { + if (compiler_driver_->GetSupportBootImageFixup()) { DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)); - is_in_dex_cache = true; desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic() ? HLoadString::LoadKind::kBootImageLinkTimePcRelative : HLoadString::LoadKind::kBootImageLinkTimeAddress; + } else { + // MIPS64 or compiler_driver_test. Do not sharpen. + DCHECK_EQ(desired_load_kind, HLoadString::LoadKind::kDexCacheViaMethod); } } else if (runtime->UseJitCompilation()) { // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); mirror::String* string = dex_cache->GetResolvedString(string_index); - is_in_dex_cache = (string != nullptr); if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { - // TODO: Use direct pointers for all non-moving spaces, not just boot image. 
Bug: 29530787 desired_load_kind = HLoadString::LoadKind::kBootImageAddress; address = reinterpret_cast64<uint64_t>(string); - } else { - // Note: If the string is not in the dex cache, the instruction needs environment - // and will not be inlined across dex files. Within a dex file, the slow-path helper - // loads the correct string and inlined frames are used correctly for OOM stack trace. - // TODO: Write a test for this. Bug: 29416588 - desired_load_kind = HLoadString::LoadKind::kDexCacheAddress; - void* dex_cache_element_address = &dex_cache->GetStrings()[string_index]; - address = reinterpret_cast64<uint64_t>(dex_cache_element_address); } } else { // AOT app compilation. Try to lookup the string without allocating if not found. @@ -333,23 +311,16 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { desired_load_kind = HLoadString::LoadKind::kBootImageAddress; address = reinterpret_cast64<uint64_t>(string); } else { - // Not JIT and either the string is not in boot image or we are compiling in PIC mode. - // Use PC-relative load from the dex cache if the dex file belongs - // to the oat file that we're currently compiling. - desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file) - ? HLoadString::LoadKind::kDexCachePcRelative - : HLoadString::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadString::LoadKind::kBssEntry; } } } - if (is_in_dex_cache) { - load_string->MarkInDexCache(); - } HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind); switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: case HLoadString::LoadKind::kDexCacheViaMethod: load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index); break; @@ -358,13 +329,6 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { DCHECK_NE(address, 0u); load_string->SetLoadKindWithAddress(load_kind, address); break; - case HLoadString::LoadKind::kDexCachePcRelative: { - PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); - DexCacheArraysLayout layout(pointer_size, &dex_file); - size_t element_index = layout.StringOffset(string_index); - load_string->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index); - break; - } } } diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index 1dc69867b4..6d82e8e06d 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -26,8 +26,7 @@ void SideEffectsAnalysis::Run() { // In DEBUG mode, ensure side effects are properly initialized to empty. if (kIsDebugBuild) { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { SideEffects effects = GetBlockEffects(block); DCHECK(effects.DoesNothing()); if (block->IsLoopHeader()) { @@ -38,9 +37,7 @@ void SideEffectsAnalysis::Run() { } // Do a post order visit to ensure we visit a loop header after its loop body. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - + for (HBasicBlock* block : graph_->GetPostOrder()) { SideEffects effects = SideEffects::None(); // Update `effects` with the side effects of all instructions in this block. 
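A recurring mechanical change in this patch, visible in side_effects_analysis.cc above and in the ssa_* files that follow, replaces the HReversePostOrderIterator and HPostOrderIterator helpers with range-based for loops over containers returned by the graph. A minimal sketch of the pattern, assuming the accessors return something ArenaVector-like (std::vector is used here as a stand-in, and ReverseRange is re-implemented locally to mimic the helper used later in ssa_liveness_analysis.cc):

#include <vector>

struct HBasicBlock {};

// Stand-in for HGraph: the real accessors return ArenaVector<HBasicBlock*> references.
struct Graph {
  std::vector<HBasicBlock*> reverse_post_order_;
  const std::vector<HBasicBlock*>& GetReversePostOrder() const { return reverse_post_order_; }
};

// Local imitation of the ReverseRange adapter: iterate a container back to front.
template <typename Container>
class ReverseRangeAdapter {
 public:
  explicit ReverseRangeAdapter(const Container& c) : c_(c) {}
  auto begin() const { return c_.rbegin(); }
  auto end() const { return c_.rend(); }
 private:
  const Container& c_;
};

template <typename Container>
ReverseRangeAdapter<Container> ReverseRange(const Container& c) {
  return ReverseRangeAdapter<Container>(c);
}

void VisitBlocks(const Graph& graph) {
  // New style: iterate the stored order directly instead of driving an iterator object.
  for (HBasicBlock* block : graph.GetReversePostOrder()) {
    (void)block;  // visit block
  }
  // Reverse traversal, as ComputeLiveRanges does over the linear order.
  for (HBasicBlock* block : ReverseRange(graph.GetReversePostOrder())) {
    (void)block;  // visit block
  }
}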
for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 5a574d9af7..ae1e369999 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -25,8 +25,8 @@ namespace art { void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. - for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) { - for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) { + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* equality_instr = it.Current(); if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) { continue; @@ -57,8 +57,8 @@ void SsaBuilder::FixNullConstantType() { void SsaBuilder::EquivalentPhisCleanup() { // The order doesn't matter here. - for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) { - for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) { + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); HPhi* next = phi->GetNextEquivalentPhiWithSameType(); if (next != nullptr) { @@ -79,8 +79,7 @@ void SsaBuilder::EquivalentPhisCleanup() { } void SsaBuilder::FixEnvironmentPhis() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) { HPhi* phi = it_phis.Current()->AsPhi(); // If the phi is not dead, or has no environment uses, there is nothing to do. @@ -163,18 +162,12 @@ static bool TypePhiFromInputs(HPhi* phi) { // Replace inputs of `phi` to match its type. Return false if conflict is identified. bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) { Primitive::Type common_type = phi->GetType(); - if (common_type == Primitive::kPrimVoid || Primitive::IsIntegralType(common_type)) { - // Phi either contains only other untyped phis (common_type == kPrimVoid), - // or `common_type` is integral and we do not need to retype ambiguous inputs - // because they are always constructed with the integral type candidate. + if (Primitive::IsIntegralType(common_type)) { + // We do not need to retype ambiguous inputs because they are always constructed + // with the integral type candidate. if (kIsDebugBuild) { for (HInstruction* input : phi->GetInputs()) { - if (common_type == Primitive::kPrimVoid) { - DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid); - } else { - DCHECK((input->IsPhi() && input->GetType() == Primitive::kPrimVoid) || - HPhi::ToPhiType(input->GetType()) == common_type); - } + DCHECK(HPhi::ToPhiType(input->GetType()) == common_type); } } // Inputs did not need to be replaced, hence no conflict. Report success. 
@@ -234,8 +227,7 @@ bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) { void SsaBuilder::RunPrimitiveTypePropagation() { ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder)); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { if (block->IsLoopHeader()) { for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { HPhi* phi = phi_it.Current()->AsPhi(); @@ -303,7 +295,7 @@ static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { } static Primitive::Type GetPrimitiveArrayComponentType(HInstruction* array) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { ReferenceTypeInfo array_type = array->GetReferenceTypeInfo(); DCHECK(array_type.IsPrimitiveArrayClass()); return array_type.GetTypeHandle()->GetComponentType()->GetPrimitiveType(); diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index d7360adef8..45dac54115 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -49,7 +49,7 @@ class SsaBuilder : public ValueObject { public: SsaBuilder(HGraph* graph, Handle<mirror::DexCache> dex_cache, - StackHandleScopeCollection* handles) + VariableSizedHandleScope* handles) : graph_(graph), dex_cache_(dex_cache), handles_(handles), @@ -116,7 +116,7 @@ class SsaBuilder : public ValueObject { HGraph* graph_; Handle<mirror::DexCache> dex_cache_; - StackHandleScopeCollection* const handles_; + VariableSizedHandleScope* const handles_; // True if types of ambiguous ArrayGets have been resolved. bool agets_fixed_; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 7af4302884..e8e12e1a55 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -18,90 +18,21 @@ #include "base/bit_vector-inl.h" #include "code_generator.h" +#include "linear_order.h" #include "nodes.h" namespace art { void SsaLivenessAnalysis::Analyze() { - LinearizeGraph(); + // Compute the linear order directly in the graph's data structure + // (there are no more following graph mutations). + LinearizeGraph(graph_, graph_->GetArena(), &graph_->linear_order_); + + // Liveness analysis. NumberInstructions(); ComputeLiveness(); } -static bool IsLoop(HLoopInformation* info) { - return info != nullptr; -} - -static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) { - return first_loop == second_loop; -} - -static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) { - return (inner != outer) - && (inner != nullptr) - && (outer != nullptr) - && inner->IsIn(*outer); -} - -static void AddToListForLinearization(ArenaVector<HBasicBlock*>* worklist, HBasicBlock* block) { - HLoopInformation* block_loop = block->GetLoopInformation(); - auto insert_pos = worklist->rbegin(); // insert_pos.base() will be the actual position. - for (auto end = worklist->rend(); insert_pos != end; ++insert_pos) { - HBasicBlock* current = *insert_pos; - HLoopInformation* current_loop = current->GetLoopInformation(); - if (InSameLoop(block_loop, current_loop) - || !IsLoop(current_loop) - || IsInnerLoop(current_loop, block_loop)) { - // The block can be processed immediately. 
- break; - } - } - worklist->insert(insert_pos.base(), block); -} - -void SsaLivenessAnalysis::LinearizeGraph() { - // Create a reverse post ordering with the following properties: - // - Blocks in a loop are consecutive, - // - Back-edge is the last block before loop exits. - - // (1): Record the number of forward predecessors for each block. This is to - // ensure the resulting order is reverse post order. We could use the - // current reverse post order in the graph, but it would require making - // order queries to a GrowableArray, which is not the best data structure - // for it. - ArenaVector<uint32_t> forward_predecessors(graph_->GetBlocks().size(), - graph_->GetArena()->Adapter(kArenaAllocSsaLiveness)); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - size_t number_of_forward_predecessors = block->GetPredecessors().size(); - if (block->IsLoopHeader()) { - number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); - } - forward_predecessors[block->GetBlockId()] = number_of_forward_predecessors; - } - - // (2): Following a worklist approach, first start with the entry block, and - // iterate over the successors. When all non-back edge predecessors of a - // successor block are visited, the successor block is added in the worklist - // following an order that satisfies the requirements to build our linear graph. - graph_->linear_order_.reserve(graph_->GetReversePostOrder().size()); - ArenaVector<HBasicBlock*> worklist(graph_->GetArena()->Adapter(kArenaAllocSsaLiveness)); - worklist.push_back(graph_->GetEntryBlock()); - do { - HBasicBlock* current = worklist.back(); - worklist.pop_back(); - graph_->linear_order_.push_back(current); - for (HBasicBlock* successor : current->GetSuccessors()) { - int block_id = successor->GetBlockId(); - size_t number_of_remaining_predecessors = forward_predecessors[block_id]; - if (number_of_remaining_predecessors == 1) { - AddToListForLinearization(&worklist, successor); - } - forward_predecessors[block_id] = number_of_remaining_predecessors - 1; - } - } while (!worklist.empty()); -} - void SsaLivenessAnalysis::NumberInstructions() { int ssa_index = 0; size_t lifetime_position = 0; @@ -114,8 +45,7 @@ void SsaLivenessAnalysis::NumberInstructions() { // to differentiate between the start and end of an instruction. Adding 2 to // the lifetime position for each instruction ensures the start of an // instruction is different than the end of the previous instruction. - for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetLinearOrder()) { block->SetLifetimeStart(lifetime_position); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { @@ -157,8 +87,7 @@ void SsaLivenessAnalysis::NumberInstructions() { } void SsaLivenessAnalysis::ComputeLiveness() { - for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetLinearOrder()) { block_infos_[block->GetBlockId()] = new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_); } @@ -210,9 +139,7 @@ static void RecursivelyProcessInputs(HInstruction* current, void SsaLivenessAnalysis::ComputeLiveRanges() { // Do a post order visit, adding inputs of instructions live in the block where // that instruction is defined, and killing instructions that are being visited. 
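The comment above describes the standard backward liveness scheme that ComputeLiveRanges and ComputeLiveInAndLiveOutSets implement: a block's live-out set is the union of its successors' live-in sets, uses generate liveness, definitions kill it, and the whole thing is iterated to a fixed point. A compact sketch with plain vectors of flags (illustrative only; the pass itself works on BitVector-based per-block BlockInfo):

#include <vector>

using BitSet = std::vector<bool>;  // one flag per SSA value

struct Block {
  std::vector<int> successors;  // indices of successor blocks
  std::vector<int> uses;        // SSA values used in this block (gen)
  std::vector<int> defs;        // SSA values defined in this block (kill)
};

// Iterate until neither live_in nor live_out changes anymore.
void ComputeLiveInAndLiveOut(const std::vector<Block>& blocks,
                             size_t num_values,
                             std::vector<BitSet>* live_in,
                             std::vector<BitSet>* live_out) {
  live_in->assign(blocks.size(), BitSet(num_values, false));
  live_out->assign(blocks.size(), BitSet(num_values, false));
  bool changed = true;
  while (changed) {
    changed = false;
    for (size_t b = blocks.size(); b-- > 0; ) {  // reverse index order as a post-order stand-in
      // live_out[b] = union of the successors' live_in sets.
      for (int succ : blocks[b].successors) {
        for (size_t v = 0; v < num_values; ++v) {
          if ((*live_in)[succ][v] && !(*live_out)[b][v]) {
            (*live_out)[b][v] = true;
            changed = true;
          }
        }
      }
      // live_in[b] = uses[b] union (live_out[b] minus defs[b]).
      BitSet in = (*live_out)[b];
      for (int d : blocks[b].defs) in[d] = false;
      for (int u : blocks[b].uses) in[u] = true;
      if (in != (*live_in)[b]) {
        (*live_in)[b] = in;
        changed = true;
      }
    }
  }
}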
- for (HLinearPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - + for (HBasicBlock* block : ReverseRange(graph_->GetLinearOrder())) { BitVector* kill = GetKillSet(*block); BitVector* live_in = GetLiveInSet(*block); @@ -329,15 +256,13 @@ void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() { do { changed = false; - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - const HBasicBlock& block = *it.Current(); - + for (const HBasicBlock* block : graph_->GetPostOrder()) { // The live_in set depends on the kill set (which does not // change in this loop), and the live_out set. If the live_out // set does not change, there is no need to update the live_in set. - if (UpdateLiveOut(block) && UpdateLiveIn(block)) { + if (UpdateLiveOut(*block) && UpdateLiveIn(*block)) { if (kIsDebugBuild) { - CheckNoLiveInIrreducibleLoop(block); + CheckNoLiveInIrreducibleLoop(*block); } changed = true; } @@ -368,6 +293,27 @@ bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) { return live_in->UnionIfNotIn(live_out, kill); } +void LiveInterval::DumpWithContext(std::ostream& stream, + const CodeGenerator& codegen) const { + Dump(stream); + if (IsFixed()) { + stream << ", register:" << GetRegister() << "("; + if (IsFloatingPoint()) { + codegen.DumpFloatingPointRegister(stream, GetRegister()); + } else { + codegen.DumpCoreRegister(stream, GetRegister()); + } + stream << ")"; + } else { + stream << ", spill slot:" << GetSpillSlot(); + } + stream << ", requires_register:" << (GetDefinedBy() != nullptr && RequiresRegister()); + if (GetParent()->GetDefinedBy() != nullptr) { + stream << ", defined_by:" << GetParent()->GetDefinedBy()->GetKind(); + stream << "(" << GetParent()->GetDefinedBy()->GetLifetimePosition() << ")"; + } +} + static int RegisterOrLowRegister(Location location) { return location.IsPair() ? location.low() : location.reg(); } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index dc98864d9b..b62bf4e5f9 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -150,9 +150,7 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { if (GetIsEnvironment()) return false; if (IsSynthesized()) return false; Location location = GetUser()->GetLocations()->InAt(GetInputIndex()); - return location.IsUnallocated() - && (location.GetPolicy() == Location::kRequiresRegister - || location.GetPolicy() == Location::kRequiresFpuRegister); + return location.IsUnallocated() && location.RequiresRegisterKind(); } private: @@ -210,11 +208,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return new (allocator) LiveInterval(allocator, type, instruction); } - static LiveInterval* MakeSlowPathInterval(ArenaAllocator* allocator, HInstruction* instruction) { - return new (allocator) LiveInterval( - allocator, Primitive::kPrimVoid, instruction, false, kNoRegister, false, true); - } - static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) { return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false); } @@ -225,7 +218,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { bool IsFixed() const { return is_fixed_; } bool IsTemp() const { return is_temp_; } - bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; } // This interval is the result of a split. 
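The UsePosition::RequiresRegister change above folds the explicit policy test into a new Location::RequiresRegisterKind() helper. Judging only from the code it replaces, the helper presumably reduces to the same two-policy check; the types below are simplified stand-ins, not ART's packed Location class:

// Simplified model of the location policies involved.
enum class Policy { kAny, kRequiresRegister, kRequiresFpuRegister, kSameAsFirstInput };

struct Location {
  bool is_unallocated = true;
  Policy policy = Policy::kAny;

  bool IsUnallocated() const { return is_unallocated; }

  // What RequiresRegisterKind() presumably checks, based on the code it replaced:
  // the policy mandates either a core or a floating-point register.
  bool RequiresRegisterKind() const {
    return policy == Policy::kRequiresRegister || policy == Policy::kRequiresFpuRegister;
  }
};

bool UseRequiresRegister(const Location& location) {
  // Matches the rewritten UsePosition::RequiresRegister() condition.
  return location.IsUnallocated() && location.RequiresRegisterKind();
}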
bool IsSplit() const { return parent_ != this; } @@ -481,6 +473,10 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return last_range_->GetEnd(); } + size_t GetLength() const { + return GetEnd() - GetStart(); + } + size_t FirstRegisterUseAfter(size_t position) const { if (is_temp_) { return position == GetStart() ? position : kNoLifetime; @@ -504,10 +500,18 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return kNoLifetime; } + // Returns the location of the first register use for this live interval, + // including a register definition if applicable. size_t FirstRegisterUse() const { return FirstRegisterUseAfter(GetStart()); } + // Whether the interval requires a register rather than a stack location. + // If needed for performance, this could be cached. + bool RequiresRegister() const { + return !HasRegister() && FirstRegisterUse() != kNoLifetime; + } + size_t FirstUseAfter(size_t position) const { if (is_temp_) { return position == GetStart() ? position : kNoLifetime; @@ -693,6 +697,10 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { stream << " is_high: " << IsHighInterval(); } + // Same as Dump, but adds context such as the instruction defining this interval, and + // the register currently assigned to this interval. + void DumpWithContext(std::ostream& stream, const CodeGenerator& codegen) const; + LiveInterval* GetNextSibling() const { return next_sibling_; } LiveInterval* GetLastSibling() { LiveInterval* result = this; @@ -776,7 +784,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { DCHECK(!HasHighInterval()); DCHECK(!HasLowInterval()); high_or_low_interval_ = new (allocator_) LiveInterval( - allocator_, type_, defined_by_, false, kNoRegister, is_temp, false, true); + allocator_, type_, defined_by_, false, kNoRegister, is_temp, true); high_or_low_interval_->high_or_low_interval_ = this; if (first_range_ != nullptr) { high_or_low_interval_->first_range_ = first_range_->Dup(allocator_); @@ -871,6 +879,33 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { range_search_start_ = first_range_; } + bool DefinitionRequiresRegister() const { + DCHECK(IsParent()); + LocationSummary* locations = defined_by_->GetLocations(); + Location location = locations->Out(); + // This interval is the first interval of the instruction. If the output + // of the instruction requires a register, we return the position of that instruction + // as the first register use. 
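GetLength() and the public RequiresRegister() added to LiveInterval above are simple queries, but they hint at allocator heuristics that weigh interval length against register pressure. One plausible, purely illustrative use, with a stand-in record instead of the real LiveInterval:

#include <cstddef>
#include <vector>

// Hypothetical, simplified interval record mirroring the new helpers.
struct IntervalInfo {
  size_t start;
  size_t end;
  bool has_register;      // a register is already assigned
  bool has_register_use;  // FirstRegisterUse() would not be kNoLifetime

  size_t GetLength() const { return end - start; }
  bool RequiresRegister() const { return !has_register && has_register_use; }
};

// When registers run out, prefer to spill the longest live interval that does
// not strictly require a register.
IntervalInfo* PickSpillCandidate(const std::vector<IntervalInfo*>& active) {
  IntervalInfo* best = nullptr;
  for (IntervalInfo* interval : active) {
    if (interval->RequiresRegister()) continue;  // must stay in a register
    if (best == nullptr || interval->GetLength() > best->GetLength()) {
      best = interval;
    }
  }
  return best;  // null if every active interval requires a register
}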
+ if (location.IsUnallocated()) { + if ((location.GetPolicy() == Location::kRequiresRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && (locations->InAt(0).IsRegister() + || locations->InAt(0).IsRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { + return true; + } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && (locations->InAt(0).IsFpuRegister() + || locations->InAt(0).IsFpuRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) { + return true; + } + } else if (location.IsRegister() || location.IsRegisterPair()) { + return true; + } + return false; + } + private: LiveInterval(ArenaAllocator* allocator, Primitive::Type type, @@ -878,7 +913,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { bool is_fixed = false, int reg = kNoRegister, bool is_temp = false, - bool is_slow_path_safepoint = false, bool is_high_interval = false) : allocator_(allocator), first_range_(nullptr), @@ -895,7 +929,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { spill_slot_(kNoSpillSlot), is_fixed_(is_fixed), is_temp_(is_temp), - is_slow_path_safepoint_(is_slow_path_safepoint), is_high_interval_(is_high_interval), high_or_low_interval_(nullptr), defined_by_(defined_by) {} @@ -925,33 +958,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return range; } - bool DefinitionRequiresRegister() const { - DCHECK(IsParent()); - LocationSummary* locations = defined_by_->GetLocations(); - Location location = locations->Out(); - // This interval is the first interval of the instruction. If the output - // of the instruction requires a register, we return the position of that instruction - // as the first register use. - if (location.IsUnallocated()) { - if ((location.GetPolicy() == Location::kRequiresRegister) - || (location.GetPolicy() == Location::kSameAsFirstInput - && (locations->InAt(0).IsRegister() - || locations->InAt(0).IsRegisterPair() - || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { - return true; - } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) - || (location.GetPolicy() == Location::kSameAsFirstInput - && (locations->InAt(0).IsFpuRegister() - || locations->InAt(0).IsFpuRegisterPair() - || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) { - return true; - } - } else if (location.IsRegister() || location.IsRegisterPair()) { - return true; - } - return false; - } - bool IsDefiningPosition(size_t position) const { return IsParent() && (position == GetStart()); } @@ -969,38 +975,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return false; } - bool IsLinearOrderWellFormed(const HGraph& graph) { - for (HBasicBlock* header : graph.GetBlocks()) { - if (header == nullptr || !header->IsLoopHeader()) { - continue; - } - - HLoopInformation* loop = header->GetLoopInformation(); - size_t num_blocks = loop->GetBlocks().NumSetBits(); - size_t found_blocks = 0u; - - for (HLinearOrderIterator it(graph); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); - if (loop->Contains(*current)) { - found_blocks++; - if (found_blocks == 1u && current != header) { - // First block is not the header. - return false; - } else if (found_blocks == num_blocks && !loop->IsBackEdge(*current)) { - // Last block is not a back edge. 
- return false; - } - } else if (found_blocks != 0u && found_blocks != num_blocks) { - // Blocks are not adjacent. - return false; - } - } - DCHECK_EQ(found_blocks, num_blocks); - } - - return true; - } - void AddBackEdgeUses(const HBasicBlock& block_at_use) { DCHECK(block_at_use.IsInLoop()); if (block_at_use.GetGraph()->HasIrreducibleLoops()) { @@ -1010,8 +984,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return; } - DCHECK(IsLinearOrderWellFormed(*block_at_use.GetGraph())); - // Add synthesized uses at the back edge of loops to help the register allocator. // Note that this method is called in decreasing liveness order, to faciliate adding // uses at the head of the `first_use_` linked list. Because below @@ -1107,9 +1079,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { // Whether the interval is for a temporary. const bool is_temp_; - // Whether the interval is for a safepoint that calls on slow path. - const bool is_slow_path_safepoint_; - // Whether this interval is a synthesized interval for register pair. const bool is_high_interval_; @@ -1217,12 +1186,6 @@ class SsaLivenessAnalysis : public ValueObject { static constexpr const char* kLivenessPassName = "liveness"; private: - // Linearize the graph so that: - // (1): a block is always after its dominator, - // (2): blocks of loops are contiguous. - // This creates a natural and efficient ordering when visualizing live ranges. - void LinearizeGraph(); - // Give an SSA number to each instruction that defines a value used by another instruction, // and setup the lifetime information of each instruction and block. void NumberInstructions(); diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index b1ec99ab8e..aec7a3c555 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -34,8 +34,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination)); // Add to the worklist phis referenced by non-phi instructions. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); if (phi->IsDead()) { @@ -84,8 +83,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { // Remove phis that are not live. Visit in post order so that phis // that are not inputs of loop phis can be removed when they have // no users left (dead phis might use dead phis). - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { HInstruction* current = block->GetFirstPhi(); HInstruction* next = nullptr; HPhi* phi; @@ -119,8 +117,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { void SsaRedundantPhiElimination::Run() { // Add all phis in the worklist. Order does not matter for correctness, and // neither will necessarily converge faster. 
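SsaRedundantPhiElimination::Run above only shows the worklist being seeded with every phi; the reduction itself is outside this hunk. Assuming the pass follows the standard redundant-phi algorithm, where a phi whose inputs, ignoring self-references, all resolve to one value is replaced by that value, here is a compact sketch over a toy value graph rather than ART's HPhi API:

#include <deque>
#include <vector>

// Toy value graph: a value is either a phi with inputs or a plain definition.
struct Value {
  bool is_phi = false;
  std::vector<Value*> inputs;    // meaningful only for phis
  Value* replacement = nullptr;  // set once the phi is found redundant
};

Value* Resolve(Value* v) {
  while (v->replacement != nullptr) v = v->replacement;
  return v;
}

void EliminateRedundantPhis(const std::vector<Value*>& all_phis) {
  std::deque<Value*> worklist(all_phis.begin(), all_phis.end());
  while (!worklist.empty()) {
    Value* phi = worklist.front();
    worklist.pop_front();
    if (!phi->is_phi || phi->replacement != nullptr) continue;
    Value* candidate = nullptr;
    bool redundant = true;
    for (Value* input : phi->inputs) {
      Value* v = Resolve(input);
      if (v == phi) continue;  // self-reference, ignore
      if (candidate == nullptr) {
        candidate = v;
      } else if (v != candidate) {
        redundant = false;
        break;
      }
    }
    if (redundant && candidate != nullptr) {
      phi->replacement = candidate;
      // A full pass would re-add users that are themselves phis, since they
      // may have just become redundant; the toy graph does not track users.
    }
  }
}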
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { worklist_.push_back(inst_it.Current()->AsPhi()); } diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc index 195159f61b..4e256832a2 100644 --- a/compiler/optimizing/x86_memory_gen.cc +++ b/compiler/optimizing/x86_memory_gen.cc @@ -16,6 +16,7 @@ #include "x86_memory_gen.h" #include "code_generator.h" +#include "driver/compiler_options.h" namespace art { namespace x86 { @@ -69,8 +70,8 @@ class MemoryOperandVisitor : public HGraphVisitor { }; X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph, - OptimizingCompilerStats* stats, - CodeGenerator* codegen) + CodeGenerator* codegen, + OptimizingCompilerStats* stats) : HOptimization(graph, kX86MemoryOperandGenerationPassName, stats), do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) { } diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h index 7e886819bb..5f15d9f1e6 100644 --- a/compiler/optimizing/x86_memory_gen.h +++ b/compiler/optimizing/x86_memory_gen.h @@ -28,8 +28,8 @@ namespace x86 { class X86MemoryOperandGeneration : public HOptimization { public: X86MemoryOperandGeneration(HGraph* graph, - OptimizingCompilerStats* stats, - CodeGenerator* codegen); + CodeGenerator* codegen, + OptimizingCompilerStats* stats); void Run() OVERRIDE;