diff options
Diffstat (limited to 'compiler/optimizing')
26 files changed, 2775 insertions, 1154 deletions
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 167c35d075..3257de1858 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -1449,7 +1449,8 @@ void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc, uint32_t* args, uint32_t register_index) { HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc); - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + bool finalizable; + QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable) ? kQuickAllocArrayWithAccessCheck : kQuickAllocArray; HInstruction* object = new (arena_) HNewArray(length, @@ -1629,9 +1630,9 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, } } -bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const { +bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const { return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks( - dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index); + dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable); } void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table, @@ -2508,7 +2509,9 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(fake_string); UpdateLocal(register_index, fake_string, dex_pc); } else { - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + bool finalizable; + bool can_throw = NeedsAccessCheck(type_index, &finalizable); + QuickEntrypointEnum entrypoint = can_throw ? 
kQuickAllocObjectWithAccessCheck : kQuickAllocObject; @@ -2517,6 +2520,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), + can_throw, + finalizable, entrypoint)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc); } @@ -2526,7 +2531,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::NEW_ARRAY: { uint16_t type_index = instruction.VRegC_22c(); HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc); - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + bool finalizable; + QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable) ? kQuickAllocArrayWithAccessCheck : kQuickAllocArray; current_block_->AddInstruction(new (arena_) HNewArray(length, diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 9eaa4b62c5..f857ef0e12 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -138,7 +138,10 @@ class HGraphBuilder : public ValueObject { HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const; void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc); void InitializeParameters(uint16_t number_of_parameters); - bool NeedsAccessCheck(uint32_t type_index) const; + + // Returns whether the current method needs access check for the type. + // Output parameter finalizable is set to whether the type is finalizable. 
+ bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const; template<typename T> void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 5188e115e0..77d53fcd8f 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -310,7 +310,7 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fp_registers, + size_t maximum_number_of_live_fpu_registers, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order) { block_order_ = &block_order; @@ -324,14 +324,14 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, && IsLeafMethod() && !RequiresCurrentMethod()) { DCHECK_EQ(maximum_number_of_live_core_registers, 0u); - DCHECK_EQ(maximum_number_of_live_fp_registers, 0u); + DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u); SetFrameSize(CallPushesPC() ? GetWordSize() : 0); } else { SetFrameSize(RoundUp( number_of_spill_slots * kVRegSize + number_of_out_slots * kVRegSize + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize() + FrameEntrySpillSize(), kStackAlignment)); } @@ -547,15 +547,19 @@ void CodeGenerator::GenerateUnresolvedFieldAccess( } } +// TODO: Remove argument `code_generator_supports_read_barrier` when +// all code generators have read barrier support. 
void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls, Location runtime_type_index_location, - Location runtime_return_location) { + Location runtime_return_location, + bool code_generator_supports_read_barrier) { ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena(); LocationSummary::CallKind call_kind = cls->NeedsAccessCheck() ? LocationSummary::kCall - : (cls->CanCallRuntime() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall); + : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) || + cls->CanCallRuntime()) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind); if (cls->NeedsAccessCheck()) { locations->SetInAt(0, Location::NoLocation()); @@ -1320,21 +1324,38 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod // coherent with the runtime call generated, and that the GC side effect is // set when required. 
if (slow_path == nullptr) { - DCHECK(instruction->GetLocations()->WillCall()) << instruction->DebugName(); + DCHECK(instruction->GetLocations()->WillCall()) + << "instruction->DebugName()=" << instruction->DebugName(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) - << instruction->DebugName() << instruction->GetSideEffects().ToString(); + << "instruction->DebugName()=" << instruction->DebugName() + << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString(); } else { DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal()) - << instruction->DebugName() << slow_path->GetDescription(); + << "instruction->DebugName()=" << instruction->DebugName() + << " slow_path->GetDescription()=" << slow_path->GetDescription(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || // Control flow would not come back into the code if a fatal slow // path is taken, so we do not care if it triggers GC. slow_path->IsFatal() || // HDeoptimize is a special case: we know we are not coming back from // it into the code. - instruction->IsDeoptimize()) - << instruction->DebugName() << instruction->GetSideEffects().ToString() - << slow_path->GetDescription(); + instruction->IsDeoptimize() || + // When read barriers are enabled, some instructions use a + // slow path to emit a read barrier, which does not trigger + // GC, is not fatal, nor is emitted by HDeoptimize + // instructions. 
+ (kEmitCompilerReadBarrier && + (instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArraySet() || + instruction->IsArrayGet() || + instruction->IsLoadClass() || + instruction->IsLoadString() || + instruction->IsInstanceOf() || + instruction->IsCheckCast()))) + << "instruction->DebugName()=" << instruction->DebugName() + << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString() + << " slow_path->GetDescription()=" << slow_path->GetDescription(); } // Check the coherency of leaf information. @@ -1346,11 +1367,12 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod } void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* register_set = locations->GetLiveRegisters(); + RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (register_set->ContainsCoreRegister(i)) { + if (live_registers->ContainsCoreRegister(i)) { // If the register holds an object, update the stack mask. 
if (locations->RegisterContainsObject(i)) { locations->SetStackBit(stack_offset / kVRegSize); @@ -1365,7 +1387,7 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (register_set->ContainsFloatingPointRegister(i)) { + if (live_registers->ContainsFloatingPointRegister(i)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); saved_fpu_stack_offsets_[i] = stack_offset; @@ -1376,12 +1398,14 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo } void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* register_set = locations->GetLiveRegisters(); + RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (register_set->ContainsCoreRegister(i)) { + if (live_registers->ContainsCoreRegister(i)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); stack_offset += codegen->RestoreCoreRegister(stack_offset, i); } } @@ -1389,8 +1413,9 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (register_set->ContainsFloatingPointRegister(i)) { + if (live_registers->ContainsFloatingPointRegister(i)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i); } } 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2108abefcc..114d97be94 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -201,7 +201,7 @@ class CodeGenerator { virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; void InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fp_registers, + size_t maximum_number_of_live_fpu_registers, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order); int32_t GetStackSlot(HLocal* local) const; @@ -250,6 +250,15 @@ class CodeGenerator { // Returns whether we should split long moves in parallel moves. virtual bool ShouldSplitLongMoves() const { return false; } + size_t GetNumberOfCoreCalleeSaveRegisters() const { + return POPCOUNT(core_callee_save_mask_); + } + + size_t GetNumberOfCoreCallerSaveRegisters() const { + DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters()); + return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters(); + } + bool IsCoreCalleeSaveRegister(int reg) const { return (core_callee_save_mask_ & (1 << reg)) != 0; } @@ -416,7 +425,8 @@ class CodeGenerator { // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design. 
static void CreateLoadClassLocationSummary(HLoadClass* cls, Location runtime_type_index_location, - Location runtime_return_location); + Location runtime_return_location, + bool code_generator_supports_read_barrier = false); static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 54c6cc8890..655bbb8a8e 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1240,26 +1240,19 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, __ b(true_label, final_condition); } -void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HIf* if_instr, - HCondition* condition, - Label* true_target, - Label* false_target, - Label* always_true_target) { +void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition, + Label* true_target_in, + Label* false_target_in) { + // Generated branching requires both targets to be explicit. If either of the + // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. + Label fallthrough_target; + Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; + Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; + LocationSummary* locations = condition->GetLocations(); Location left = locations->InAt(0); Location right = locations->InAt(1); - // We don't want true_target as a nullptr. - if (true_target == nullptr) { - true_target = always_true_target; - } - bool falls_through = (false_target == nullptr); - - // FP compares don't like null false_targets. 
- if (false_target == nullptr) { - false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - } - Primitive::Type type = condition->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: @@ -1278,117 +1271,125 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HIf* if_instr, LOG(FATAL) << "Unexpected compare type " << type; } - if (!falls_through) { + if (false_target != &fallthrough_target) { __ b(false_target); } + + if (fallthrough_target.IsLinked()) { + __ Bind(&fallthrough_target); + } } void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - if (cond->IsIntConstant()) { + Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. 
- int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ b(always_true_target); + if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ b(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ b(false_target); + } + } + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + // Condition has been materialized, compare the output to 0. + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); + DCHECK(cond_val.IsRegister()); + if (true_target == nullptr) { + __ CompareAndBranchIfZero(cond_val.AsRegister<Register>(), false_target); + } else { + __ CompareAndBranchIfNonZero(cond_val.AsRegister<Register>(), true_target); } } else { - // Can we optimize the jump if we know that the next block is the true case? + // Condition has not been materialized. Use its inputs as the comparison and + // its condition as the branch condition. HCondition* condition = cond->AsCondition(); - bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition); - if (condition == nullptr || condition->NeedsMaterialization()) { - // Condition has been materialized, compare the output to 0. 
- DCHECK(instruction->GetLocations()->InAt(0).IsRegister()); - if (can_jump_to_false) { - __ CompareAndBranchIfZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(), - false_target); - return; - } - __ CompareAndBranchIfNonZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(), - true_target); - } else { - // Condition has not been materialized, use its inputs as the - // comparison and its condition as the branch condition. - Primitive::Type type = (condition != nullptr) - ? cond->InputAt(0)->GetType() - : Primitive::kPrimInt; - // Is this a long or FP comparison that has been folded into the HCondition? - if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { - // Generate the comparison directly. - GenerateCompareTestAndBranch(instruction->AsIf(), condition, - true_target, false_target, always_true_target); - return; - } - LocationSummary* locations = cond->GetLocations(); - DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0); - Register left = locations->InAt(0).AsRegister<Register>(); - Location right = locations->InAt(1); - if (right.IsRegister()) { - __ cmp(left, ShifterOperand(right.AsRegister<Register>())); - } else { - DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); - } - if (can_jump_to_false) { - __ b(false_target, ARMCondition(condition->GetOppositeCondition())); - return; - } + // If this is a long or FP comparison that has been folded into + // the HCondition, generate the comparison directly. 
+ Primitive::Type type = condition->InputAt(0)->GetType(); + if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + GenerateCompareTestAndBranch(condition, true_target, false_target); + return; + } + LocationSummary* locations = cond->GetLocations(); + DCHECK(locations->InAt(0).IsRegister()); + Register left = locations->InAt(0).AsRegister<Register>(); + Location right = locations->InAt(1); + if (right.IsRegister()) { + __ cmp(left, ShifterOperand(right.AsRegister<Register>())); + } else { + DCHECK(right.IsConstant()); + GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); + } + if (true_target == nullptr) { + __ b(false_target, ARMCondition(condition->GetOppositeCondition())); + } else { __ b(true_target, ARMCondition(condition->GetCondition())); } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. 
+ if (true_target != nullptr && false_target != nullptr) { __ b(false_target); } } void LocationsBuilderARM::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { - Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
+ nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathARM(deoptimize); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize); codegen_->AddSlowPath(slow_path); - Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderARM::VisitCondition(HCondition* cond) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index cef1095c5d..32bfe0f0be 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -228,15 +228,13 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target); + Label* false_target); void GenerateCompareWithImmediate(Register left, int32_t right); - void GenerateCompareTestAndBranch(HIf* if_instr, - HCondition* condition, + void 
GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, - Label* false_target, - Label* always_true_target); + Label* false_target); void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7e248b402a..d1bddf673a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -68,6 +68,10 @@ using helpers::ARM64EncodableConstantOrRegister; using helpers::ArtVixlRegCodeCoherentForRegSet; static constexpr int kCurrentMethodStackOffset = 0; +// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump +// table version generates 7 instructions and num_entries literals. Compare/jump sequence will +// generates less code/data with a small num_entries. +static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; inline Condition ARM64Condition(IfCondition cond) { switch (cond) { @@ -545,6 +549,28 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); }; +void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { + uint32_t num_entries = switch_instr_->GetNumEntries(); + DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold); + + // We are about to use the assembler to place literals directly. Make sure we have enough + // underlying code buffer and we have generated the jump table with right size. 
+ CodeBufferCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t), + CodeBufferCheckScope::kCheck, CodeBufferCheckScope::kExactSize); + + __ Bind(&table_start_); + const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + vixl::Label* target_label = codegen->GetLabelOf(successors[i]); + DCHECK(target_label->IsBound()); + ptrdiff_t jump_offset = target_label->location() - table_start_.location(); + DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min()); + DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max()); + Literal<int32_t> literal(jump_offset); + __ place(&literal); + } +} + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { @@ -587,6 +613,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, compiler_options, stats), block_labels_(nullptr), + jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -603,10 +630,16 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, AddAllocatedRegister(LocationFrom(lr)); } -#undef __ #define __ GetVIXLAssembler()-> +void CodeGeneratorARM64::EmitJumpTables() { + for (auto jump_table : jump_tables_) { + jump_table->EmitTable(this); + } +} + void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { + EmitJumpTables(); // Ensure we emit the literal pool. 
__ FinalizeCode(); @@ -2283,38 +2316,56 @@ void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) } void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, vixl::Label* true_target, - vixl::Label* false_target, - vixl::Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - HCondition* condition = cond->AsCondition(); - - if (cond->IsIntConstant()) { - int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ B(always_true_target); + vixl::Label* false_target) { + // FP branching requires both targets to be explicit. If either of the targets + // is nullptr (fallthrough) use and bind `fallthrough_target` instead. + vixl::Label fallthrough_target; + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against 1. + if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ B(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ B(false_target); + } } - } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { // The condition instruction has been materialized, compare the output to 0. 
- Location cond_val = instruction->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - __ Cbnz(InputRegisterAt(instruction, 0), true_target); + if (true_target == nullptr) { + __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); + } else { + __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); + } } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. - Primitive::Type type = - cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; + HCondition* condition = cond->AsCondition(); + Primitive::Type type = condition->InputAt(0)->GetType(); if (Primitive::IsFloatingPointType(type)) { - // FP compares don't like null false_targets. - if (false_target == nullptr) { - false_target = codegen_->GetLabelOf(instruction->AsIf()->IfFalseSuccessor()); - } FPRegister lhs = InputFPRegisterAt(condition, 0); if (condition->GetLocations()->InAt(1).IsConstant()) { DCHECK(IsFloatingPointZeroConstant(condition->GetLocations()->InAt(1).GetConstant())); @@ -2324,31 +2375,45 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct __ Fcmp(lhs, InputFPRegisterAt(condition, 1)); } if (condition->IsFPConditionTrueIfNaN()) { - __ B(vs, true_target); // VS for unordered. + __ B(vs, true_target == nullptr ? &fallthrough_target : true_target); } else if (condition->IsFPConditionFalseIfNaN()) { - __ B(vs, false_target); // VS for unordered. + __ B(vs, false_target == nullptr ? &fallthrough_target : false_target); + } + if (true_target == nullptr) { + __ B(ARM64Condition(condition->GetOppositeCondition()), false_target); + } else { + __ B(ARM64Condition(condition->GetCondition()), true_target); } - __ B(ARM64Condition(condition->GetCondition()), true_target); } else { // Integer cases. 
Register lhs = InputRegisterAt(condition, 0); Operand rhs = InputOperandAt(condition, 1); - Condition arm64_cond = ARM64Condition(condition->GetCondition()); + + Condition arm64_cond; + vixl::Label* non_fallthrough_target; + if (true_target == nullptr) { + arm64_cond = ARM64Condition(condition->GetOppositeCondition()); + non_fallthrough_target = false_target; + } else { + arm64_cond = ARM64Condition(condition->GetCondition()); + non_fallthrough_target = true_target; + } + if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { switch (arm64_cond) { case eq: - __ Cbz(lhs, true_target); + __ Cbz(lhs, non_fallthrough_target); break; case ne: - __ Cbnz(lhs, true_target); + __ Cbnz(lhs, non_fallthrough_target); break; case lt: // Test the sign bit and branch accordingly. - __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); break; case ge: // Test the sign bit and branch accordingly. - __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); break; default: // Without the `static_cast` the compiler throws an error for @@ -2357,43 +2422,43 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct } } else { __ Cmp(lhs, rhs); - __ B(arm64_cond, true_target); + __ B(arm64_cond, non_fallthrough_target); } } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. 
+ if (true_target != nullptr && false_target != nullptr) { __ B(false_target); } + + if (fallthrough_target.IsLinked()) { + __ Bind(&fallthrough_target); + } } void LocationsBuilderARM64::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { - vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - vixl::Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + vixl::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + vixl::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
+ nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } @@ -2402,8 +2467,10 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM64(deoptimize); codegen_->AddSlowPath(slow_path); - vixl::Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -2856,18 +2923,18 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: // temp = thread->string_init_entrypoint - __ Ldr(XRegisterFrom(temp).X(), MemOperand(tr, invoke->GetStringInitOffset())); + __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset())); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: // Load method address from literal pool. 
- __ Ldr(XRegisterFrom(temp).X(), DeduplicateUint64Literal(invoke->GetMethodAddress())); + __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: // Load method address from literal pool with a link-time patch. - __ Ldr(XRegisterFrom(temp).X(), + __ Ldr(XRegisterFrom(temp), DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { @@ -2877,16 +2944,19 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok vixl::Label* pc_insn_label = &pc_relative_dex_cache_patches_.back().label; { vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ adrp(XRegisterFrom(temp).X(), 0); + __ Bind(pc_insn_label); + __ adrp(XRegisterFrom(temp), 0); } - __ Bind(pc_insn_label); // Bind after ADRP. pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label; // Add LDR with its PC-relative DexCache access patch. pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, invoke->GetDexCacheArrayOffset()); - __ Ldr(XRegisterFrom(temp).X(), MemOperand(XRegisterFrom(temp).X(), 0)); - __ Bind(&pc_relative_dex_cache_patches_.back().label); // Bind after LDR. 
- pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label; + { + vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(&pc_relative_dex_cache_patches_.back().label); + __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), 0)); + pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label; + } break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { @@ -2920,8 +2990,9 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { relative_call_patches_.emplace_back(invoke->GetTargetMethod()); vixl::Label* label = &relative_call_patches_.back().label; - __ Bl(label); // Arbitrarily branch to the instruction after BL, override at link time. - __ Bind(label); // Bind after BL. + vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(label); + __ bl(0); // Branch and link to itself. This will be overriden at link time. break; } case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: @@ -2934,7 +3005,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // LR = callee_method->entry_point_from_quick_compiled_code_; __ Ldr(lr, MemOperand( - XRegisterFrom(callee_method).X(), + XRegisterFrom(callee_method), ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value())); // lr() __ Blr(lr); @@ -2990,14 +3061,14 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_method.dex_method_index)); } for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) { - linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location() - 4u, + linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location(), info.target_method.dex_file, info.target_method.dex_method_index)); } for (const PcRelativeDexCacheAccessInfo& info : 
pc_relative_dex_cache_patches_) { - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location() - 4u, + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location(), &info.target_dex_file, - info.pc_insn_label->location() - 4u, + info.pc_insn_label->location(), info.element_offset)); } } @@ -3810,26 +3881,73 @@ void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); Register value_reg = InputRegisterAt(switch_instr, 0); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - vixl::Label* succ = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Cbz(value_reg, succ); - } else { - __ Cmp(value_reg, vixl::Operand(case_value)); - __ B(eq, succ); + // Roughly set 16 as max average assemblies generated per HIR in a graph. + static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize; + // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to + // make sure we don't emit it if the target may run out of range. + // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR + // ranges and emit the tables only as required. + static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction; + + if (num_entries < kPackedSwitchJumpTableThreshold || + // Current instruction id is an upper bound of the number of HIRs in the graph. 
+ GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) { + // Create a series of compare/jumps. + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + int32_t case_value = lower_bound + i; + vixl::Label* succ = codegen_->GetLabelOf(successors[i]); + if (case_value == 0) { + __ Cbz(value_reg, succ); + } else { + __ Cmp(value_reg, Operand(case_value)); + __ B(eq, succ); + } } - } - // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ B(codegen_->GetLabelOf(default_block)); + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ B(codegen_->GetLabelOf(default_block)); + } + } else { + JumpTableARM64* jump_table = new (GetGraph()->GetArena()) JumpTableARM64(switch_instr); + codegen_->AddJumpTable(jump_table); + + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + + // Below instructions should use at most one blocked register. Since there are two blocked + // registers, we are free to block one. + Register temp_w = temps.AcquireW(); + Register index; + // Remove the bias. + if (lower_bound != 0) { + index = temp_w; + __ Sub(index, value_reg, Operand(lower_bound)); + } else { + index = value_reg; + } + + // Jump to default block if index is out of the range. + __ Cmp(index, Operand(num_entries)); + __ B(hs, codegen_->GetLabelOf(default_block)); + + // In current VIXL implementation, it won't require any blocked registers to encode the + // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the + // register pressure. + Register table_base = temps.AcquireX(); + // Load jump offset from the table. 
+ __ Adr(table_base, jump_table->GetTableStartLabel()); + Register jump_offset = temp_w; + __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2)); + + // Jump to target block by branching to table_base(pc related) + offset. + Register target_address = table_base; + __ Add(target_address, table_base, Operand(jump_offset, SXTW)); + __ Br(target_address); } } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index aa5ad386e1..881afcc123 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -81,6 +81,22 @@ class SlowPathCodeARM64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64); }; +class JumpTableARM64 : public ArenaObject<kArenaAllocSwitchTable> { + public: + explicit JumpTableARM64(HPackedSwitch* switch_instr) + : switch_instr_(switch_instr), table_start_() {} + + vixl::Label* GetTableStartLabel() { return &table_start_; } + + void EmitTable(CodeGeneratorARM64* codegen); + + private: + HPackedSwitch* const switch_instr_; + vixl::Label table_start_; + + DISALLOW_COPY_AND_ASSIGN(JumpTableARM64); +}; + static const vixl::Register kRuntimeParameterCoreRegisters[] = { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 }; static constexpr size_t kRuntimeParameterCoreRegistersLength = @@ -203,9 +219,9 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, vixl::Label* true_target, - vixl::Label* false_target, - vixl::Label* always_true_target); + vixl::Label* false_target); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -358,6 +374,10 @@ class CodeGeneratorARM64 : public 
CodeGenerator { block_labels_ = CommonInitializeLabels<vixl::Label>(); } + void AddJumpTable(JumpTableARM64* jump_table) { + jump_tables_.push_back(jump_table); + } + void Finalize(CodeAllocator* allocator) OVERRIDE; // Code generation helpers. @@ -422,15 +442,16 @@ class CodeGeneratorARM64 : public CodeGenerator { const DexFile& target_dex_file; uint32_t element_offset; - // NOTE: Labels are bound to the end of the patched instruction because - // we don't know if there will be a veneer or how big it will be. vixl::Label label; vixl::Label* pc_insn_label; }; + void EmitJumpTables(); + // Labels for each block that will be compiled. vixl::Label* block_labels_; // Indexed by block id. vixl::Label frame_entry_label_; + ArenaVector<JumpTableARM64*> jump_tables_; LocationsBuilderARM64 location_builder_; InstructionCodeGeneratorARM64 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 959adb4238..919ed2db78 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -2420,30 +2420,51 @@ void InstructionCodeGeneratorMIPS::VisitTryBoundary(HTryBoundary* try_boundary) } void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, MipsLabel* true_target, - MipsLabel* false_target, - MipsLabel* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - HCondition* condition = cond->AsCondition(); - - if (cond->IsIntConstant()) { - int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ B(always_true_target); + MipsLabel* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against 1. 
+ if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ B(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ B(false_target); + } } - } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { // The condition instruction has been materialized, compare the output to 0. - Location cond_val = instruction->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - __ Bnez(cond_val.AsRegister<Register>(), true_target); + if (true_target == nullptr) { + __ Beqz(cond_val.AsRegister<Register>(), false_target); + } else { + __ Bnez(cond_val.AsRegister<Register>(), true_target); + } } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. 
+ HCondition* condition = cond->AsCondition(); + Register lhs = condition->GetLocations()->InAt(0).AsRegister<Register>(); Location rhs_location = condition->GetLocations()->InAt(1); Register rhs_reg = ZERO; @@ -2455,37 +2476,46 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi rhs_reg = rhs_location.AsRegister<Register>(); } - IfCondition if_cond = condition->GetCondition(); + IfCondition if_cond; + MipsLabel* non_fallthrough_target; + if (true_target == nullptr) { + if_cond = condition->GetOppositeCondition(); + non_fallthrough_target = false_target; + } else { + if_cond = condition->GetCondition(); + non_fallthrough_target = true_target; + } + if (use_imm && rhs_imm == 0) { switch (if_cond) { case kCondEQ: - __ Beqz(lhs, true_target); + __ Beqz(lhs, non_fallthrough_target); break; case kCondNE: - __ Bnez(lhs, true_target); + __ Bnez(lhs, non_fallthrough_target); break; case kCondLT: - __ Bltz(lhs, true_target); + __ Bltz(lhs, non_fallthrough_target); break; case kCondGE: - __ Bgez(lhs, true_target); + __ Bgez(lhs, non_fallthrough_target); break; case kCondLE: - __ Blez(lhs, true_target); + __ Blez(lhs, non_fallthrough_target); break; case kCondGT: - __ Bgtz(lhs, true_target); + __ Bgtz(lhs, non_fallthrough_target); break; case kCondB: break; // always false case kCondBE: - __ Beqz(lhs, true_target); // <= 0 if zero + __ Beqz(lhs, non_fallthrough_target); // <= 0 if zero break; case kCondA: - __ Bnez(lhs, true_target); // > 0 if non-zero + __ Bnez(lhs, non_fallthrough_target); // > 0 if non-zero break; case kCondAE: - __ B(true_target); // always true + __ B(non_fallthrough_target); // always true break; } } else { @@ -2496,81 +2526,78 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi } switch (if_cond) { case kCondEQ: - __ Beq(lhs, rhs_reg, true_target); + __ Beq(lhs, rhs_reg, non_fallthrough_target); break; case kCondNE: - __ Bne(lhs, rhs_reg, true_target); + __ Bne(lhs, rhs_reg, 
non_fallthrough_target); break; case kCondLT: - __ Blt(lhs, rhs_reg, true_target); + __ Blt(lhs, rhs_reg, non_fallthrough_target); break; case kCondGE: - __ Bge(lhs, rhs_reg, true_target); + __ Bge(lhs, rhs_reg, non_fallthrough_target); break; case kCondLE: - __ Bge(rhs_reg, lhs, true_target); + __ Bge(rhs_reg, lhs, non_fallthrough_target); break; case kCondGT: - __ Blt(rhs_reg, lhs, true_target); + __ Blt(rhs_reg, lhs, non_fallthrough_target); break; case kCondB: - __ Bltu(lhs, rhs_reg, true_target); + __ Bltu(lhs, rhs_reg, non_fallthrough_target); break; case kCondAE: - __ Bgeu(lhs, rhs_reg, true_target); + __ Bgeu(lhs, rhs_reg, non_fallthrough_target); break; case kCondBE: - __ Bgeu(rhs_reg, lhs, true_target); + __ Bgeu(rhs_reg, lhs, non_fallthrough_target); break; case kCondA: - __ Bltu(rhs_reg, lhs, true_target); + __ Bltu(rhs_reg, lhs, non_fallthrough_target); break; } } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. 
+ if (true_target != nullptr && false_target != nullptr) { __ B(false_target); } } void LocationsBuilderMIPS::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) { - MipsLabel* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - MipsLabel* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - MipsLabel* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + MipsLabel* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + MipsLabel* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
+ nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathMIPS(deoptimize); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize); codegen_->AddSlowPath(slow_path); - MipsLabel* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 059131dcfc..e3a2cb40ef 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -226,9 +226,9 @@ class InstructionCodeGeneratorMIPS : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, MipsLabel* true_target, - MipsLabel* false_target, - MipsLabel* always_true_target); + MipsLabel* false_target); void HandleGoto(HInstruction* got, HBasicBlock* successor); MipsAssembler* const assembler_; diff 
--git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 9b78dec6c4..5864660890 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -420,7 +420,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, : CodeGenerator(graph, kNumberOfGpuRegisters, kNumberOfFpuRegisters, - 0, // kNumberOfRegisterPairs + /* number_of_register_pairs */ 0, ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), arraysize(kCoreCalleeSaves)), ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), @@ -2340,30 +2340,51 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary } void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - HCondition* condition = cond->AsCondition(); - - if (cond->IsIntConstant()) { - int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ B(always_true_target); + Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against 1. 
+ if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ B(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ B(false_target); + } } - } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { // The condition instruction has been materialized, compare the output to 0. - Location cond_val = instruction->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target); + if (true_target == nullptr) { + __ Beqzc(cond_val.AsRegister<GpuRegister>(), false_target); + } else { + __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target); + } } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. 
+ HCondition* condition = cond->AsCondition(); + GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = condition->GetLocations()->InAt(1); GpuRegister rhs_reg = ZERO; @@ -2375,37 +2396,46 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc rhs_reg = rhs_location.AsRegister<GpuRegister>(); } - IfCondition if_cond = condition->GetCondition(); + IfCondition if_cond; + Label* non_fallthrough_target; + if (true_target == nullptr) { + if_cond = condition->GetOppositeCondition(); + non_fallthrough_target = false_target; + } else { + if_cond = condition->GetCondition(); + non_fallthrough_target = true_target; + } + if (use_imm && rhs_imm == 0) { switch (if_cond) { case kCondEQ: - __ Beqzc(lhs, true_target); + __ Beqzc(lhs, non_fallthrough_target); break; case kCondNE: - __ Bnezc(lhs, true_target); + __ Bnezc(lhs, non_fallthrough_target); break; case kCondLT: - __ Bltzc(lhs, true_target); + __ Bltzc(lhs, non_fallthrough_target); break; case kCondGE: - __ Bgezc(lhs, true_target); + __ Bgezc(lhs, non_fallthrough_target); break; case kCondLE: - __ Blezc(lhs, true_target); + __ Blezc(lhs, non_fallthrough_target); break; case kCondGT: - __ Bgtzc(lhs, true_target); + __ Bgtzc(lhs, non_fallthrough_target); break; case kCondB: break; // always false case kCondBE: - __ Beqzc(lhs, true_target); // <= 0 if zero + __ Beqzc(lhs, non_fallthrough_target); // <= 0 if zero break; case kCondA: - __ Bnezc(lhs, true_target); // > 0 if non-zero + __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero break; case kCondAE: - __ B(true_target); // always true + __ B(non_fallthrough_target); // always true break; } } else { @@ -2424,7 +2454,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc case kCondBE: case kCondAE: // if lhs == rhs for a positive condition, then it is a branch - __ B(true_target); + __ B(non_fallthrough_target); break; case kCondNE: case kCondLT: @@ -2437,72 
+2467,68 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc } else { switch (if_cond) { case kCondEQ: - __ Beqc(lhs, rhs_reg, true_target); + __ Beqc(lhs, rhs_reg, non_fallthrough_target); break; case kCondNE: - __ Bnec(lhs, rhs_reg, true_target); + __ Bnec(lhs, rhs_reg, non_fallthrough_target); break; case kCondLT: - __ Bltc(lhs, rhs_reg, true_target); + __ Bltc(lhs, rhs_reg, non_fallthrough_target); break; case kCondGE: - __ Bgec(lhs, rhs_reg, true_target); + __ Bgec(lhs, rhs_reg, non_fallthrough_target); break; case kCondLE: - __ Bgec(rhs_reg, lhs, true_target); + __ Bgec(rhs_reg, lhs, non_fallthrough_target); break; case kCondGT: - __ Bltc(rhs_reg, lhs, true_target); + __ Bltc(rhs_reg, lhs, non_fallthrough_target); break; case kCondB: - __ Bltuc(lhs, rhs_reg, true_target); + __ Bltuc(lhs, rhs_reg, non_fallthrough_target); break; case kCondAE: - __ Bgeuc(lhs, rhs_reg, true_target); + __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); break; case kCondBE: - __ Bgeuc(rhs_reg, lhs, true_target); + __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); break; case kCondA: - __ Bltuc(rhs_reg, lhs, true_target); + __ Bltuc(rhs_reg, lhs, non_fallthrough_target); break; } } } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. 
+ if (true_target != nullptr && false_target != nullptr) { __ B(false_target); } } void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { - Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
+ nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } @@ -2511,8 +2537,10 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS64(deoptimize); codegen_->AddSlowPath(slow_path); - Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index ac3162f736..a078dd1819 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -230,9 +230,9 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target); + Label* false_target); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); diff --git 
a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc index bf354e7ee2..644a3fb75e 100644 --- a/compiler/optimizing/code_generator_utils.cc +++ b/compiler/optimizing/code_generator_utils.cc @@ -95,19 +95,8 @@ void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, *shift = is_long ? p - 64 : p - 32; } -// Is it valid to reverse the condition? Uses the values supplied to -// GenerateTestAndBranch() in instruction generators. -bool CanReverseCondition(Label* always_true_target, - Label* false_target, - HCondition* condition) { - // 'always_true_target' is null when the 'true' path is to the next - // block to be generated. Check the type of the condition to ensure that - // FP conditions are not swapped. This is for future fusing of HCompare and - // HCondition. - // Note: If the condition is nullptr, then it is always okay to reverse. - return always_true_target == nullptr && false_target != nullptr && - (condition == nullptr || - !Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())); +bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input) { + return !cond_input->IsCondition() || cond_input->AsCondition()->NeedsMaterialization(); } } // namespace art diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h index 628eee8885..7efed8c9ec 100644 --- a/compiler/optimizing/code_generator_utils.h +++ b/compiler/optimizing/code_generator_utils.h @@ -21,18 +21,16 @@ namespace art { -class Label; -class HCondition; +class HInstruction; // Computes the magic number and the shift needed in the div/rem by constant algorithm, as out // arguments `magic` and `shift` void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift); -// Is it valid to reverse the condition? Uses the values supplied to -// GenerateTestAndBranch() in instruction generators. 
-bool CanReverseCondition(Label* always_true_target, - Label* false_target, - HCondition* condition); +// Returns true if `cond_input` is expected to have a location. Assumes that +// `cond_input` is a conditional input of the currently emitted instruction and +// that it has been previously visited by the InstructionCodeGenerator. +bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input); } // namespace art diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0147b010f2..999306c34b 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -35,6 +35,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace x86 { static constexpr int kCurrentMethodStackOffset = 0; @@ -300,15 +303,6 @@ class TypeCheckSlowPathX86 : public SlowPathCode { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. 
- Register obj = locations->InAt(0).AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -329,12 +323,15 @@ class TypeCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { @@ -425,6 +422,221 @@ class ArraySetSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ movl(out, Address(out, offset)); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = index_.AsRegister<Register>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::x86::X86Assembler::shll and + // art::x86::X86Assembler::AddImmediate below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. 
Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ movl(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). + __ shll(index_reg, Immediate(TIMES_4)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddImmediate(index_reg, Immediate(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. 
+ InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_)); + } + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + x86_codegen->Move32(out_, Location::RegisterLocation(EAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<Register>()); + size_t obj = static_cast<int>(obj_.AsRegister<Register>()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return static_cast<Register>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on x86 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). 
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathX86 : public SlowPathCode { + public: + ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + x86_codegen->Move32(out_, Location::RegisterLocation(EAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; 
+ + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86); +}; + #undef __ #define __ down_cast<X86Assembler*>(GetAssembler())-> @@ -513,9 +725,9 @@ void CodeGeneratorX86::InvokeRuntime(int32_t entry_point_offset, } CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, @@ -582,7 +794,7 @@ Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const { @@ -783,7 +995,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type t LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorX86::Move32(Location destination, Location source) { @@ -1158,26 +1370,19 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, __ j(final_condition, true_label); } -void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr, - HCondition* condition, - Label* true_target, - Label* false_target, - Label* always_true_target) { +void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition, + Label* true_target_in, + Label* false_target_in) { + // Generated branching requires both targets to be explicit. If either of the + // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. + Label fallthrough_target; + Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; + Label* false_target = false_target_in == nullptr ? 
&fallthrough_target : false_target_in; + LocationSummary* locations = condition->GetLocations(); Location left = locations->InAt(0); Location right = locations->InAt(1); - // We don't want true_target as a nullptr. - if (true_target == nullptr) { - true_target = always_true_target; - } - bool falls_through = (false_target == nullptr); - - // FP compares don't like null false_targets. - if (false_target == nullptr) { - false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - } - Primitive::Type type = condition->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: @@ -1195,138 +1400,141 @@ void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr, LOG(FATAL) << "Unexpected compare type " << type; } - if (!falls_through) { + if (false_target != &fallthrough_target) { __ jmp(false_target); } + + if (fallthrough_target.IsLinked()) { + __ Bind(&fallthrough_target); + } +} + +static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { + // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS + // are set only strictly before `branch`. We can't use the eflags on long/FP + // conditions if they are materialized due to the complex branching. + return cond->IsCondition() && + cond->GetNext() == branch && + cond->InputAt(0)->GetType() != Primitive::kPrimLong && + !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType()); } void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - if (cond->IsIntConstant()) { + Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. 
- int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ jmp(always_true_target); + if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ jmp(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ jmp(false_target); + } } - } else { - HCondition* condition = cond->AsCondition(); - bool is_materialized = - condition == nullptr || condition->NeedsMaterialization(); - // Moves do not affect the eflags register, so if the condition is - // evaluated just before the if, we don't need to evaluate it - // again. We can't use the eflags on long/FP conditions if they are - // materialized due to the complex branching. - Primitive::Type type = (condition != nullptr) - ? cond->InputAt(0)->GetType() - : Primitive::kPrimInt; - bool eflags_set = condition != nullptr - && condition->IsBeforeWhenDisregardMoves(instruction) - && (type != Primitive::kPrimLong && !Primitive::IsFloatingPointType(type)); - // Can we optimize the jump if we know that the next block is the true case? - bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition); - if (is_materialized) { - if (!eflags_set) { - // Materialized condition, compare against 0. 
- Location lhs = instruction->GetLocations()->InAt(0); - if (lhs.IsRegister()) { - __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); - } else { - __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); - } - if (can_jump_to_false) { - __ j(kEqual, false_target); - return; - } - __ j(kNotEqual, true_target); + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + if (AreEflagsSetFrom(cond, instruction)) { + if (true_target == nullptr) { + __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target); } else { - if (can_jump_to_false) { - __ j(X86Condition(condition->GetOppositeCondition()), false_target); - return; - } - __ j(X86Condition(condition->GetCondition()), true_target); + __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } else { - // Condition has not been materialized, use its inputs as the - // comparison and its condition as the branch condition. - - // Is this a long or FP comparison that has been folded into the HCondition? - if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { - // Generate the comparison directly. - GenerateCompareTestAndBranch(instruction->AsIf(), - condition, - true_target, - false_target, - always_true_target); - return; + // Materialized condition, compare against 0. 
+ Location lhs = instruction->GetLocations()->InAt(condition_input_index); + if (lhs.IsRegister()) { + __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); + } else { + __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); } - - Location lhs = cond->GetLocations()->InAt(0); - Location rhs = cond->GetLocations()->InAt(1); - // LHS is guaranteed to be in a register (see - // LocationsBuilderX86::VisitCondition). - if (rhs.IsRegister()) { - __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); - } else if (rhs.IsConstant()) { - int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - if (constant == 0) { - __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); - } else { - __ cmpl(lhs.AsRegister<Register>(), Immediate(constant)); - } + if (true_target == nullptr) { + __ j(kEqual, false_target); } else { - __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); + __ j(kNotEqual, true_target); } + } + } else { + // Condition has not been materialized, use its inputs as the comparison and + // its condition as the branch condition. + HCondition* condition = cond->AsCondition(); - if (can_jump_to_false) { - __ j(X86Condition(condition->GetOppositeCondition()), false_target); - return; - } + // If this is a long or FP comparison that has been folded into + // the HCondition, generate the comparison directly. + Primitive::Type type = condition->InputAt(0)->GetType(); + if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + GenerateCompareTestAndBranch(condition, true_target, false_target); + return; + } + Location lhs = condition->GetLocations()->InAt(0); + Location rhs = condition->GetLocations()->InAt(1); + // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition). 
+ if (rhs.IsRegister()) { + __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); + } else if (rhs.IsConstant()) { + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); + if (constant == 0) { + __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); + } else { + __ cmpl(lhs.AsRegister<Register>(), Immediate(constant)); + } + } else { + __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); + } + if (true_target == nullptr) { + __ j(X86Condition(condition->GetOppositeCondition()), false_target); + } else { __ j(X86Condition(condition->GetCondition()), true_target); } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. + if (true_target != nullptr && false_target != nullptr) { __ jmp(false_target); } } void LocationsBuilderX86::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::Any()); } } void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { - Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* 
true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } } @@ -1335,8 +1543,10 @@ void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathX86(deoptimize); codegen_->AddSlowPath(slow_path); - Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderX86::VisitLocal(HLocal* local) { @@ -1767,6 +1977,9 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { + // This call to HandleInvoke allocates a temporary (core) register + // which is also used to transfer the hidden argument from FP to + // core register. HandleInvoke(invoke); // Add the hidden argument. 
invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7)); @@ -1774,31 +1987,42 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + LocationSummary* locations = invoke->GetLocations(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // Set the hidden argument. + // Set the hidden argument. This is safe to do this here, as XMM7 + // won't be modified thereafter, before the `call` instruction. + DCHECK_EQ(XMM7, hidden_reg); __ movl(temp, Immediate(invoke->GetDexMethodIndex())); - __ movd(invoke->GetLocations()->GetTemp(1).AsFpuRegister<XmmRegister>(), temp); + __ movd(hidden_reg, temp); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ movl(temp, Address(ESP, receiver.GetStackIndex())); + // /* HeapReference<Class> */ temp = temp->klass_ __ movl(temp, Address(temp, class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. 
+ // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86WordSize).Int32Value())); + __ call(Address(temp, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -3871,7 +4095,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, method_reg = reg; __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } - // temp = temp->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ movl(reg, Address(method_reg, ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value())); // temp = temp[index_in_cache] @@ -3915,10 +4139,17 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // temp = object->GetClass(); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. 
+ // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); @@ -3980,18 +4211,29 @@ void CodeGeneratorX86::MarkGCCard(Register temp, void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + kEmitCompilerReadBarrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - // The output overlaps in case of long: we don't want the low move to overwrite - // the object's location. - locations->SetOut(Location::RequiresRegister(), - (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap - : Location::kNoOutputOverlap); + // The output overlaps in case of long: we don't want the low move + // to overwrite the object's location. Likewise, in the case of + // an object field get with read barriers enabled, we do not want + // the move to overwrite the object's location, as we need it to emit + // the read barrier. + locations->SetOut( + Location::RequiresRegister(), + (object_field_get_with_read_barrier || instruction->GetType() == Primitive::kPrimLong) ? 
+ Location::kOutputOverlap : + Location::kNoOutputOverlap); } if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { @@ -4007,7 +4249,8 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - Register base = locations->InAt(0).AsRegister<Register>(); + Location base_loc = locations->InAt(0); + Register base = base_loc.AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); Primitive::Type field_type = field_info.GetFieldType(); @@ -4082,7 +4325,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<Register>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -4410,24 +4653,35 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - // The output overlaps in case of long: we don't want the low move to overwrite - // the array's location. 
- locations->SetOut(Location::RequiresRegister(), - (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap - : Location::kNoOutputOverlap); + // The output overlaps in case of long: we don't want the low move + // to overwrite the array's location. Likewise, in the case of an + // object array get with read barriers enabled, we do not want the + // move to overwrite the array's location, as we need it to emit + // the read barrier. + locations->SetOut( + Location::RequiresRegister(), + (instruction->GetType() == Primitive::kPrimLong || object_array_get_with_read_barrier) ? + Location::kOutputOverlap : + Location::kNoOutputOverlap); } } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -4482,6 +4736,9 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { @@ -4546,8 +4803,17 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } if (type == Primitive::kPrimNot) { - Register out = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = 
mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -4558,14 +4824,18 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // optimization. Primitive::Type value_type = instruction->GetComponentType(); + bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); bool is_byte_type = (value_type == Primitive::kPrimBoolean) || (value_type == Primitive::kPrimByte); @@ -4586,20 +4856,21 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + locations->AddTemp(Location::RegisterLocation(ECX)); // Possibly used for read barrier too. 
} } void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register array = locations->InAt(0).AsRegister<Register>(); + Location array_loc = locations->InAt(0); + Register array = array_loc.AsRegister<Register>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -4639,6 +4910,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<Register>(), TIMES_4, offset); + if (!value.IsRegister()) { // Just setting null. 
DCHECK(instruction->InputAt(2)->IsNullConstant()); @@ -4646,7 +4918,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { __ movl(address, Immediate(0)); codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4655,7 +4927,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { NearLabel done, not_null, do_put; SlowPathCode* slow_path = nullptr; Register temp = locations->GetTemp(0).AsRegister<Register>(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4667,22 +4939,62 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { __ Bind(&not_null); } - __ movl(temp, Address(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, component_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ cmpl(temp, Address(register_value, class_offset)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - __ j(kEqual, &do_put); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, super_offset)); - // No need to unpoison, we're comparing against null.. 
- __ testl(temp, temp); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ movl(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ movl(temp, Address(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = register_value->klass_ + // __ movl(temp2, Address(register_value, class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); + // + // __ cmpl(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ jmp(slow_path->GetEntryLabel()); } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); + // /* HeapReference<Class> */ temp = array->klass_ + __ movl(temp, Address(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ movl(temp, Address(temp, component_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor the object reference in `register_value->klass`, as + // we are comparing two poisoned references. 
+ __ cmpl(temp, Address(register_value, class_offset)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ movl(temp, Address(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ testl(temp, temp); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); + } } } @@ -4693,7 +5005,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } else { __ movl(address, register_value); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4708,6 +5020,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } + case Primitive::kPrimInt: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = index.IsConstant() @@ -5178,7 +5491,8 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(EAX)); + Location::RegisterLocation(EAX), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { @@ -5192,18 +5506,40 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { return; } - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ 
movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leal(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } } else { DCHECK(cls->CanCallRuntime()); - __ movl(out, Address( - current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ movl(out, Address(current_method, + ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ leal(out, Address(out, cache_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ movl(out, Address(out, cache_offset)); + } SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); @@ -5257,12 +5593,35 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); 
Register current_method = locations->InAt(0).AsRegister<Register>(); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leal(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ leal(out, Address(out, cache_offset)); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ movl(out, Address(out, cache_offset)); + } + __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -5306,40 +5665,44 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case 
TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86 uses this register too. - locations->SetOut(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(EAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86 uses this "out" register too. + locations->SetOut(Location::RequiresRegister()); + // When read barriers are enabled, we need a temporary register for + // some cases. 
+ if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location cls = locations->InAt(1); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5354,15 +5717,9 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? 
obj - : out; - __ movl(target, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ movl(out, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -5379,13 +5736,23 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ jmp(&done); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. NearLabel loop; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5402,6 +5769,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop, success; @@ -5413,8 +5781,17 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &success); + Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -5426,6 +5803,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. NearLabel exact_check; @@ -5436,9 +5814,18 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ movl(out, Address(out, component_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. 
__ j(kEqual, &done); @@ -5449,6 +5836,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ jmp(&done); break; } + case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<Register>()); @@ -5457,8 +5845,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -5467,13 +5855,25 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. 
+ DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ jmp(&done); } @@ -5498,75 +5898,73 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - break; - case TypeCheckKind::kInterfaceCheck: - case TypeCheckKind::kUnresolvedCheck: - call_kind = LocationSummary::kCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; case TypeCheckKind::kArrayCheck: + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86 uses this register too. 
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location cls = locations->InAt(1); - Register temp = locations->WillCall() - ? 
kNoRegister - : locations->GetTemp(0).AsRegister<Register>(); - + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); - NearLabel done, abstract_entry; + NearLabel done; // Avoid null check if we know obj is not null. 
if (instruction->MustDoNullCheck()) { __ testl(obj, obj); __ j(kEqual, &done); } - if (locations->WillCall()) { - __ movl(obj, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { @@ -5577,19 +5975,44 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } // Jump to slow path for throwing the exception or doing a // more involved array check. - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - NearLabel loop, success; + NearLabel loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. 
__ testl(temp, temp); - // Jump to the slow path to throw the exception. - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); } else { @@ -5599,6 +6022,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ j(kNotEqual, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop; @@ -5610,16 +6034,39 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ testl(temp, temp); __ j(kNotEqual, &loop); - // Jump to the slow path to throw the exception. 
- __ jmp(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + NearLabel check_non_primitive_component_type; if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); } else { @@ -5627,29 +6074,67 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ movl(temp, Address(temp, component_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. 
__ testl(temp, temp); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kEqual, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. 
+ __ jmp(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -5800,6 +6285,82 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. 
+ * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<Register>()); + } +} + +void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 177a059df8..064051c7f4 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -227,14 +227,12 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target); - void GenerateCompareTestAndBranch(HIf* if_inst, - HCondition* condition, + Label* false_target); + void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, - Label* false_target, - Label* always_true_target); + Label* false_target); void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void HandleGoto(HInstruction* got, HBasicBlock* successor); @@ -397,6 +395,51 @@ class CodeGeneratorX86 : public CodeGenerator { void Finalize(CodeAllocator* allocator) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. 
+ void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index e2ad6673c7..4088160b3f 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -34,6 +34,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace x86_64 { static constexpr int kCurrentMethodStackOffset = 0; @@ -52,16 +55,16 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored 
in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + instruction_, + instruction_->GetDexPc(), + this); } bool IsFatal() const OVERRIDE { return true; } @@ -78,16 +81,16 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), + instruction_, + instruction_->GetDexPc(), + this); } bool IsFatal() const OVERRIDE { return true; } @@ -139,18 +142,18 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { : instruction_(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), + instruction_, + instruction_->GetDexPc(), + this); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == 
nullptr) { __ jmp(GetReturnLabel()); } else { - __ jmp(x64_codegen->GetLabelOf(successor_)); + __ jmp(x86_64_codegen->GetLabelOf(successor_)); } } @@ -180,7 +183,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. @@ -196,8 +199,10 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), - instruction_, instruction_->GetDexPc(), this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + instruction_, + instruction_->GetDexPc(), + this); } bool IsFatal() const OVERRIDE { return true; } @@ -222,22 +227,25 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = at_->GetLocations(); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex())); - x64_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) - : QUICK_ENTRY_POINT(pInitializeType), - at_, dex_pc_, this); + x86_64_codegen->InvokeRuntime(do_clinit_ ? 
+ QUICK_ENTRY_POINT(pInitializeStaticStorage) : + QUICK_ENTRY_POINT(pInitializeType), + at_, + dex_pc_, + this); Location out = locations->Out(); // Move the class to the desired location. if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - x64_codegen->Move(out, Location::RegisterLocation(RAX)); + x86_64_codegen->Move(out, Location::RegisterLocation(RAX)); } RestoreLiveRegisters(codegen, locations); @@ -271,18 +279,18 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction_->GetStringIndex())); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), - instruction_, - instruction_->GetDexPc(), - this); - x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), + instruction_, + instruction_->GetDexPc(), + this); + x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -308,18 +316,9 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put 
it back in place. - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -336,21 +335,24 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction_, - dex_pc, - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), + instruction_, + dex_pc, + this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction_, - dex_pc, - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), + instruction_, + dex_pc, + this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { if (instruction_->IsInstanceOf()) { - x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); + x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); } RestoreLiveRegisters(codegen, locations); @@ -375,15 +377,15 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); DCHECK(instruction_->IsDeoptimize()); HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - deoptimize, - 
deoptimize->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + deoptimize, + deoptimize->GetDexPc(), + this); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -421,11 +423,11 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction_, - instruction_->GetDexPc(), - this); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction_, + instruction_->GetDexPc(), + this); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -438,6 +440,219 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial + // object has been overwritten by (or after) the heap object + // reference load to be instrumented, e.g.: + // + // __ movl(out, Address(out, offset)); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; +} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + CpuRegister reg_out = out_.AsRegister<CpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute real offset and store it in index_. + Register index_reg = index_.AsRegister<CpuRegister>().AsRegister(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::x86_64::X86_64Assembler::shll and + // art::x86_64::X86_64Assembler::AddImmediate below), but it + // has not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. 
Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister(); + __ movl(CpuRegister(free_reg), CpuRegister(index_reg)); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the + // scale factor (2) cannot overflow in practice, as the + // runtime is unable to allocate object arrays with a size + // larger than 2^26 - 1 (that is, 2^28 - 4 bytes). + __ shll(CpuRegister(index_reg), Immediate(TIMES_4)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddImmediate(CpuRegister(index_reg), Immediate(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegister()); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. 
+ InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_)); + } + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierForHeapReferenceSlowPathX86_64"; + } + + private: + CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister()); + size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return static_cast<CpuRegister>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on x86-64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). 
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + 
DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64); +}; + #undef __ #define __ down_cast<X86_64Assembler*>(GetAssembler())-> @@ -533,7 +748,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo method_reg = reg.AsRegister(); __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } - // temp = temp->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ movq(reg, Address(CpuRegister(method_reg), ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue())); @@ -578,10 +793,17 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); - // temp = object->GetClass(); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); __ movq(temp, Address(temp, method_offset)); @@ -672,9 +894,9 @@ static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. 
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, - const X86_64InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, @@ -728,7 +950,7 @@ Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { @@ -1082,26 +1304,19 @@ void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, __ j(X86_64FPCondition(cond->GetCondition()), true_label); } -void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr, - HCondition* condition, - Label* true_target, - Label* false_target, - Label* always_true_target) { +void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition, + Label* true_target_in, + Label* false_target_in) { + // Generated branching requires both targets to be explicit. If either of the + // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. + Label fallthrough_target; + Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; + Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; + LocationSummary* locations = condition->GetLocations(); Location left = locations->InAt(0); Location right = locations->InAt(1); - // We don't want true_target as a nullptr. - if (true_target == nullptr) { - true_target = always_true_target; - } - bool falls_through = (false_target == nullptr); - - // FP compares don't like null false_targets. 
- if (false_target == nullptr) { - false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - } - Primitive::Type type = condition->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: { @@ -1160,135 +1375,140 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr, LOG(FATAL) << "Unexpected condition type " << type; } - if (!falls_through) { + if (false_target != &fallthrough_target) { __ jmp(false_target); } + + if (fallthrough_target.IsLinked()) { + __ Bind(&fallthrough_target); + } +} + +static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { + // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS + // are set only strictly before `branch`. We can't use the eflags on long + // conditions if they are materialized due to the complex branching. + return cond->IsCondition() && + cond->GetNext() == branch && + !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType()); } void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - if (cond->IsIntConstant()) { + Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. 
- int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ jmp(always_true_target); + if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ jmp(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ jmp(false_target); + } } - } else { - HCondition* condition = cond->AsCondition(); - bool is_materialized = condition == nullptr || condition->NeedsMaterialization(); - // Moves do not affect the eflags register, so if the condition is - // evaluated just before the if, we don't need to evaluate it - // again. We can't use the eflags on FP conditions if they are - // materialized due to the complex branching. - Primitive::Type type = (condition != nullptr) - ? cond->InputAt(0)->GetType() - : Primitive::kPrimInt; - bool eflags_set = condition != nullptr - && condition->IsBeforeWhenDisregardMoves(instruction) - && !Primitive::IsFloatingPointType(type); - // Can we optimize the jump if we know that the next block is the true case? - bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition); - - if (is_materialized) { - if (!eflags_set) { - // Materialized condition, compare against 0. 
- Location lhs = instruction->GetLocations()->InAt(0); - if (lhs.IsRegister()) { - __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); - } else { - __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), - Immediate(0)); - } - if (can_jump_to_false) { - __ j(kEqual, false_target); - return; - } - __ j(kNotEqual, true_target); + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + if (AreEflagsSetFrom(cond, instruction)) { + if (true_target == nullptr) { + __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target); } else { - if (can_jump_to_false) { - __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); - return; - } - __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); + __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); } } else { - // Condition has not been materialized, use its inputs as the - // comparison and its condition as the branch condition. - - // Is this a long or FP comparison that has been folded into the HCondition? - if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { - // Generate the comparison directly. - GenerateCompareTestAndBranch(instruction->AsIf(), condition, - true_target, false_target, always_true_target); - return; + // Materialized condition, compare against 0. 
+ Location lhs = instruction->GetLocations()->InAt(condition_input_index); + if (lhs.IsRegister()) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); } - - Location lhs = cond->GetLocations()->InAt(0); - Location rhs = cond->GetLocations()->InAt(1); - if (rhs.IsRegister()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); - } else if (rhs.IsConstant()) { - int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - if (constant == 0) { - __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); - } else { - __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); - } + if (true_target == nullptr) { + __ j(kEqual, false_target); } else { - __ cmpl(lhs.AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), rhs.GetStackIndex())); + __ j(kNotEqual, true_target); } + } + } else { + // Condition has not been materialized, use its inputs as the + // comparison and its condition as the branch condition. + HCondition* condition = cond->AsCondition(); - if (can_jump_to_false) { - __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); - return; - } + // If this is a long or FP comparison that has been folded into + // the HCondition, generate the comparison directly. 
+ Primitive::Type type = condition->InputAt(0)->GetType(); + if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + GenerateCompareTestAndBranch(condition, true_target, false_target); + return; + } + Location lhs = condition->GetLocations()->InAt(0); + Location rhs = condition->GetLocations()->InAt(1); + if (rhs.IsRegister()) { + __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); + } else if (rhs.IsConstant()) { + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); + } + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), + Address(CpuRegister(RSP), rhs.GetStackIndex())); + } + if (true_target == nullptr) { + __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); + } else { __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. 
+ if (true_target != nullptr && false_target != nullptr) { __ jmp(false_target); } } void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::Any()); } } void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { - Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
+ nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } } @@ -1297,8 +1517,10 @@ void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathX86_64(deoptimize); codegen_->AddSlowPath(slow_path); - Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderX86_64::VisitLocal(HLocal* local) { @@ -1836,7 +2058,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Typ LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { @@ -1907,7 +2129,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1920,31 +2141,41 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
- CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); + LocationSummary* locations = invoke->GetLocations(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); - // Set the hidden argument. - CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(); + // Set the hidden argument. This is safe to do this here, as RAX + // won't be modified thereafter, before the `call` instruction. + DCHECK_EQ(RAX, hidden_reg.AsRegister()); codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); + // /* HeapReference<Class> */ temp = temp->klass_ __ movl(temp, Address(temp, class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). 
__ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); __ movq(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64WordSize).SizeValue())); + __ call(Address(temp, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -3686,13 +3917,23 @@ void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the move to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); } } @@ -3701,7 +3942,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location base_loc = locations->InAt(0); + CpuRegister base = base_loc.AsRegister<CpuRegister>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); Primitive::Type field_type = field_info.GetFieldType(); @@ -3761,7 +4003,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -4079,20 +4321,31 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object array get when read barriers + // are enabled: we do not want the move to overwrite the array's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + Location obj_loc = locations->InAt(0); + CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -4147,8 +4400,9 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); if (index.IsConstant()) { @@ -4203,8 +4457,17 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - 
CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -4214,10 +4477,14 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); @@ -4229,18 +4496,24 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { if (needs_write_barrier) { // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + + // This first temporary register is possibly used for heap + // reference poisoning and/or read barrier emission too. 
+ locations->AddTemp(Location::RequiresRegister()); + // This second temporary register is possibly used for read + // barrier emission too. locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister array = locations->InAt(0).AsRegister<CpuRegister>(); + Location array_loc = locations->InAt(0); + CpuRegister array = array_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -4284,6 +4557,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); + if (!value.IsRegister()) { // Just setting null. DCHECK(instruction->InputAt(2)->IsNullConstant()); @@ -4312,22 +4586,62 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ Bind(&not_null); } - __ movl(temp, Address(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, component_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ cmpl(temp, Address(register_value, class_offset)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - __ j(kEqual, &do_put); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, super_offset)); - // No need to unpoison the result, we're comparing against null. 
- __ testl(temp, temp); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ movl(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ movl(temp, Address(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = register_value->klass_ + // __ movl(temp2, Address(register_value, class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); + // + // __ cmpl(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ jmp(slow_path->GetEntryLabel()); } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); + // /* HeapReference<Class> */ temp = array->klass_ + __ movl(temp, Address(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ movl(temp, Address(temp, component_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor the object reference in `register_value->klass`, as + // we are comparing two poisoned references. 
+ __ cmpl(temp, Address(register_value, class_offset)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ movl(temp, Address(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ testl(temp, temp); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); + } } } @@ -4353,6 +4667,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } + case Primitive::kPrimInt: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = index.IsConstant() @@ -4802,7 +5117,8 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(RAX)); + Location::RegisterLocation(RAX), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { @@ -4816,18 +5132,40 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { return; } - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = 
&(current_method->declaring_class_) + __ leaq(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } } else { DCHECK(cls->CanCallRuntime()); - __ movq(out, Address( - current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ movq(out, Address(current_method, + ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ leaq(out, Address(out, cache_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ movl(out, Address(out, cache_offset)); + } SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); @@ -4872,12 +5210,35 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - __ movq(out, Address(out, 
mirror::Class::DexCacheStringsOffset().Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leaq(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ leaq(out, Address(out, cache_offset)); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ movl(out, Address(out, cache_offset)); + } + __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -4921,40 +5282,44 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + 
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86_64 uses this register too. - locations->SetOut(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(RAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86_64 uses this "out" register too. + locations->SetOut(Location::RequiresRegister()); + // When read barriers are enabled, we need a temporary register for + // some cases. 
+ if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + Location obj_loc = locations->InAt(0); + CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -4969,15 +5334,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - CpuRegister target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? 
obj - : out; - __ movl(target, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ movl(out, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -4999,13 +5358,23 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. NearLabel loop, success; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5022,6 +5391,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop, success; @@ -5033,8 +5403,17 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &success); + Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -5046,6 +5425,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. NearLabel exact_check; @@ -5056,9 +5436,18 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ movl(out, Address(out, component_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. 
__ j(kEqual, &done); @@ -5069,6 +5458,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ jmp(&done); break; } + case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<CpuRegister>()); @@ -5077,8 +5467,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -5087,13 +5477,25 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. 
+ DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ jmp(&done); } @@ -5118,58 +5520,60 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86_64 uses this register too. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. 
+ locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + Location obj_loc = locations->InAt(0); + CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); - CpuRegister temp = locations->WillCall() - ? 
CpuRegister(kNoRegister) - : locations->GetTemp(0).AsRegister<CpuRegister>(); - + Location temp_loc = locations->GetTemp(0); + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); NearLabel done; // Avoid null check if we know obj is not null. 
@@ -5178,15 +5582,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); } - if (locations->WillCall()) { - __ movl(obj, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { @@ -5197,19 +5597,44 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // Jump to slow path for throwing the exception or doing a // more involved array check. - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - NearLabel loop; + NearLabel loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. __ testl(temp, temp); - // Jump to the slow path to throw the exception. - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { @@ -5219,6 +5644,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kNotEqual, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop; @@ -5230,16 +5656,39 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ testl(temp, temp); __ j(kNotEqual, &loop); - // Jump to the slow path to throw the exception. - __ jmp(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + NearLabel check_non_primitive_component_type; if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { @@ -5247,29 +5696,67 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ movl(temp, Address(temp, component_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. __ testl(temp, temp); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kEqual, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. 
+ // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HCheckCast + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ jmp(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -5402,6 +5889,82 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } +void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier.
+ * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<CpuRegister>()); + } +} + +void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 708450835d..145b1f33b4 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -217,14 +217,12 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target); - void GenerateCompareTestAndBranch(HIf* if_inst, - HCondition* condition, + Label* false_target); + void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, - Label* false_target, - Label* always_true_target); + Label* false_target); void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void HandleGoto(HInstruction* got, HBasicBlock* successor); @@ -352,6 +350,51 @@ class CodeGeneratorX86_64 : public CodeGenerator { return isa_features_; } + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e., when it is different from + // Location::NoLocation()), the offset value passed to + // artReadBarrierSlow is adjusted to take `index` into account.
+ void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + int ConstantAreaStart() const { return constant_area_start_; } diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 040bf6a45e..371588fc47 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -55,7 +55,23 @@ ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() { bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathX86 slow path. 
Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathX86 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { @@ -1571,26 +1587,32 @@ void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) { GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>())); } -static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, - bool is_volatile, X86Assembler* assembler) { - Register base = locations->InAt(1).AsRegister<Register>(); - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); - Location output = locations->Out(); +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile, + CodeGeneratorX86* codegen) { + X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + Location base_loc = locations->InAt(1); + Register base = base_loc.AsRegister<Register>(); + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); + Location output_loc = locations->Out(); switch (type) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register output_reg = output.AsRegister<Register>(); - __ movl(output_reg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + Register output = output_loc.AsRegister<Register>(); + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); if (type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(output_reg); + codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); } break; } case 
Primitive::kPrimLong: { - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); + Register output_lo = output_loc.AsRegisterPairLow<Register>(); + Register output_hi = output_loc.AsRegisterPairHigh<Register>(); if (is_volatile) { // Need to use a XMM to read atomically. XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); @@ -1613,8 +1635,13 @@ static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long, bool is_volatile) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); @@ -1653,22 +1680,22 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); } @@ -1890,13 +1917,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); - // locked cmpxchg has full barrier semantics, and we don't need + // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. // Convert ZF into the boolean result. 
__ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); + // In the case of the `UnsafeCASObject` intrinsic, accessing an + // object in the heap with LOCK CMPXCHG does not require a read + // barrier, as we do not keep a reference to this heap location. + // However, if heap poisoning is enabled, we need to unpoison the + // values that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value` has been moved to a temporary register, no need @@ -1929,8 +1961,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code LOG(FATAL) << "Unexpected CAS type " << type; } - // locked cmpxchg has full barrier semantics, and we don't need - // scheduling barriers at this time. + // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we + // don't need scheduling barriers at this time. // Convert ZF into the boolean result. __ setb(kZero, out.AsRegister<Register>()); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index a29f3ef1d1..2d9f01b821 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -50,8 +50,24 @@ ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() { bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) { Dispatch(invoke); - const LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + LocationSummary* res = invoke->GetLocations(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathX86_64 slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect, + // ReadBarrierForHeapReferenceSlowPathX86_64 for HInvokeVirtual).
So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) { @@ -917,6 +933,10 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); } +// TODO: Implement read barriers in the SystemArrayCopy intrinsic. +// Note that this code path is not used (yet) because we do not +// intrinsify methods that can go into the IntrinsicSlowPathX86_64 +// slow path. void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1698,23 +1718,30 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true)); } -static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, - bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) { - CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); - CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); - CpuRegister trg = locations->Out().AsRegister<CpuRegister>(); +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile ATTRIBUTE_UNUSED, + CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + Location base_loc = locations->InAt(1); + CpuRegister base = base_loc.AsRegister<CpuRegister>(); + Location offset_loc = locations->InAt(2); + CpuRegister offset = offset_loc.AsRegister<CpuRegister>(); + Location output_loc = locations->Out(); + CpuRegister output = locations->Out().AsRegister<CpuRegister>(); switch (type) { case Primitive::kPrimInt: case 
Primitive::kPrimNot: - __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); if (type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); } break; case Primitive::kPrimLong: - __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); break; default: @@ -1724,8 +1751,13 @@ static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); @@ -1754,22 +1786,22 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invo void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); } @@ -1961,13 +1993,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); - // locked cmpxchg has full barrier semantics, and we don't need + // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. // Convert ZF into the boolean result. 
__ setcc(kZero, out); __ movzxb(out, out); + // In the case of the `UnsafeCASObject` intrinsic, accessing an + // object in the heap with LOCK CMPXCHG does not require a read + // barrier, as we do not keep a reference to this heap location. + // However, if heap poisoning is enabled, we need to unpoison the + // values that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value_reg` has been moved to a temporary register, no need @@ -1992,7 +2029,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c LOG(FATAL) << "Unexpected CAS type " << type; } - // locked cmpxchg has full barrier semantics, and we don't need + // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. // Convert ZF into the boolean result. diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 6fbb6823d6..5b89cfef5a 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -119,19 +119,10 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { : ref_info_(ref_info), offset_(offset), index_(index), - declaring_class_def_index_(declaring_class_def_index), - may_become_unknown_(true) { + declaring_class_def_index_(declaring_class_def_index) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); - - if (ref_info->IsSingletonAndNotReturned()) { - // We try to track stores to singletons that aren't returned to eliminate the stores - // since values in singleton's fields cannot be killed due to aliasing. Those values - // can still be killed due to merging values since we don't build phi for merging heap - // values. SetMayBecomeUnknown(true) may be called later once such merge becomes possible. 
- may_become_unknown_ = false; - } } ReferenceInfo* GetReferenceInfo() const { return ref_info_; } @@ -148,21 +139,11 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { return index_ != nullptr; } - // Returns true if this heap location's value may become unknown after it's - // set to a value, due to merge of values, or killed due to aliasing. - bool MayBecomeUnknown() const { - return may_become_unknown_; - } - void SetMayBecomeUnknown(bool val) { - may_become_unknown_ = val; - } - private: ReferenceInfo* const ref_info_; // reference for instance/static field or array access. const size_t offset_; // offset of static/instance field. HInstruction* const index_; // index of an array element. const int16_t declaring_class_def_index_; // declaring class's def's dex index. - bool may_become_unknown_; // value may become kUnknownHeapValue. DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -381,26 +362,13 @@ class HeapLocationCollector : public HGraphVisitor { return heap_locations_[heap_location_idx]; } - void VisitFieldAccess(HInstruction* field_access, - HInstruction* ref, - const FieldInfo& field_info, - bool is_store) { + void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { if (field_info.IsVolatile()) { has_volatile_ = true; } const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); - HeapLocation* location = GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); - // A store of a value may be eliminated if all future loads for that value can be eliminated. - // For a value that's stored into a singleton field, the value will not be killed due - // to aliasing. However if the value is set in a block that doesn't post dominate the definition, - // the value may be killed due to merging later. Before we have post dominating info, we check - // if the store is in the same block as the definition just to be conservative. 
- if (is_store && - location->GetReferenceInfo()->IsSingletonAndNotReturned() && - field_access->GetBlock() != ref->GetBlock()) { - location->SetMayBecomeUnknown(true); - } + GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); } void VisitArrayAccess(HInstruction* array, HInstruction* index) { @@ -409,20 +377,20 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; } void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); } void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; } @@ -464,9 +432,14 @@ class HeapLocationCollector : public HGraphVisitor { }; // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. +// A heap location can be set to kUnknownHeapValue when: +// - initially set a value. +// - killed due to aliasing, merging, invocation, or loop side effects. static HInstruction* const kUnknownHeapValue = reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-1)); + // Default heap value after an allocation. +// A heap location can be set to that value right after an allocation. 
static HInstruction* const kDefaultHeapValue = reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-2)); @@ -484,29 +457,17 @@ class LSEVisitor : public HGraphVisitor { kUnknownHeapValue, graph->GetArena()->Adapter(kArenaAllocLSE)), graph->GetArena()->Adapter(kArenaAllocLSE)), - removed_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)), - substitute_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)), + removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), + substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), + possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)), singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) { } void VisitBasicBlock(HBasicBlock* block) OVERRIDE { - int block_id = block->GetBlockId(); - ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + // Populate the heap_values array for this block. // TODO: try to reuse the heap_values array from one predecessor if possible. if (block->IsLoopHeader()) { - // We do a single pass in reverse post order. For loops, use the side effects as a hint - // to see if the heap values should be killed. - if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) { - // Leave all values as kUnknownHeapValue. - } else { - // Inherit the values from pre-header. - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - ArenaVector<HInstruction*>& pre_header_heap_values = - heap_values_for_[pre_header->GetBlockId()]; - for (size_t i = 0; i < heap_values.size(); i++) { - heap_values[i] = pre_header_heap_values[i]; - } - } + HandleLoopSideEffects(block); } else { MergePredecessorValues(block); } @@ -515,23 +476,34 @@ class LSEVisitor : public HGraphVisitor { // Remove recorded instructions that should be eliminated. 
void RemoveInstructions() { - size_t size = removed_instructions_.size(); - DCHECK_EQ(size, substitute_instructions_.size()); + size_t size = removed_loads_.size(); + DCHECK_EQ(size, substitute_instructions_for_loads_.size()); for (size_t i = 0; i < size; i++) { - HInstruction* instruction = removed_instructions_[i]; - DCHECK(instruction != nullptr); - HInstruction* substitute = substitute_instructions_[i]; - if (substitute != nullptr) { - // Keep tracing substitute till one that's not removed. - HInstruction* sub_sub = FindSubstitute(substitute); - while (sub_sub != substitute) { - substitute = sub_sub; - sub_sub = FindSubstitute(substitute); - } - instruction->ReplaceWith(substitute); + HInstruction* load = removed_loads_[i]; + DCHECK(load != nullptr); + DCHECK(load->IsInstanceFieldGet() || + load->IsStaticFieldGet() || + load->IsArrayGet()); + HInstruction* substitute = substitute_instructions_for_loads_[i]; + DCHECK(substitute != nullptr); + // Keep tracing substitute till one that's not removed. + HInstruction* sub_sub = FindSubstitute(substitute); + while (sub_sub != substitute) { + substitute = sub_sub; + sub_sub = FindSubstitute(substitute); } - instruction->GetBlock()->RemoveInstruction(instruction); + load->ReplaceWith(substitute); + load->GetBlock()->RemoveInstruction(load); } + + // At this point, stores in possibly_removed_stores_ can be safely removed. + size = possibly_removed_stores_.size(); + for (size_t i = 0; i < size; i++) { + HInstruction* store = possibly_removed_stores_[i]; + DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet()); + store->GetBlock()->RemoveInstruction(store); + } + // TODO: remove unnecessary allocations. // Eliminate instructions in singleton_new_instances_ that: // - don't have uses, @@ -541,6 +513,52 @@ class LSEVisitor : public HGraphVisitor { } private: + // If heap_values[index] is an instance field store, need to keep the store. 
+ // This is necessary if a heap value is killed due to merging, or loop side + // effects (which is essentially merging also), since a load later from the + // location won't be eliminated. + void KeepIfIsStore(HInstruction* heap_value) { + if (heap_value == kDefaultHeapValue || + heap_value == kUnknownHeapValue || + !heap_value->IsInstanceFieldSet()) { + return; + } + auto idx = std::find(possibly_removed_stores_.begin(), + possibly_removed_stores_.end(), heap_value); + if (idx != possibly_removed_stores_.end()) { + // Make sure the store is kept. + possibly_removed_stores_.erase(idx); + } + } + + void HandleLoopSideEffects(HBasicBlock* block) { + DCHECK(block->IsLoopHeader()); + int block_id = block->GetBlockId(); + ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + ArenaVector<HInstruction*>& pre_header_heap_values = + heap_values_for_[pre_header->GetBlockId()]; + // We do a single pass in reverse post order. For loops, use the side effects as a hint + // to see if the heap values should be killed. + if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) { + for (size_t i = 0; i < pre_header_heap_values.size(); i++) { + // heap value is killed by loop side effects, need to keep the last store. + KeepIfIsStore(pre_header_heap_values[i]); + } + if (kIsDebugBuild) { + // heap_values should all be kUnknownHeapValue that it is inited with. + for (size_t i = 0; i < heap_values.size(); i++) { + DCHECK_EQ(heap_values[i], kUnknownHeapValue); + } + } + } else { + // Inherit the values from pre-header. 
+ for (size_t i = 0; i < heap_values.size(); i++) { + heap_values[i] = pre_header_heap_values[i]; + } + } + } + void MergePredecessorValues(HBasicBlock* block) { const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors(); if (predecessors.size() == 0) { @@ -548,16 +566,25 @@ class LSEVisitor : public HGraphVisitor { } ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { - HInstruction* value = heap_values_for_[predecessors[0]->GetBlockId()][i]; - if (value != kUnknownHeapValue) { + HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i]; + heap_values[i] = pred0_value; + if (pred0_value != kUnknownHeapValue) { for (size_t j = 1; j < predecessors.size(); j++) { - if (heap_values_for_[predecessors[j]->GetBlockId()][i] != value) { - value = kUnknownHeapValue; + HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i]; + if (pred_value != pred0_value) { + heap_values[i] = kUnknownHeapValue; break; } } } - heap_values[i] = value; + + if (heap_values[i] == kUnknownHeapValue) { + // Keep the last store in each predecessor since future loads cannot be eliminated. 
+ for (size_t j = 0; j < predecessors.size(); j++) { + ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()]; + KeepIfIsStore(pred_values[i]); + } + } } } @@ -616,21 +643,30 @@ class LSEVisitor : public HGraphVisitor { HInstruction* heap_value = heap_values[idx]; if (heap_value == kDefaultHeapValue) { HInstruction* constant = GetDefaultValue(instruction->GetType()); - removed_instructions_.push_back(instruction); - substitute_instructions_.push_back(constant); + removed_loads_.push_back(instruction); + substitute_instructions_for_loads_.push_back(constant); heap_values[idx] = constant; return; } + if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) { + HInstruction* store = heap_value; + // This load must be from a singleton since it's from the same field + // that a "removed" store puts the value. That store must be to a singleton's field. + DCHECK(ref_info->IsSingleton()); + // Get the real heap value of the store. + heap_value = store->InputAt(1); + } if ((heap_value != kUnknownHeapValue) && // Keep the load due to possible I/F, J/D array aliasing. // See b/22538329 for details. (heap_value->GetType() == instruction->GetType())) { - removed_instructions_.push_back(instruction); - substitute_instructions_.push_back(heap_value); + removed_loads_.push_back(instruction); + substitute_instructions_for_loads_.push_back(heap_value); TryRemovingNullCheck(instruction); return; } + // Load isn't eliminated. if (heap_value == kUnknownHeapValue) { // Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. 
@@ -662,51 +698,63 @@ class LSEVisitor : public HGraphVisitor { ArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; - bool redundant_store = false; + bool same_value = false; + bool possibly_redundant = false; if (Equal(heap_value, value)) { // Store into the heap location with the same value. - redundant_store = true; + same_value = true; } else if (index != nullptr) { // For array element, don't eliminate stores since it can be easily aliased // with non-constant index. } else if (!heap_location_collector_.MayDeoptimize() && - ref_info->IsSingletonAndNotReturned() && - !heap_location_collector_.GetHeapLocation(idx)->MayBecomeUnknown()) { - // Store into a field of a singleton that's not returned. And that value cannot be - // killed due to merge. It's redundant since future loads will get the value - // set by this instruction. - Primitive::Type type = Primitive::kPrimVoid; - if (instruction->IsInstanceFieldSet()) { - type = instruction->AsInstanceFieldSet()->GetFieldInfo().GetFieldType(); - } else if (instruction->IsStaticFieldSet()) { - type = instruction->AsStaticFieldSet()->GetFieldInfo().GetFieldType(); - } else { - DCHECK(false) << "Must be an instance/static field set instruction."; - } - if (value->GetType() != type) { - // I/F, J/D aliasing should not happen for fields. - DCHECK(Primitive::IsIntegralType(value->GetType())); - DCHECK(!Primitive::Is64BitType(value->GetType())); - DCHECK(Primitive::IsIntegralType(type)); - DCHECK(!Primitive::Is64BitType(type)); - // Keep the store since the corresponding load isn't eliminated due to different types. - // TODO: handle the different int types so that we can eliminate this store. - redundant_store = false; + ref_info->IsSingletonAndNotReturned()) { + // Store into a field of a singleton that's not returned. The value cannot be + // killed due to aliasing/invocation. 
It can be redundant since future loads can + // directly get the value set by this instruction. The value can still be killed due to + // merging or loop side effects. Stores whose values are killed due to merging/loop side + // effects later will be removed from possibly_removed_stores_ when that is detected. + possibly_redundant = true; + HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance(); + DCHECK(new_instance != nullptr); + if (new_instance->IsFinalizable()) { + // Finalizable objects escape globally. Need to keep the store. + possibly_redundant = false; } else { - redundant_store = true; + HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + if (loop_info != nullptr) { + // instruction is a store in the loop so the loop must does write. + DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); + + if (loop_info->IsLoopInvariant(original_ref, false)) { + DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); + // Keep the store since its value may be needed at the loop header. + possibly_redundant = false; + } else { + // The singleton is created inside the loop. Value stored to it isn't needed at + // the loop header. This is true for outer loops also. + } + } } - // TODO: eliminate the store if the singleton object is not finalizable. - redundant_store = false; } - if (redundant_store) { - removed_instructions_.push_back(instruction); - substitute_instructions_.push_back(nullptr); - TryRemovingNullCheck(instruction); + if (same_value || possibly_redundant) { + possibly_removed_stores_.push_back(instruction); } - heap_values[idx] = value; + if (!same_value) { + if (possibly_redundant) { + DCHECK(instruction->IsInstanceFieldSet()); + // Put the store as the heap value. If the value is loaded from heap + // by a load later, this store isn't really redundant. 
+ heap_values[idx] = instruction; + } else { + heap_values[idx] = value; + } + } // This store may kill values in other heap locations due to aliasing. for (size_t i = 0; i < heap_values.size(); i++) { + if (i == idx) { + continue; + } if (heap_values[i] == value) { // Same value should be kept even if aliasing happens. continue; @@ -834,9 +882,10 @@ class LSEVisitor : public HGraphVisitor { return; } if (!heap_location_collector_.MayDeoptimize() && - ref_info->IsSingletonAndNotReturned()) { - // The allocation might be eliminated. - singleton_new_instances_.push_back(new_instance); + ref_info->IsSingletonAndNotReturned() && + !new_instance->IsFinalizable() && + !new_instance->CanThrow()) { + // TODO: add new_instance to singleton_new_instances_ and enable allocation elimination. } ArenaVector<HInstruction*>& heap_values = heap_values_for_[new_instance->GetBlock()->GetBlockId()]; @@ -854,10 +903,10 @@ class LSEVisitor : public HGraphVisitor { // Find an instruction's substitute if it should be removed. // Return the same instruction if it should not be removed. HInstruction* FindSubstitute(HInstruction* instruction) { - size_t size = removed_instructions_.size(); + size_t size = removed_loads_.size(); for (size_t i = 0; i < size; i++) { - if (removed_instructions_[i] == instruction) { - return substitute_instructions_[i]; + if (removed_loads_[i] == instruction) { + return substitute_instructions_for_loads_[i]; } } return instruction; @@ -871,8 +920,13 @@ class LSEVisitor : public HGraphVisitor { // We record the instructions that should be eliminated but may be // used by heap locations. They'll be removed in the end. - ArenaVector<HInstruction*> removed_instructions_; - ArenaVector<HInstruction*> substitute_instructions_; + ArenaVector<HInstruction*> removed_loads_; + ArenaVector<HInstruction*> substitute_instructions_for_loads_; + + // Stores in this list may be removed from the list later when it's + // found that the store cannot be eliminated. 
+ ArenaVector<HInstruction*> possibly_removed_stores_; + ArenaVector<HInstruction*> singleton_new_instances_; DISALLOW_COPY_AND_ASSIGN(LSEVisitor); diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 1181007666..63bbc2cd0a 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -594,6 +594,10 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { return intrinsified_; } + void SetIntrinsified(bool intrinsified) { + intrinsified_ = intrinsified; + } + private: ArenaVector<Location> inputs_; ArenaVector<Location> temps_; @@ -613,7 +617,7 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { RegisterSet live_registers_; // Whether these are locations for an intrinsified call. - const bool intrinsified_; + bool intrinsified_; ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint); ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 2160601e06..b68ea0fd9f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1436,7 +1436,7 @@ class SideEffects : public ValueObject { return flags_ == (kAllChangeBits | kAllDependOnBits); } - // Returns true if this may read something written by other. + // Returns true if `this` may read something written by `other`. 
bool MayDependOn(SideEffects other) const { const uint64_t depends_on_flags = (flags_ & kAllDependOnBits) >> kChangeBits; return (other.flags_ & depends_on_flags); @@ -3643,10 +3643,14 @@ class HNewInstance : public HExpression<1> { uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, + bool can_throw, + bool finalizable, QuickEntrypointEnum entrypoint) : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc), type_index_(type_index), dex_file_(dex_file), + can_throw_(can_throw), + finalizable_(finalizable), entrypoint_(entrypoint) { SetRawInputAt(0, current_method); } @@ -3656,11 +3660,13 @@ class HNewInstance : public HExpression<1> { // Calls runtime so needs an environment. bool NeedsEnvironment() const OVERRIDE { return true; } - // It may throw when called on: - // - interfaces - // - abstract/innaccessible/unknown classes - // TODO: optimize when possible. - bool CanThrow() const OVERRIDE { return true; } + + // It may throw when called on type that's not instantiable/accessible. + // It can throw OOME. + // TODO: distinguish between the two cases so we can for example allow allocation elimination. 
+ bool CanThrow() const OVERRIDE { return can_throw_ || true; } + + bool IsFinalizable() const { return finalizable_; } bool CanBeNull() const OVERRIDE { return false; } @@ -3671,6 +3677,8 @@ class HNewInstance : public HExpression<1> { private: const uint16_t type_index_; const DexFile& dex_file_; + const bool can_throw_; + const bool finalizable_; const QuickEntrypointEnum entrypoint_; DISALLOW_COPY_AND_ASSIGN(HNewInstance); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 2be0680561..27ee47296c 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -383,6 +383,13 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == kX86_64; } +// Read barrier are supported only on x86 and x86-64 at the moment. +// TODO: Add support for other architectures and remove this function +static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { + return instruction_set == kX86 + || instruction_set == kX86_64; +} + static void RunOptimizations(HOptimization* optimizations[], size_t length, PassObserver* pass_observer) { @@ -673,6 +680,12 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, return nullptr; } + // When read barriers are enabled, do not attempt to compile for + // instruction sets that have no read barrier support. 
+ if (kEmitCompilerReadBarrier && !InstructionSetSupportsReadBarrier(instruction_set)) { + return nullptr; + } + if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) { MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological); return nullptr; @@ -841,9 +854,14 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, if (kIsDebugBuild && IsCompilingWithCoreImage() && - IsInstructionSetSupported(compiler_driver->GetInstructionSet())) { - // For testing purposes, we put a special marker on method names that should be compiled - // with this compiler. This makes sure we're not regressing. + IsInstructionSetSupported(compiler_driver->GetInstructionSet()) && + (!kEmitCompilerReadBarrier || + InstructionSetSupportsReadBarrier(compiler_driver->GetInstructionSet()))) { + // For testing purposes, we put a special marker on method names + // that should be compiled with this compiler (when the the + // instruction set is supported -- and has support for read + // barriers, if they are enabled). This makes sure we're not + // regressing. 
std::string method_name = PrettyMethod(method_idx, dex_file); bool shouldCompile = method_name.find("$opt$") != std::string::npos; DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index ecc085b985..0d05c49fc5 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -614,23 +614,36 @@ ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& } bool is_exact = a.IsExact() && b.IsExact(); - Handle<mirror::Class> type_handle; + ReferenceTypeInfo::TypeHandle result_type_handle; + ReferenceTypeInfo::TypeHandle a_type_handle = a.GetTypeHandle(); + ReferenceTypeInfo::TypeHandle b_type_handle = b.GetTypeHandle(); + bool a_is_interface = a_type_handle->IsInterface(); + bool b_is_interface = b_type_handle->IsInterface(); if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) { - type_handle = a.GetTypeHandle(); + result_type_handle = a_type_handle; } else if (a.IsSupertypeOf(b)) { - type_handle = a.GetTypeHandle(); + result_type_handle = a_type_handle; is_exact = false; } else if (b.IsSupertypeOf(a)) { - type_handle = b.GetTypeHandle(); + result_type_handle = b_type_handle; + is_exact = false; + } else if (!a_is_interface && !b_is_interface) { + result_type_handle = handles_->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle)); is_exact = false; } else { - // TODO: Find the first common super class. - type_handle = object_class_handle_; + // This can happen if: + // - both types are interfaces. TODO(calin): implement + // - one is an interface, the other a class, and the type does not implement the interface + // e.g: + // void foo(Interface i, boolean cond) { + // Object o = cond ? 
i : new Object(); + // } + result_type_handle = object_class_handle_; is_exact = false; } - return ReferenceTypeInfo::Create(type_handle, is_exact); + return ReferenceTypeInfo::Create(result_type_handle, is_exact); } static void UpdateArrayGet(HArrayGet* instr, @@ -719,14 +732,35 @@ void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { instr->SetReferenceTypeInfo(new_rti); } +// NullConstant inputs are ignored during merging as they do not provide any useful information. +// If all the inputs are NullConstants then the type of the phi will be set to Object. void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { - ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + size_t input_count = instr->InputCount(); + size_t first_input_index_not_null = 0; + while (first_input_index_not_null < input_count && + instr->InputAt(first_input_index_not_null)->IsNullConstant()) { + first_input_index_not_null++; + } + if (first_input_index_not_null == input_count) { + // All inputs are NullConstants, set the type to object. + // This may happen in the presence of inlining. + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false)); + return; + } + + ReferenceTypeInfo new_rti = instr->InputAt(first_input_index_not_null)->GetReferenceTypeInfo(); + if (new_rti.IsValid() && new_rti.IsObjectClass() && !new_rti.IsExact()) { // Early return if we are Object and inexact. 
instr->SetReferenceTypeInfo(new_rti); return; } - for (size_t i = 1; i < instr->InputCount(); i++) { + + for (size_t i = first_input_index_not_null + 1; i < input_count; i++) { + if (instr->InputAt(i)->IsNullConstant()) { + continue; + } new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo()); if (new_rti.IsValid() && new_rti.IsObjectClass()) { if (!new_rti.IsExact()) { diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc index ec45d6b2ca..9bbc354290 100644 --- a/compiler/optimizing/side_effects_test.cc +++ b/compiler/optimizing/side_effects_test.cc @@ -129,13 +129,13 @@ TEST(SideEffectsTest, NoDependences) { TEST(SideEffectsTest, VolatileDependences) { SideEffects volatile_write = - SideEffects::FieldWriteOfType(Primitive::kPrimInt, true); + SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ true); SideEffects any_write = - SideEffects::FieldWriteOfType(Primitive::kPrimInt, false); + SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false); SideEffects volatile_read = - SideEffects::FieldReadOfType(Primitive::kPrimByte, true); + SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ true); SideEffects any_read = - SideEffects::FieldReadOfType(Primitive::kPrimByte, false); + SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ false); EXPECT_FALSE(volatile_write.MayDependOn(any_read)); EXPECT_TRUE(any_read.MayDependOn(volatile_write)); @@ -151,15 +151,15 @@ TEST(SideEffectsTest, VolatileDependences) { TEST(SideEffectsTest, SameWidthTypes) { // Type I/F. 
testWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), - SideEffects::FieldReadOfType(Primitive::kPrimFloat, false)); + SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false), + SideEffects::FieldReadOfType(Primitive::kPrimFloat, /* is_volatile */ false)); testWriteAndReadDependence( SideEffects::ArrayWriteOfType(Primitive::kPrimInt), SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); // Type L/D. testWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimLong, false), - SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); + SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false), + SideEffects::FieldReadOfType(Primitive::kPrimDouble, /* is_volatile */ false)); testWriteAndReadDependence( SideEffects::ArrayWriteOfType(Primitive::kPrimLong), SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); @@ -171,9 +171,9 @@ TEST(SideEffectsTest, AllWritesAndReads) { for (Primitive::Type type = Primitive::kPrimNot; type < Primitive::kPrimVoid; type = Primitive::Type(type + 1)) { - s = s.Union(SideEffects::FieldWriteOfType(type, false)); + s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayWriteOfType(type)); - s = s.Union(SideEffects::FieldReadOfType(type, false)); + s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayReadOfType(type)); } EXPECT_TRUE(s.DoesAllReadWrite()); @@ -225,10 +225,10 @@ TEST(SideEffectsTest, BitStrings) { "||DJ|||||", // note: DJ alias SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str()); SideEffects s = SideEffects::None(); - s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, false)); - s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, false)); + s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, /* is_volatile */ false)); + s = 
s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort)); - s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, false)); + s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); EXPECT_STREQ( |