Diffstat (limited to 'compiler/optimizing')
41 files changed, 2395 insertions(+), 585 deletions(-)
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index 0ecc0d7433..be432c5a20 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -59,21 +59,23 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { return new (allocator) HGreaterThan(lhs, rhs); } else if (cond->IsGreaterThan()) { return new (allocator) HLessThanOrEqual(lhs, rhs); - } else if (cond->IsGreaterThanOrEqual()) { + } else { + DCHECK(cond->IsGreaterThanOrEqual()); return new (allocator) HLessThan(lhs, rhs); } } else if (cond->IsIntConstant()) { HIntConstant* int_const = cond->AsIntConstant(); if (int_const->IsZero()) { - return graph->GetIntConstant1(); + return graph->GetIntConstant(1); } else { DCHECK(int_const->IsOne()); - return graph->GetIntConstant0(); + return graph->GetIntConstant(0); } + } else { + // General case when 'cond' is another instruction of type boolean. + // Negate with 'cond == 0'. + return new (allocator) HEqual(cond, graph->GetIntConstant(0)); } - - LOG(FATAL) << "Instruction " << cond->DebugName() << " used as a condition"; - UNREACHABLE(); } void HBooleanSimplifier::Run() { diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 24fa58317a..b3653fe903 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -52,12 +52,11 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { HParameterValue(0, Primitive::kPrimNot); // array HInstruction* parameter2 = new (&allocator) HParameterValue(0, Primitive::kPrimInt); // i - HInstruction* constant_1 = new (&allocator) HIntConstant(1); - HInstruction* constant_0 = new (&allocator) HIntConstant(0); entry->AddInstruction(parameter1); entry->AddInstruction(parameter2); - entry->AddInstruction(constant_1); - entry->AddInstruction(constant_0); + + HInstruction* constant_1 = graph->GetIntConstant(1); + HInstruction* constant_0 = graph->GetIntConstant(0); HBasicBlock* block1 = new (&allocator) HBasicBlock(graph); graph->AddBlock(block1); @@ -158,14 +157,12 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { HParameterValue(0, Primitive::kPrimNot); // array HInstruction* parameter2 = new (&allocator) HParameterValue(0, Primitive::kPrimInt); // i - HInstruction* constant_1 = new (&allocator) HIntConstant(1); - HInstruction* constant_0 = new (&allocator) HIntConstant(0); - HInstruction* constant_max_int = new (&allocator) HIntConstant(INT_MAX); entry->AddInstruction(parameter1); entry->AddInstruction(parameter2); - entry->AddInstruction(constant_1); - entry->AddInstruction(constant_0); - entry->AddInstruction(constant_max_int); + + HInstruction* constant_1 = graph->GetIntConstant(1); + HInstruction* constant_0 = graph->GetIntConstant(0); + HInstruction* constant_max_int = graph->GetIntConstant(INT_MAX); HBasicBlock* block1 = new (&allocator) HBasicBlock(graph); graph->AddBlock(block1); @@ -232,14 +229,12 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { HParameterValue(0, Primitive::kPrimNot); // array HInstruction* parameter2 = new (&allocator) HParameterValue(0, Primitive::kPrimInt); // i - HInstruction* constant_1 = new (&allocator) HIntConstant(1); - HInstruction* constant_0 = new (&allocator) HIntConstant(0); - HInstruction* constant_max_int = new (&allocator) HIntConstant(INT_MAX); entry->AddInstruction(parameter1); entry->AddInstruction(parameter2); - 
entry->AddInstruction(constant_1); - entry->AddInstruction(constant_0); - entry->AddInstruction(constant_max_int); + + HInstruction* constant_1 = graph->GetIntConstant(1); + HInstruction* constant_0 = graph->GetIntConstant(0); + HInstruction* constant_max_int = graph->GetIntConstant(INT_MAX); HBasicBlock* block1 = new (&allocator) HBasicBlock(graph); graph->AddBlock(block1); @@ -303,15 +298,12 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot); - HInstruction* constant_5 = new (&allocator) HIntConstant(5); - HInstruction* constant_4 = new (&allocator) HIntConstant(4); - HInstruction* constant_6 = new (&allocator) HIntConstant(6); - HInstruction* constant_1 = new (&allocator) HIntConstant(1); entry->AddInstruction(parameter); - entry->AddInstruction(constant_5); - entry->AddInstruction(constant_4); - entry->AddInstruction(constant_6); - entry->AddInstruction(constant_1); + + HInstruction* constant_5 = graph->GetIntConstant(5); + HInstruction* constant_4 = graph->GetIntConstant(4); + HInstruction* constant_6 = graph->GetIntConstant(6); + HInstruction* constant_1 = graph->GetIntConstant(1); HBasicBlock* block = new (&allocator) HBasicBlock(graph); graph->AddBlock(block); @@ -379,13 +371,11 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot); - HInstruction* constant_initial = new (allocator) HIntConstant(initial); - HInstruction* constant_increment = new (allocator) HIntConstant(increment); - HInstruction* constant_10 = new (allocator) HIntConstant(10); entry->AddInstruction(parameter); - entry->AddInstruction(constant_initial); - entry->AddInstruction(constant_increment); - entry->AddInstruction(constant_10); + + HInstruction* constant_initial = graph->GetIntConstant(initial); + HInstruction* constant_increment = graph->GetIntConstant(increment); + HInstruction* constant_10 = graph->GetIntConstant(10); HBasicBlock* block = new (allocator) HBasicBlock(graph); graph->AddBlock(block); @@ -518,15 +508,12 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot); - HInstruction* constant_initial = new (allocator) HIntConstant(initial); - HInstruction* constant_increment = new (allocator) HIntConstant(increment); - HInstruction* constant_minus_1 = new (allocator) HIntConstant(-1); - HInstruction* constant_10 = new (allocator) HIntConstant(10); entry->AddInstruction(parameter); - entry->AddInstruction(constant_initial); - entry->AddInstruction(constant_increment); - entry->AddInstruction(constant_minus_1); - entry->AddInstruction(constant_10); + + HInstruction* constant_initial = graph->GetIntConstant(initial); + HInstruction* constant_increment = graph->GetIntConstant(increment); + HInstruction* constant_minus_1 = graph->GetIntConstant(-1); + HInstruction* constant_10 = graph->GetIntConstant(10); HBasicBlock* block = new (allocator) HBasicBlock(graph); graph->AddBlock(block); @@ -651,12 +638,10 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* constant_10 = new (allocator) HIntConstant(10); - HInstruction* constant_initial = new 
(allocator) HIntConstant(initial); - HInstruction* constant_increment = new (allocator) HIntConstant(increment); - entry->AddInstruction(constant_10); - entry->AddInstruction(constant_initial); - entry->AddInstruction(constant_increment); + + HInstruction* constant_10 = graph->GetIntConstant(10); + HInstruction* constant_initial = graph->GetIntConstant(initial); + HInstruction* constant_increment = graph->GetIntConstant(increment); HBasicBlock* block = new (allocator) HBasicBlock(graph); graph->AddBlock(block); @@ -765,15 +750,12 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot); - HInstruction* constant_initial = new (allocator) HIntConstant(initial); - HInstruction* constant_1 = new (allocator) HIntConstant(1); - HInstruction* constant_10 = new (allocator) HIntConstant(10); - HInstruction* constant_minus_1 = new (allocator) HIntConstant(-1); entry->AddInstruction(parameter); - entry->AddInstruction(constant_initial); - entry->AddInstruction(constant_1); - entry->AddInstruction(constant_10); - entry->AddInstruction(constant_minus_1); + + HInstruction* constant_initial = graph->GetIntConstant(initial); + HInstruction* constant_1 = graph->GetIntConstant(1); + HInstruction* constant_10 = graph->GetIntConstant(10); + HInstruction* constant_minus_1 = graph->GetIntConstant(-1); HBasicBlock* block = new (allocator) HBasicBlock(graph); graph->AddBlock(block); @@ -893,13 +875,11 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot); - HInstruction* constant_0 = new (&allocator) HIntConstant(0); - HInstruction* constant_minus_1 = new (&allocator) HIntConstant(-1); - HInstruction* constant_1 = new (&allocator) HIntConstant(1); entry->AddInstruction(parameter); - entry->AddInstruction(constant_0); - entry->AddInstruction(constant_minus_1); - entry->AddInstruction(constant_1); + + HInstruction* constant_0 = graph->GetIntConstant(0); + HInstruction* constant_minus_1 = graph->GetIntConstant(-1); + HInstruction* constant_1 = graph->GetIntConstant(1); HBasicBlock* block = new (&allocator) HBasicBlock(graph); graph->AddBlock(block); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index f81935a7c6..2cdd5af9f3 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -215,7 +215,7 @@ void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) { DCHECK(fallthrough_target != nullptr); PotentiallyAddSuspendCheck(branch_target, dex_pc); HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); - T* comparison = new (arena_) T(value, GetIntConstant(0)); + T* comparison = new (arena_) T(value, graph_->GetIntConstant(0)); current_block_->AddInstruction(comparison); HInstruction* ifinst = new (arena_) HIf(comparison); current_block_->AddInstruction(ifinst); @@ -515,7 +515,7 @@ void HGraphBuilder::Binop_12x(const Instruction& instruction, template<typename T> void HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse) { HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); - HInstruction* second = GetIntConstant(instruction.VRegC_22s()); + HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s()); if (reverse) { std::swap(first, second); } @@ -526,7 +526,7 @@ void 
HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse) { template<typename T> void HGraphBuilder::Binop_22b(const Instruction& instruction, bool reverse) { HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); - HInstruction* second = GetIntConstant(instruction.VRegC_22b()); + HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b()); if (reverse) { std::swap(first, second); } @@ -824,9 +824,9 @@ void HGraphBuilder::BuildCheckedDivRem(uint16_t out_vreg, HInstruction* second = nullptr; if (second_is_constant) { if (type == Primitive::kPrimInt) { - second = GetIntConstant(second_vreg_or_constant); + second = graph_->GetIntConstant(second_vreg_or_constant); } else { - second = GetLongConstant(second_vreg_or_constant); + second = graph_->GetLongConstant(second_vreg_or_constant); } } else { second = LoadLocal(second_vreg_or_constant, type); @@ -890,7 +890,7 @@ void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc, bool is_range, uint32_t* args, uint32_t register_index) { - HInstruction* length = GetIntConstant(number_of_vreg_arguments); + HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments); QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) ? kQuickAllocArrayWithAccessCheck : kQuickAllocArray; @@ -910,7 +910,7 @@ void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc, temps.Add(object); for (size_t i = 0; i < number_of_vreg_arguments; ++i) { HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type); - HInstruction* index = GetIntConstant(i); + HInstruction* index = graph_->GetIntConstant(i); current_block_->AddInstruction( new (arena_) HArraySet(object, index, value, type, dex_pc)); } @@ -924,8 +924,8 @@ void HGraphBuilder::BuildFillArrayData(HInstruction* object, Primitive::Type anticipated_type, uint32_t dex_pc) { for (uint32_t i = 0; i < element_count; ++i) { - HInstruction* index = GetIntConstant(i); - HInstruction* value = GetIntConstant(data[i]); + HInstruction* index = graph_->GetIntConstant(i); + HInstruction* value = graph_->GetIntConstant(data[i]); current_block_->AddInstruction(new (arena_) HArraySet( object, index, value, anticipated_type, dex_pc)); } @@ -949,7 +949,7 @@ void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t // Implementation of this DEX instruction seems to be that the bounds check is // done before doing any stores. - HInstruction* last_index = GetIntConstant(payload->element_count - 1); + HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1); current_block_->AddInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc)); switch (payload->element_width) { @@ -990,8 +990,8 @@ void HGraphBuilder::BuildFillWideArrayData(HInstruction* object, uint32_t element_count, uint32_t dex_pc) { for (uint32_t i = 0; i < element_count; ++i) { - HInstruction* index = GetIntConstant(i); - HInstruction* value = GetLongConstant(data[i]); + HInstruction* index = graph_->GetIntConstant(i); + HInstruction* value = graph_->GetLongConstant(data[i]); current_block_->AddInstruction(new (arena_) HArraySet( object, index, value, Primitive::kPrimLong, dex_pc)); } @@ -1082,7 +1082,7 @@ void HGraphBuilder::BuildSwitchCaseHelper(const Instruction& instruction, size_t PotentiallyAddSuspendCheck(case_target, dex_pc); // The current case's value. - HInstruction* this_case_value = GetIntConstant(case_value_int); + HInstruction* this_case_value = graph_->GetIntConstant(case_value_int); // Compare value and this_case_value. 
HEqual* comparison = new (arena_) HEqual(value, this_case_value); @@ -1140,28 +1140,28 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 switch (instruction.Opcode()) { case Instruction::CONST_4: { int32_t register_index = instruction.VRegA(); - HIntConstant* constant = GetIntConstant(instruction.VRegB_11n()); + HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_11n()); UpdateLocal(register_index, constant); break; } case Instruction::CONST_16: { int32_t register_index = instruction.VRegA(); - HIntConstant* constant = GetIntConstant(instruction.VRegB_21s()); + HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21s()); UpdateLocal(register_index, constant); break; } case Instruction::CONST: { int32_t register_index = instruction.VRegA(); - HIntConstant* constant = GetIntConstant(instruction.VRegB_31i()); + HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_31i()); UpdateLocal(register_index, constant); break; } case Instruction::CONST_HIGH16: { int32_t register_index = instruction.VRegA(); - HIntConstant* constant = GetIntConstant(instruction.VRegB_21h() << 16); + HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21h() << 16); UpdateLocal(register_index, constant); break; } @@ -1172,7 +1172,7 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 int64_t value = instruction.VRegB_21s(); value <<= 48; value >>= 48; - HLongConstant* constant = GetLongConstant(value); + HLongConstant* constant = graph_->GetLongConstant(value); UpdateLocal(register_index, constant); break; } @@ -1183,14 +1183,14 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 int64_t value = instruction.VRegB_31i(); value <<= 32; value >>= 32; - HLongConstant* constant = GetLongConstant(value); + HLongConstant* constant = graph_->GetLongConstant(value); UpdateLocal(register_index, constant); break; } case Instruction::CONST_WIDE: { int32_t register_index = instruction.VRegA(); - HLongConstant* constant = GetLongConstant(instruction.VRegB_51l()); + HLongConstant* constant = graph_->GetLongConstant(instruction.VRegB_51l()); UpdateLocal(register_index, constant); break; } @@ -1198,7 +1198,7 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::CONST_WIDE_HIGH16: { int32_t register_index = instruction.VRegA(); int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48; - HLongConstant* constant = GetLongConstant(value); + HLongConstant* constant = graph_->GetLongConstant(value); UpdateLocal(register_index, constant); break; } @@ -2100,24 +2100,6 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 return true; } // NOLINT(readability/fn_size) -HIntConstant* HGraphBuilder::GetIntConstant(int32_t constant) { - switch (constant) { - case 0: return graph_->GetIntConstant0(); - case 1: return graph_->GetIntConstant1(); - default: { - HIntConstant* instruction = new (arena_) HIntConstant(constant); - graph_->AddConstant(instruction); - return instruction; - } - } -} - -HLongConstant* HGraphBuilder::GetLongConstant(int64_t constant) { - HLongConstant* instruction = new (arena_) HLongConstant(constant); - graph_->AddConstant(instruction); - return instruction; -} - HLocal* HGraphBuilder::GetLocalAt(int register_index) const { return locals_.Get(register_index); } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index b206660fdc..6a0738a7b9 100644 --- 
a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -96,8 +96,6 @@ class HGraphBuilder : public ValueObject { void MaybeUpdateCurrentBlock(size_t index); HBasicBlock* FindBlockStartingAt(int32_t index) const; - HIntConstant* GetIntConstant(int32_t constant); - HLongConstant* GetLongConstant(int64_t constant); void InitializeLocals(uint16_t count); HLocal* GetLocalAt(int register_index) const; void UpdateLocal(int register_index, HInstruction* instruction) const; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 97c470b730..1f95041a92 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -41,12 +41,6 @@ static bool ExpectedPairLayout(Location location) { static constexpr int kCurrentMethodStackOffset = 0; -static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 }; -static constexpr size_t kRuntimeParameterCoreRegistersLength = - arraysize(kRuntimeParameterCoreRegisters); -static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = - arraysize(kRuntimeParameterFpuRegisters); // We unconditionally allocate R5 to ensure we can do long operations // with baseline. static constexpr Register kCoreSavedRegisterForBaseline = R5; @@ -59,18 +53,6 @@ static constexpr SRegister kFpuCalleeSaves[] = // S registers. Therefore there is no need to block it. static constexpr DRegister DTMP = D31; -class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> { - public: - InvokeRuntimeCallingConvention() - : CallingConvention(kRuntimeParameterCoreRegisters, - kRuntimeParameterCoreRegistersLength, - kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength) {} - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); -}; - #define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value() @@ -2105,16 +2087,32 @@ void InstructionCodeGeneratorARM::VisitMul(HMul* mul) { } void LocationsBuilderARM::VisitDiv(HDiv* div) { - LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong - ? LocationSummary::kCall - : LocationSummary::kNoCall; + LocationSummary::CallKind call_kind = LocationSummary::kNoCall; + if (div->GetResultType() == Primitive::kPrimLong) { + // pLdiv runtime call. + call_kind = LocationSummary::kCall; + } else if (div->GetResultType() == Primitive::kPrimInt && + !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + // pIdivmod runtime call. 
+ call_kind = LocationSummary::kCall; + } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind); switch (div->GetResultType()) { case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } else { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + // Note: divrem will compute both the quotient and the remainder as the pair R0 and R1, but + // we only need the former. + locations->SetOut(Location::RegisterLocation(R0)); + } break; } case Primitive::kPrimLong: { @@ -2147,9 +2145,18 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case Primitive::kPrimInt: { - __ sdiv(out.AsRegister<Register>(), - first.AsRegister<Register>(), - second.AsRegister<Register>()); + if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + __ sdiv(out.AsRegister<Register>(), + first.AsRegister<Register>(), + second.AsRegister<Register>()); + } else { + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegister<Register>()); + DCHECK_EQ(calling_convention.GetRegisterAt(1), second.AsRegister<Register>()); + DCHECK_EQ(R0, out.AsRegister<Register>()); + + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr); + } break; } @@ -2187,17 +2194,32 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { void LocationsBuilderARM::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = type == Primitive::kPrimInt - ? LocationSummary::kNoCall - : LocationSummary::kCall; + + // Most remainders are implemented in the runtime. + LocationSummary::CallKind call_kind = LocationSummary::kCall; + if (rem->GetResultType() == Primitive::kPrimInt && + codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + // Have hardware divide instruction for int, do it with three instructions. 
+ call_kind = LocationSummary::kNoCall; + } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); switch (type) { case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - locations->AddTemp(Location::RequiresRegister()); + if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + } else { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + // Note: divrem will compute both the quotient and the remainder as the pair R0 and R1, but + // we only need the latter. + locations->SetOut(Location::RegisterLocation(R1)); + } break; } case Primitive::kPrimLong: { @@ -2242,16 +2264,25 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); switch (type) { case Primitive::kPrimInt: { - Register reg1 = first.AsRegister<Register>(); - Register reg2 = second.AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); + if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + Register reg1 = first.AsRegister<Register>(); + Register reg2 = second.AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + // temp = reg1 / reg2 (integer division) + // temp = temp * reg2 + // dest = reg1 - temp + __ sdiv(temp, reg1, reg2); + __ mul(temp, temp, reg2); + __ sub(out.AsRegister<Register>(), reg1, ShifterOperand(temp)); + } else { + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegister<Register>()); + DCHECK_EQ(calling_convention.GetRegisterAt(1), second.AsRegister<Register>()); + DCHECK_EQ(R1, out.AsRegister<Register>()); - // temp = reg1 / reg2 (integer division) - // temp = temp * reg2 - // dest = reg1 - temp - __ sdiv(temp, reg1, reg2); - __ mul(temp, temp, reg2); - __ sub(out.AsRegister<Register>(), reg1, ShifterOperand(temp)); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr); + } break; } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 57e1d2f2f5..bcdea7a639 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -41,6 +41,25 @@ static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRe static constexpr Register kArtMethodRegister = R0; +static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); +static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterFpuRegisters); + +class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength, + 
kRuntimeParameterFpuRegisters, + kRuntimeParameterFpuRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + static constexpr DRegister FromLowSToD(SRegister reg) { return DCHECK_CONSTEXPR(reg % 2 == 0, , D0) static_cast<DRegister>(reg / 2); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 9455a918d4..32ada3837e 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -98,29 +98,6 @@ Location ARM64ReturnLocation(Primitive::Type return_type) { } } -static const Register kRuntimeParameterCoreRegisters[] = { x0, x1, x2, x3, x4, x5, x6, x7 }; -static constexpr size_t kRuntimeParameterCoreRegistersLength = - arraysize(kRuntimeParameterCoreRegisters); -static const FPRegister kRuntimeParameterFpuRegisters[] = { d0, d1, d2, d3, d4, d5, d6, d7 }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = - arraysize(kRuntimeParameterCoreRegisters); - -class InvokeRuntimeCallingConvention : public CallingConvention<Register, FPRegister> { - public: - static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); - - InvokeRuntimeCallingConvention() - : CallingConvention(kRuntimeParameterCoreRegisters, - kRuntimeParameterCoreRegistersLength, - kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength) {} - - Location GetReturnLocation(Primitive::Type return_type); - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); -}; - Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) { return ARM64ReturnLocation(return_type); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index cbb2e5c749..2c624d2926 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -80,6 +80,31 @@ class SlowPathCodeARM64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64); }; +static const vixl::Register kRuntimeParameterCoreRegisters[] = + { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); +static const vixl::FPRegister kRuntimeParameterFpuRegisters[] = + { vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7 }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +class InvokeRuntimeCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> { + public: + static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); + + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength, + kRuntimeParameterFpuRegisters, + kRuntimeParameterFpuRegistersLength) {} + + Location GetReturnLocation(Primitive::Type return_type); + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> { public: InvokeDexCallingConvention() diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4414a65efa..8d0ca0beb9 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -19,6 +19,8 @@ #include "entrypoints/quick/quick_entrypoints.h" #include 
"entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "intrinsics.h" +#include "intrinsics_x86.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" @@ -34,46 +36,14 @@ namespace x86 { static constexpr int kCurrentMethodStackOffset = 0; -static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX }; -static constexpr size_t kRuntimeParameterCoreRegistersLength = - arraysize(kRuntimeParameterCoreRegisters); static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI }; -static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = - arraysize(kRuntimeParameterFpuRegisters); static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); -class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> { - public: - InvokeRuntimeCallingConvention() - : CallingConvention(kRuntimeParameterCoreRegisters, - kRuntimeParameterCoreRegistersLength, - kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength) {} - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); -}; - #define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())-> -class SlowPathCodeX86 : public SlowPathCode { - public: - SlowPathCodeX86() : entry_label_(), exit_label_() {} - - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } - - private: - Label entry_label_; - Label exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86); -}; - class NullCheckSlowPathX86 : public SlowPathCodeX86 { public: explicit NullCheckSlowPathX86(HNullCheck* instruction) : instruction_(instruction) {} @@ -1140,35 +1110,30 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } -void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. 
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorX86 intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} - // temp = method; - codegen_->LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); - // (temp + offset_of_quick_compiled_code)() - __ call(Address( - temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); - } else { - __ call(codegen_->GetFrameEntryLabel()); +void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; } - DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen_->GenerateStaticOrDirectCall( + invoke, invoke->GetLocations()->GetTemp(0).AsRegister<Register>()); } void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -2863,6 +2828,32 @@ void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { } +void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + Register temp) { + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address( + temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + } else { + __ call(GetFrameEntryLabel()); + } + + DCHECK(!IsLeafMethod()); + RecordPcInfo(invoke, invoke->GetDexPc()); +} + void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { Label is_null; __ testl(value, value); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index c5763de05e..6a4d42dd01 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -39,6 +39,25 @@ static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCore static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 }; static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); +static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); +static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterFpuRegisters); + +class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength, + 
kRuntimeParameterFpuRegisters, + kRuntimeParameterFpuRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> { public: InvokeDexCallingConvention() : CallingConvention( @@ -228,6 +247,9 @@ class CodeGeneratorX86 : public CodeGenerator { // Helper method to move a 64bits value between two locations. void Move64(Location destination, Location source); + // Generate a call to a static or direct method. + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp); + // Emit a write barrier. void MarkGCCard(Register temp, Register card, Register object, Register value); @@ -261,6 +283,20 @@ class CodeGeneratorX86 : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86); }; +class SlowPathCodeX86 : public SlowPathCode { + public: + SlowPathCodeX86() : entry_label_(), exit_label_() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86); +}; + } // namespace x86 } // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index c1f601e6d4..ef60280016 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -39,28 +39,11 @@ static constexpr Register TMP = R11; static constexpr int kCurrentMethodStackOffset = 0; -static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX }; -static constexpr size_t kRuntimeParameterCoreRegistersLength = - arraysize(kRuntimeParameterCoreRegisters); -static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = - arraysize(kRuntimeParameterFpuRegisters); static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 }; static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 }; static constexpr int kC2ConditionMask = 0x400; -class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { - public: - InvokeRuntimeCallingConvention() - : CallingConvention(kRuntimeParameterCoreRegisters, - kRuntimeParameterCoreRegistersLength, - kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength) {} - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); -}; #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())-> diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 707c9992c0..a380b6a04c 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -37,6 +37,25 @@ static constexpr FloatRegister kParameterFloatRegisters[] = static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters); +static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); +static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterFpuRegisters); + +class InvokeRuntimeCallingConvention : public CallingConvention<Register, 
FloatRegister> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength, + kRuntimeParameterFpuRegisters, + kRuntimeParameterFpuRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> { public: InvokeDexCallingConvention() : CallingConvention( diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 40f0adc63d..6053ad51f4 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -474,10 +474,8 @@ TEST(CodegenTest, NonMaterializedCondition) { HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); graph->AddBlock(first_block); entry->AddSuccessor(first_block); - HIntConstant* constant0 = new (&allocator) HIntConstant(0); - entry->AddInstruction(constant0); - HIntConstant* constant1 = new (&allocator) HIntConstant(1); - entry->AddInstruction(constant1); + HIntConstant* constant0 = graph->GetIntConstant(0); + HIntConstant* constant1 = graph->GetIntConstant(1); HEqual* equal = new (&allocator) HEqual(constant0, constant0); first_block->AddInstruction(equal); first_block->AddInstruction(new (&allocator) HIf(equal)); @@ -582,11 +580,9 @@ TEST(CodegenTest, MaterializedCondition1) { code_block->AddSuccessor(exit_block); graph->SetExitBlock(exit_block); - HIntConstant cst_lhs(lhs[i]); - code_block->AddInstruction(&cst_lhs); - HIntConstant cst_rhs(rhs[i]); - code_block->AddInstruction(&cst_rhs); - HLessThan cmp_lt(&cst_lhs, &cst_rhs); + HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); + HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); + HLessThan cmp_lt(cst_lhs, cst_rhs); code_block->AddInstruction(&cmp_lt); HReturn ret(&cmp_lt); code_block->AddInstruction(&ret); @@ -639,11 +635,9 @@ TEST(CodegenTest, MaterializedCondition2) { if_false_block->AddSuccessor(exit_block); graph->SetExitBlock(exit_block); - HIntConstant cst_lhs(lhs[i]); - if_block->AddInstruction(&cst_lhs); - HIntConstant cst_rhs(rhs[i]); - if_block->AddInstruction(&cst_rhs); - HLessThan cmp_lt(&cst_lhs, &cst_rhs); + HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); + HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); + HLessThan cmp_lt(cst_lhs, cst_rhs); if_block->AddInstruction(&cmp_lt); // We insert a temporary to separate the HIf from the HLessThan and force // the materialization of the condition. @@ -652,13 +646,11 @@ TEST(CodegenTest, MaterializedCondition2) { HIf if_lt(&cmp_lt); if_block->AddInstruction(&if_lt); - HIntConstant cst_lt(1); - if_true_block->AddInstruction(&cst_lt); - HReturn ret_lt(&cst_lt); + HIntConstant* cst_lt = graph->GetIntConstant(1); + HReturn ret_lt(cst_lt); if_true_block->AddInstruction(&ret_lt); - HIntConstant cst_ge(0); - if_false_block->AddInstruction(&cst_ge); - HReturn ret_ge(&cst_ge); + HIntConstant* cst_ge = graph->GetIntConstant(0); + HReturn ret_ge(cst_ge); if_false_block->AddInstruction(&ret_ge); auto hook_before_codegen = [](HGraph* graph_in) { diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index ec0cc3e98b..b7a92b5ae5 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -55,20 +55,20 @@ void HConstantFolding::Run() { if (inst->IsBinaryOperation()) { // Constant folding: replace `op(a, b)' with a constant at // compile time if `a' and `b' are both constants. 
- HConstant* constant = - inst->AsBinaryOperation()->TryStaticEvaluation(); + HConstant* constant = inst->AsBinaryOperation()->TryStaticEvaluation(); if (constant != nullptr) { - inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant); + inst->ReplaceWith(constant); + inst->GetBlock()->RemoveInstruction(inst); } else { inst->Accept(&simplifier); } } else if (inst->IsUnaryOperation()) { // Constant folding: replace `op(a)' with a constant at compile // time if `a' is a constant. - HConstant* constant = - inst->AsUnaryOperation()->TryStaticEvaluation(); + HConstant* constant = inst->AsUnaryOperation()->TryStaticEvaluation(); if (constant != nullptr) { - inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant); + inst->ReplaceWith(constant); + inst->GetBlock()->RemoveInstruction(inst); } } else if (inst->IsDivZeroCheck()) { // We can safely remove the check if the input is a non-null constant. @@ -173,9 +173,8 @@ void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) { // REM dst, src, src // with // CONSTANT 0 - ArenaAllocator* allocator = GetGraph()->GetArena(); - block->ReplaceAndRemoveInstructionWith(instruction, - HConstant::NewConstant(allocator, type, 0)); + instruction->ReplaceWith(GetGraph()->GetConstant(type, 0)); + block->RemoveInstruction(instruction); } } @@ -195,7 +194,6 @@ void InstructionWithAbsorbingInputSimplifier::VisitSub(HSub* instruction) { } HBasicBlock* block = instruction->GetBlock(); - ArenaAllocator* allocator = GetGraph()->GetArena(); // We assume that GVN has run before, so we only perform a pointer // comparison. If for some reason the values are equal but the pointers are @@ -208,8 +206,8 @@ void InstructionWithAbsorbingInputSimplifier::VisitSub(HSub* instruction) { // CONSTANT 0 // Note that we cannot optimise `x - x` to `0` for floating-point. It does // not work when `x` is an infinity. - block->ReplaceAndRemoveInstructionWith(instruction, - HConstant::NewConstant(allocator, type, 0)); + instruction->ReplaceWith(GetGraph()->GetConstant(type, 0)); + block->RemoveInstruction(instruction); } } @@ -225,10 +223,8 @@ void InstructionWithAbsorbingInputSimplifier::VisitXor(HXor* instruction) { // CONSTANT 0 Primitive::Type type = instruction->GetType(); HBasicBlock* block = instruction->GetBlock(); - ArenaAllocator* allocator = GetGraph()->GetArena(); - - block->ReplaceAndRemoveInstructionWith(instruction, - HConstant::NewConstant(allocator, type, 0)); + instruction->ReplaceWith(GetGraph()->GetConstant(type, 0)); + block->RemoveInstruction(instruction); } } diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 6ceccfbf0e..6853d54c48 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -101,14 +101,16 @@ TEST(ConstantFolding, IntConstantFoldingNegation) { // Expected difference after constant folding. diff_t expected_cf_diff = { { " 2: IntConstant [5]\n", " 2: IntConstant\n" }, - { " 5: Neg(2) [8]\n", " 12: IntConstant [8]\n" }, + { " 10: SuspendCheck\n", " 10: SuspendCheck\n" + " 12: IntConstant [8]\n" }, + { " 5: Neg(2) [8]\n", removed }, { " 8: Return(5)\n", " 8: Return(12)\n" } }; std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the value of the computed constant. 
auto check_after_cf = [](HGraph* graph) { - HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); + HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0); ASSERT_TRUE(inst->IsIntConstant()); ASSERT_EQ(inst->AsIntConstant()->GetValue(), -1); }; @@ -160,14 +162,16 @@ TEST(ConstantFolding, IntConstantFoldingOnAddition1) { diff_t expected_cf_diff = { { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, { " 5: IntConstant [9]\n", " 5: IntConstant\n" }, - { " 9: Add(3, 5) [12]\n", " 16: IntConstant [12]\n" }, + { " 14: SuspendCheck\n", " 14: SuspendCheck\n" + " 16: IntConstant [12]\n" }, + { " 9: Add(3, 5) [12]\n", removed }, { " 12: Return(9)\n", " 12: Return(16)\n" } }; std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the value of the computed constant. auto check_after_cf = [](HGraph* graph) { - HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); + HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0); ASSERT_TRUE(inst->IsIntConstant()); ASSERT_EQ(inst->AsIntConstant()->GetValue(), 3); }; @@ -195,8 +199,8 @@ TEST(ConstantFolding, IntConstantFoldingOnAddition1) { * v0 <- 1 0. const/4 v0, #+1 * v1 <- 2 1. const/4 v1, #+2 * v0 <- v0 + v1 2. add-int/2addr v0, v1 - * v1 <- 3 3. const/4 v1, #+3 - * v2 <- 4 4. const/4 v2, #+4 + * v1 <- 4 3. const/4 v1, #+4 + * v2 <- 5 4. const/4 v2, #+5 * v1 <- v1 + v2 5. add-int/2addr v1, v2 * v2 <- v0 + v1 6. add-int v2, v0, v1 * return v2 8. return v2 @@ -206,8 +210,8 @@ TEST(ConstantFolding, IntConstantFoldingOnAddition2) { Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT_2ADDR | 0 << 8 | 1 << 12, - Instruction::CONST_4 | 1 << 8 | 3 << 12, - Instruction::CONST_4 | 2 << 8 | 4 << 12, + Instruction::CONST_4 | 1 << 8 | 4 << 12, + Instruction::CONST_4 | 2 << 8 | 5 << 12, Instruction::ADD_INT_2ADDR | 1 << 8 | 2 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, Instruction::RETURN | 2 << 8); @@ -234,24 +238,28 @@ TEST(ConstantFolding, IntConstantFoldingOnAddition2) { { " 5: IntConstant [9]\n", " 5: IntConstant\n" }, { " 11: IntConstant [17]\n", " 11: IntConstant\n" }, { " 13: IntConstant [17]\n", " 13: IntConstant\n" }, - { " 9: Add(3, 5) [21]\n", " 28: IntConstant\n" }, - { " 17: Add(11, 13) [21]\n", " 29: IntConstant\n" }, - { " 21: Add(9, 17) [24]\n", " 30: IntConstant [24]\n" }, + { " 26: SuspendCheck\n", " 26: SuspendCheck\n" + " 28: IntConstant\n" + " 29: IntConstant\n" + " 30: IntConstant [24]\n" }, + { " 9: Add(3, 5) [21]\n", removed }, + { " 17: Add(11, 13) [21]\n", removed }, + { " 21: Add(9, 17) [24]\n", removed }, { " 24: Return(21)\n", " 24: Return(30)\n" } }; std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the values of the computed constants. 
auto check_after_cf = [](HGraph* graph) { - HInstruction* inst1 = graph->GetBlock(1)->GetFirstInstruction(); + HInstruction* inst1 = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0); ASSERT_TRUE(inst1->IsIntConstant()); - ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 3); - HInstruction* inst2 = inst1->GetNext(); + ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 12); + HInstruction* inst2 = inst1->GetPrevious(); ASSERT_TRUE(inst2->IsIntConstant()); - ASSERT_EQ(inst2->AsIntConstant()->GetValue(), 7); - HInstruction* inst3 = inst2->GetNext(); + ASSERT_EQ(inst2->AsIntConstant()->GetValue(), 9); + HInstruction* inst3 = inst2->GetPrevious(); ASSERT_TRUE(inst3->IsIntConstant()); - ASSERT_EQ(inst3->AsIntConstant()->GetValue(), 10); + ASSERT_EQ(inst3->AsIntConstant()->GetValue(), 3); }; // Expected difference after dead code elimination. @@ -306,14 +314,16 @@ TEST(ConstantFolding, IntConstantFoldingOnSubtraction) { diff_t expected_cf_diff = { { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, { " 5: IntConstant [9]\n", " 5: IntConstant\n" }, - { " 9: Sub(3, 5) [12]\n", " 16: IntConstant [12]\n" }, + { " 14: SuspendCheck\n", " 14: SuspendCheck\n" + " 16: IntConstant [12]\n" }, + { " 9: Sub(3, 5) [12]\n", removed }, { " 12: Return(9)\n", " 12: Return(16)\n" } }; std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the value of the computed constant. auto check_after_cf = [](HGraph* graph) { - HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); + HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0); ASSERT_TRUE(inst->IsIntConstant()); ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1); }; @@ -368,14 +378,16 @@ TEST(ConstantFolding, LongConstantFoldingOnAddition) { diff_t expected_cf_diff = { { " 6: LongConstant [12]\n", " 6: LongConstant\n" }, { " 8: LongConstant [12]\n", " 8: LongConstant\n" }, - { " 12: Add(6, 8) [15]\n", " 19: LongConstant [15]\n" }, + { " 17: SuspendCheck\n", " 17: SuspendCheck\n" + " 19: LongConstant [15]\n" }, + { " 12: Add(6, 8) [15]\n", removed }, { " 15: Return(12)\n", " 15: Return(19)\n" } }; std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the value of the computed constant. auto check_after_cf = [](HGraph* graph) { - HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); + HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0); ASSERT_TRUE(inst->IsLongConstant()); ASSERT_EQ(inst->AsLongConstant()->GetValue(), 3); }; @@ -431,14 +443,16 @@ TEST(ConstantFolding, LongConstantFoldingOnSubtraction) { diff_t expected_cf_diff = { { " 6: LongConstant [12]\n", " 6: LongConstant\n" }, { " 8: LongConstant [12]\n", " 8: LongConstant\n" }, - { " 12: Sub(6, 8) [15]\n", " 19: LongConstant [15]\n" }, + { " 17: SuspendCheck\n", " 17: SuspendCheck\n" + " 19: LongConstant [15]\n" }, + { " 12: Sub(6, 8) [15]\n", removed }, { " 15: Return(12)\n", " 15: Return(19)\n" } }; std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the value of the computed constant. auto check_after_cf = [](HGraph* graph) { - HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); + HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0); ASSERT_TRUE(inst->IsLongConstant()); ASSERT_EQ(inst->AsLongConstant()->GetValue(), 1); }; @@ -469,51 +483,51 @@ TEST(ConstantFolding, LongConstantFoldingOnSubtraction) { * 16-bit * offset * ------ - * v0 <- 0 0. const/4 v0, #+0 - * v1 <- 1 1. const/4 v1, #+1 + * v0 <- 1 0. const/4 v0, #+1 + * v1 <- 2 1. 
const/4 v1, #+2 * v2 <- v0 + v1 2. add-int v2, v0, v1 * goto L2 4. goto +4 - * L1: v1 <- v0 + 3 5. add-int/lit16 v1, v0, #+3 + * L1: v1 <- v0 + 5 5. add-int/lit16 v1, v0, #+5 * goto L3 7. goto +4 - * L2: v0 <- v2 + 2 8. add-int/lit16 v0, v2, #+2 + * L2: v0 <- v2 + 4 8. add-int/lit16 v0, v2, #+4 * goto L1 10. goto +(-5) - * L3: v2 <- v1 + 4 11. add-int/lit16 v2, v1, #+4 + * L3: v2 <- v1 + 8 11. add-int/lit16 v2, v1, #+8 * return v2 13. return v2 */ TEST(ConstantFolding, IntConstantFoldingAndJumps) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( - Instruction::CONST_4 | 0 << 8 | 0 << 12, - Instruction::CONST_4 | 1 << 8 | 1 << 12, + Instruction::CONST_4 | 0 << 8 | 1 << 12, + Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, Instruction::GOTO | 4 << 8, - Instruction::ADD_INT_LIT16 | 1 << 8 | 0 << 12, 3, + Instruction::ADD_INT_LIT16 | 1 << 8 | 0 << 12, 5, Instruction::GOTO | 4 << 8, - Instruction::ADD_INT_LIT16 | 0 << 8 | 2 << 12, 2, + Instruction::ADD_INT_LIT16 | 0 << 8 | 2 << 12, 4, static_cast<uint16_t>(Instruction::GOTO | -5 << 8), - Instruction::ADD_INT_LIT16 | 2 << 8 | 1 << 12, 4, + Instruction::ADD_INT_LIT16 | 2 << 8 | 1 << 12, 8, Instruction::RETURN | 2 << 8); std::string expected_before = "BasicBlock 0, succ: 1\n" - " 3: IntConstant [9]\n" // v0 <- 0 - " 5: IntConstant [9]\n" // v1 <- 1 - " 13: IntConstant [14]\n" // const 3 - " 18: IntConstant [19]\n" // const 2 - " 24: IntConstant [25]\n" // const 4 + " 3: IntConstant [9]\n" // v0 <- 1 + " 5: IntConstant [9]\n" // v1 <- 2 + " 13: IntConstant [14]\n" // const 5 + " 18: IntConstant [19]\n" // const 4 + " 24: IntConstant [25]\n" // const 8 " 30: SuspendCheck\n" " 31: Goto 1\n" "BasicBlock 1, pred: 0, succ: 3\n" - " 9: Add(3, 5) [19]\n" // v2 <- v0 + v1 = 0 + 1 = 1 + " 9: Add(3, 5) [19]\n" // v2 <- v0 + v1 = 1 + 2 = 3 " 11: Goto 3\n" // goto L2 "BasicBlock 2, pred: 3, succ: 4\n" // L1: - " 14: Add(19, 13) [25]\n" // v1 <- v0 + 3 = 3 + 3 = 6 + " 14: Add(19, 13) [25]\n" // v1 <- v0 + 3 = 7 + 5 = 12 " 16: Goto 4\n" // goto L3 "BasicBlock 3, pred: 1, succ: 2\n" // L2: - " 19: Add(9, 18) [14]\n" // v0 <- v2 + 2 = 1 + 2 = 3 + " 19: Add(9, 18) [14]\n" // v0 <- v2 + 2 = 3 + 4 = 7 " 21: SuspendCheck\n" " 22: Goto 2\n" // goto L1 "BasicBlock 4, pred: 2, succ: 5\n" // L3: - " 25: Add(14, 24) [28]\n" // v2 <- v1 + 4 = 6 + 4 = 10 + " 25: Add(14, 24) [28]\n" // v2 <- v1 + 4 = 12 + 8 = 20 " 28: Return(25)\n" // return v2 "BasicBlock 5, pred: 4\n" " 29: Exit\n"; @@ -525,28 +539,33 @@ TEST(ConstantFolding, IntConstantFoldingAndJumps) { { " 13: IntConstant [14]\n", " 13: IntConstant\n" }, { " 18: IntConstant [19]\n", " 18: IntConstant\n" }, { " 24: IntConstant [25]\n", " 24: IntConstant\n" }, - { " 9: Add(3, 5) [19]\n", " 32: IntConstant []\n" }, - { " 14: Add(19, 13) [25]\n", " 34: IntConstant\n" }, - { " 19: Add(9, 18) [14]\n", " 33: IntConstant []\n" }, - { " 25: Add(14, 24) [28]\n", " 35: IntConstant [28]\n" }, + { " 30: SuspendCheck\n", " 30: SuspendCheck\n" + " 32: IntConstant []\n" + " 33: IntConstant []\n" + " 34: IntConstant\n" + " 35: IntConstant [28]\n" }, + { " 9: Add(3, 5) [19]\n", removed }, + { " 14: Add(19, 13) [25]\n", removed }, + { " 19: Add(9, 18) [14]\n", removed }, + { " 25: Add(14, 24) [28]\n", removed }, { " 28: Return(25)\n", " 28: Return(35)\n"} }; std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the values of the computed constants. 
auto check_after_cf = [](HGraph* graph) { - HInstruction* inst1 = graph->GetBlock(1)->GetFirstInstruction(); + HInstruction* inst1 = graph->GetBlock(4)->GetFirstInstruction()->InputAt(0); ASSERT_TRUE(inst1->IsIntConstant()); - ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 1); - HInstruction* inst2 = graph->GetBlock(2)->GetFirstInstruction(); + ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 20); + HInstruction* inst2 = inst1->GetPrevious(); ASSERT_TRUE(inst2->IsIntConstant()); - ASSERT_EQ(inst2->AsIntConstant()->GetValue(), 6); - HInstruction* inst3 = graph->GetBlock(3)->GetFirstInstruction(); + ASSERT_EQ(inst2->AsIntConstant()->GetValue(), 12); + HInstruction* inst3 = inst2->GetPrevious(); ASSERT_TRUE(inst3->IsIntConstant()); - ASSERT_EQ(inst3->AsIntConstant()->GetValue(), 3); - HInstruction* inst4 = graph->GetBlock(4)->GetFirstInstruction(); + ASSERT_EQ(inst3->AsIntConstant()->GetValue(), 7); + HInstruction* inst4 = inst3->GetPrevious(); ASSERT_TRUE(inst4->IsIntConstant()); - ASSERT_EQ(inst4->AsIntConstant()->GetValue(), 10); + ASSERT_EQ(inst4->AsIntConstant()->GetValue(), 3); }; // Expected difference after dead code elimination. @@ -611,25 +630,25 @@ TEST(ConstantFolding, ConstantCondition) { // Expected difference after constant folding. diff_t expected_cf_diff = { - { " 3: IntConstant [15, 22, 8]\n", " 3: IntConstant [15, 22]\n" }, + { " 3: IntConstant [15, 22, 8]\n", " 3: IntConstant [9, 15, 22]\n" }, { " 5: IntConstant [22, 8]\n", " 5: IntConstant [22]\n" }, - { " 8: GreaterThanOrEqual(3, 5) [9]\n", " 23: IntConstant [9]\n" }, - { " 9: If(8)\n", " 9: If(23)\n" } + { " 8: GreaterThanOrEqual(3, 5) [9]\n", removed }, + { " 9: If(8)\n", " 9: If(3)\n" } }; std::string expected_after_cf = Patch(expected_before, expected_cf_diff); // Check the values of the computed constants. auto check_after_cf = [](HGraph* graph) { - HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction(); + HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0); ASSERT_TRUE(inst->IsIntConstant()); ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1); }; // Expected difference after dead code elimination. diff_t expected_dce_diff = { - { " 3: IntConstant [15, 22]\n", " 3: IntConstant [22]\n" }, - { " 22: Phi(3, 5) [15]\n", " 22: Phi(3, 5)\n" }, - { " 15: Add(22, 3)\n", removed } + { " 3: IntConstant [9, 15, 22]\n", " 3: IntConstant [9, 22]\n" }, + { " 22: Phi(3, 5) [15]\n", " 22: Phi(3, 5)\n" }, + { " 15: Add(22, 3)\n", removed } }; std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 09a3ae431f..7c3c2bf03d 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -80,8 +80,7 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } // Ensure `block` ends with a branch instruction. 
- HInstruction* last_inst = block->GetLastInstruction(); - if (last_inst == nullptr || !last_inst->IsControlFlow()) { + if (!block->EndsWithControlFlowInstruction()) { AddError(StringPrintf("Block %d does not end with a branch instruction.", block->GetBlockId())); } @@ -476,4 +475,15 @@ void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { } } +void SSAChecker::VisitConstant(HConstant* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (!block->IsEntryBlock()) { + AddError(StringPrintf( + "%s %d should be in the entry block but is in block %d.", + instruction->DebugName(), + instruction->GetId(), + block->GetBlockId())); + } +} + } // namespace art diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 5ec3003ac8..89fea0a07f 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -107,6 +107,7 @@ class SSAChecker : public GraphChecker { void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE; void VisitCondition(HCondition* op) OVERRIDE; void VisitIf(HIf* instruction) OVERRIDE; + void VisitConstant(HConstant* instruction) OVERRIDE; private: DISALLOW_COPY_AND_ASSIGN(SSAChecker); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index 4742e4d073..50398b4790 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -28,8 +28,7 @@ namespace art { static HBasicBlock* createIfBlock(HGraph* graph, ArenaAllocator* allocator) { HBasicBlock* if_block = new (allocator) HBasicBlock(graph); graph->AddBlock(if_block); - HInstruction* instr = new (allocator) HIntConstant(4); - if_block->AddInstruction(instr); + HInstruction* instr = graph->GetIntConstant(4); HInstruction* equal = new (allocator) HEqual(instr, instr); if_block->AddInstruction(equal); instr = new (allocator) HIf(equal); @@ -45,6 +44,12 @@ static HBasicBlock* createGotoBlock(HGraph* graph, ArenaAllocator* allocator) { return block; } +static HBasicBlock* createEntryBlock(HGraph* graph, ArenaAllocator* allocator) { + HBasicBlock* block = createGotoBlock(graph, allocator); + graph->SetEntryBlock(block); + return block; +} + static HBasicBlock* createReturnBlock(HGraph* graph, ArenaAllocator* allocator) { HBasicBlock* block = new (allocator) HBasicBlock(graph); graph->AddBlock(block); @@ -69,7 +74,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock1) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_true = createGotoBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -104,7 +109,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock2) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_false = createGotoBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -139,12 +144,11 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* entry_block = createEntryBlock(graph, 
&allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); HBasicBlock* exit_block = createExitBlock(graph, &allocator); - graph->SetEntryBlock(entry_block); entry_block->AddSuccessor(if_block); if_block->AddSuccessor(if_block); if_block->AddSuccessor(return_block); @@ -175,12 +179,11 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); HBasicBlock* exit_block = createExitBlock(graph, &allocator); - graph->SetEntryBlock(entry_block); entry_block->AddSuccessor(if_block); if_block->AddSuccessor(return_block); if_block->AddSuccessor(if_block); @@ -211,13 +214,12 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders1) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* loop_block = createGotoBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); - graph->SetEntryBlock(entry_block); entry_block->AddSuccessor(first_if_block); first_if_block->AddSuccessor(if_block); first_if_block->AddSuccessor(loop_block); @@ -251,13 +253,12 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders2) { ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* loop_block = createGotoBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); - graph->SetEntryBlock(entry_block); entry_block->AddSuccessor(first_if_block); first_if_block->AddSuccessor(if_block); first_if_block->AddSuccessor(loop_block); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index cabfa488c0..49c0d3884f 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -149,6 +149,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor { codegen_.DumpCoreRegister(output_, location.low()); output_ << " and "; codegen_.DumpCoreRegister(output_, location.high()); + } else if (location.IsUnallocated()) { + output_ << "<U>"; } else { DCHECK(location.IsDoubleStackSlot()); output_ << "2x" << location.GetStackIndex() << "(sp)"; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 4b990f1ddd..2c17a67867 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -49,7 +49,8 @@ void HInliner::Run() { for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { HInstruction* next = instruction->GetNext(); HInvokeStaticOrDirect* call = instruction->AsInvokeStaticOrDirect(); - if (call != nullptr) { + // As long as the call is not intrinsified, it is worth trying to inline. 
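// Restated as a standalone predicate (sketch): calls already recognized as
// intrinsics keep their call site so the back end can replace them with
// dedicated code, while everything else remains an inlining candidate.
static bool IsInliningCandidate(HInvokeStaticOrDirect* call) {
  return call != nullptr && call->GetIntrinsic() == Intrinsics::kNone;
}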
+ if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { // We use the original invoke type to ensure the resolution of the called method // works properly. if (!TryInline(call, call->GetDexMethodIndex(), call->GetOriginalInvokeType())) { diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2ef19b92a1..56ec8a7ed1 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -292,8 +292,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { // MUL dst, src, pow_of_2 // with // SHL dst, src, log2(pow_of_2) - HIntConstant* shift = new (allocator) HIntConstant(WhichPowerOf2(factor)); - block->InsertInstructionBefore(shift, instruction); + HIntConstant* shift = GetGraph()->GetIntConstant(WhichPowerOf2(factor)); HShl* shl = new(allocator) HShl(type, input_other, shift); block->ReplaceAndRemoveInstructionWith(instruction, shl); } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 36cf8568e5..628a844cc7 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -191,8 +191,10 @@ static Intrinsics GetIntrinsic(InlineMethod method) { case kIntrinsicCompareTo: return Intrinsics::kStringCompareTo; case kIntrinsicIsEmptyOrLength: - return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ? - Intrinsics::kStringLength : Intrinsics::kStringIsEmpty; + // The inliner can handle these two cases - and this is the preferred approach + // since after inlining the call is no longer visible (as opposed to waiting + // until codegen to handle intrinsic). + return Intrinsics::kNone; case kIntrinsicIndexOf: return ((method.d.data & kIntrinsicFlagBase0) == 0) ? Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 0c9eb94172..33176f009c 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -847,6 +847,36 @@ void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { + // The inputs plus one temp. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetOut(Location::RegisterLocation(R0)); +} + +void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheck()); + + Register argument = locations->InAt(1).AsRegister<Register>(); + __ cmp(argument, ShifterOperand(0)); + SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), EQ); + + __ LoadFromOffset( + kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pStringCompareTo).Int32Value()); + __ blx(LR); + __ Bind(slow_path->GetExitLabel()); +} + // Unimplemented intrinsics. 
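// The branch structure emitted above, as a self-contained sketch (the
// function-pointer names are illustrative stand-ins, not ART symbols):
#include <cstdint>
using CompareToFn = int32_t (*)(const void* str, const void* arg);
static int32_t CompareToIntrinsic(const void* str, const void* arg,
                                  CompareToFn entrypoint, CompareToFn slow_path) {
  if (arg == nullptr) {          // cmp argument, #0
    return slow_path(str, arg);  // b(..., EQ): original invoke, raises the NPE
  }
  return entrypoint(str, arg);   // LoadFromOffset + blx pStringCompareTo
}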
#define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -873,9 +903,6 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringCompareTo) -UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should -UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here. UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 19b04ae094..72d303c870 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -40,6 +40,7 @@ namespace arm64 { using helpers::DRegisterFrom; using helpers::FPRegisterFrom; using helpers::HeapOperand; +using helpers::LocationFrom; using helpers::RegisterFrom; using helpers::SRegisterFrom; using helpers::WRegisterFrom; @@ -990,6 +991,36 @@ void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { + // The inputs plus one temp. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); +} + +void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { + vixl::MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheck()); + + Register argument = WRegisterFrom(locations->InAt(1)); + __ Cmp(argument, 0); + SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(slow_path); + __ B(eq, slow_path->GetEntryLabel()); + + __ Ldr( + lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pStringCompareTo).Int32Value())); + __ Blr(lr); + __ Bind(slow_path->GetExitLabel()); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -999,9 +1030,6 @@ void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED } UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringCompareTo) -UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should -UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here. 
UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index 9cc77c6251..10f6e1d6c7 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -60,10 +60,8 @@ V(MemoryPokeShortNative, kStatic) \ V(StringCharAt, kDirect) \ V(StringCompareTo, kDirect) \ - V(StringIsEmpty, kDirect) \ V(StringIndexOf, kDirect) \ V(StringIndexOfAfter, kDirect) \ - V(StringLength, kDirect) \ V(UnsafeCASInt, kDirect) \ V(UnsafeCASLong, kDirect) \ V(UnsafeCASObject, kDirect) \ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc new file mode 100644 index 0000000000..384737f55a --- /dev/null +++ b/compiler/optimizing/intrinsics_x86.cc @@ -0,0 +1,1208 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsics_x86.h" + +#include "code_generator_x86.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "intrinsics.h" +#include "mirror/array-inl.h" +#include "mirror/art_method.h" +#include "mirror/string.h" +#include "thread.h" +#include "utils/x86/assembler_x86.h" +#include "utils/x86/constants_x86.h" + +namespace art { + +namespace x86 { + +static constexpr int kDoubleNaNHigh = 0x7FF80000; +static constexpr int kDoubleNaNLow = 0x00000000; +static constexpr int kFloatNaN = 0x7FC00000; + +X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() { + return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); +} + +ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() { + return codegen_->GetGraph()->GetArena(); +} + +bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())-> + +// TODO: target as memory. 
+static void MoveFromReturnRegister(Location target,
+                                   Primitive::Type type,
+                                   CodeGeneratorX86* codegen) {
+  if (!target.IsValid()) {
+    DCHECK(type == Primitive::kPrimVoid);
+    return;
+  }
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      Register target_reg = target.AsRegister<Register>();
+      if (target_reg != EAX) {
+        __ movl(target_reg, EAX);
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      Register target_reg_lo = target.AsRegisterPairLow<Register>();
+      Register target_reg_hi = target.AsRegisterPairHigh<Register>();
+      if (target_reg_lo != EAX) {
+        __ movl(target_reg_lo, EAX);
+      }
+      if (target_reg_hi != EDX) {
+        __ movl(target_reg_hi, EDX);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected void type for valid location " << target;
+      UNREACHABLE();
+
+    case Primitive::kPrimDouble: {
+      XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
+      if (target_reg != XMM0) {
+        __ movsd(target_reg, XMM0);
+      }
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
+      if (target_reg != XMM0) {
+        __ movss(target_reg, XMM0);
+      }
+      break;
+    }
+  }
+}
+
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
+  if (invoke->InputCount() == 0) {
+    return;
+  }
+
+  LocationSummary* locations = invoke->GetLocations();
+  InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+  // We're moving potentially two or more locations to locations that could overlap, so we need
+  // a parallel move resolver.
+  HParallelMove parallel_move(arena);
+
+  for (size_t i = 0; i < invoke->InputCount(); i++) {
+    HInstruction* input = invoke->InputAt(i);
+    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+    Location actual_loc = locations->InAt(i);
+
+    parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+  }
+
+  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+// summary. If an intrinsic modifies those locations before a slowpath call, they must be
+// restored!
+class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
+ public:
+  explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
+      : invoke_(invoke) {
+    // The temporary register has to be EAX for x86 invokes.
+    DCHECK_EQ(temp, EAX);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+    CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
+    __ Bind(GetEntryLabel());
+
+    SaveLiveRegisters(codegen, invoke_->GetLocations());
+
+    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
+    } else {
+      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+      UNREACHABLE();
+    }
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+ DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + MoveFromReturnRegister(out, invoke_->GetType(), codegen); + } + + RestoreLiveRegisters(codegen, invoke_->GetLocations()); + __ jmp(GetExitLabel()); + } + + private: + // The instruction where this slow path is happening. + HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86); +}; + +#undef __ +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + if (is64bit) { + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + if (is64bit) { + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + // Need to use the temporary. + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(temp, input.AsFpuRegister<XmmRegister>()); + __ movd(output.AsRegisterPairLow<Register>(), temp); + __ psrlq(temp, Immediate(32)); + __ movd(output.AsRegisterPairHigh<Register>(), temp); + } else { + __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>()); + } +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + // Need to use the temporary. 
+ XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + __ movd(temp1, input.AsRegisterPairLow<Register>()); + __ movd(temp2, input.AsRegisterPairHigh<Register>()); + __ punpckldq(temp1, temp2); + __ movsd(output.AsFpuRegister<XmmRegister>(), temp1); + } else { + __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>()); + } +} + +void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke, true); +} +void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke, true); +} + +void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} +void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke, false); +} +void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke, false); +} + +void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} +void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +static void GenReverseBytes(LocationSummary* locations, + Primitive::Type size, + X86Assembler* assembler) { + Register out = locations->Out().AsRegister<Register>(); + + switch (size) { + case Primitive::kPrimShort: + // TODO: Can be done with an xchg of 8b registers. This is straight from Quick. 
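// Portable model of the bswapl + sarl pair emitted below for short values
// (illustrative sketch; __builtin_bswap32 assumes a GCC/Clang toolchain):
#include <cassert>
#include <cstdint>
int main() {
  int32_t out = 0x1234;          // the short, widened in a 32-bit register
  out = __builtin_bswap32(out);  // 0x34120000: swapped short in the high half
  out >>= 16;                    // arithmetic shift moves it down, re-extending the sign
  assert(out == 0x3412);
  return 0;
}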
+      __ bswapl(out);
+      __ sarl(out, Immediate(16));
+      break;
+    case Primitive::kPrimInt:
+      __ bswapl(out);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
+      UNREACHABLE();
+  }
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+
+// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
+// need is 64b.
+
+static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
+  // TODO: Enable memory operations when the assembler supports them.
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
+  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
+  Location output = locations->Out();
+
+  if (output.IsFpuRegister()) {
+    // Create the right constant on an aligned stack.
+    if (is64bit) {
+      __ subl(ESP, Immediate(8));
+      __ pushl(Immediate(0x7FFFFFFF));
+      __ pushl(Immediate(0xFFFFFFFF));
+      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+    } else {
+      __ subl(ESP, Immediate(12));
+      __ pushl(Immediate(0x7FFFFFFF));
+      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+    }
+    __ addl(ESP, Immediate(16));
+  } else {
+    // TODO: update when assembler support is available.
+    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
+//  Once assembler support is available, in-memory operations look like this:
+//  if (is64bit) {
+//    DCHECK(output.IsDoubleStackSlot());
+//    __ andl(Address(Register(RSP), output.GetHighStackIndex(kX86WordSize)),
+//            Immediate(0x7FFFFFFF));
+//  } else {
+//    DCHECK(output.IsStackSlot());
+//    // Can use and with a literal directly.
+// __ andl(Address(Register(RSP), output.GetStackIndex()), Immediate(0x7FFFFFFF)); +// } + } +} + +void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { + CreateFloatToFloat(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { + CreateFloatToFloat(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RegisterLocation(EAX)); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RegisterLocation(EDX)); +} + +static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) { + Location output = locations->Out(); + Register out = output.AsRegister<Register>(); + DCHECK_EQ(out, EAX); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + DCHECK_EQ(temp, EDX); + + // Sign extend EAX into EDX. + __ cdq(); + + // XOR EAX with sign. + __ xorl(EAX, EDX); + + // Subtract out sign to correct. + __ subl(EAX, EDX); + + // The result is in EAX. +} + +static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) { + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + // Compute the sign into the temporary. + __ movl(temp, input_hi); + __ sarl(temp, Immediate(31)); + + // Store the sign into the output. + __ movl(output_lo, temp); + __ movl(output_hi, temp); + + // XOR the input to the output. + __ xorl(output_lo, input_lo); + __ xorl(output_hi, input_hi); + + // Subtract the sign. + __ subl(output_lo, temp); + __ sbbl(output_hi, temp); +} + +void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) { + CreateAbsIntLocation(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) { + CreateAbsLongLocation(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { + GenAbsLong(invoke->GetLocations(), GetAssembler()); +} + +static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, + X86Assembler* assembler) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. 
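// The two abs sequences above, in portable form (a sketch assuming the usual
// arithmetic right shift on signed values): clearing the IEEE sign bit for
// floats, and the sign-propagate/xor/subtract trick behind cdq/xorl/subl.
#include <cassert>
#include <cstdint>
#include <cstring>
static float AbsFloat(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  bits &= 0x7FFFFFFFu;  // andps with the mask built on the stack
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}
static int32_t AbsInt(int32_t x) {
  int32_t sign = x >> 31;    // cdq: sign is 0 or -1
  return (x ^ sign) - sign;  // xorl + subl
}
int main() {
  assert(AbsFloat(-2.5f) == 2.5f);
  assert(AbsInt(-7) == 7 && AbsInt(7) == 7);
  return 0;
}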
+ if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + Label nan, done, op2_label; + if (is_double) { + __ ucomisd(out, op2); + } else { + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (is_double) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (is_double) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (is_double) { + __ pushl(Immediate(kDoubleNaNHigh)); + __ pushl(Immediate(kDoubleNaNLow)); + __ movsd(out, Address(ESP, 0)); + __ addl(ESP, Immediate(8)); + } else { + __ pushl(Immediate(kFloatNaN)); + __ movss(out, Address(ESP, 0)); + __ addl(ESP, Immediate(4)); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (is_double) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. It would be fine to also accept + // the second input to be the output (we can simply swap inputs). + locations->SetOut(Location::SameAsFirstInput()); +} + +void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, + X86Assembler* assembler) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. 
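// Why the FP path above resolves equal magnitudes with orpd/andpd (sketch):
// 0.0 and -0.0 compare equal, but min must produce -0.0 and max +0.0, and
// OR/AND of the raw bit patterns do exactly that, since the two values
// differ only in the sign bit.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
static uint64_t Bits(double d) { uint64_t b; std::memcpy(&b, &d, sizeof(b)); return b; }
static double FromBits(uint64_t b) { double d; std::memcpy(&d, &b, sizeof(d)); return d; }
int main() {
  double pz = 0.0, nz = -0.0;
  assert(std::signbit(FromBits(Bits(pz) | Bits(nz))));   // orpd  -> -0.0, for min
  assert(!std::signbit(FromBits(Bits(pz) & Bits(nz))));  // andpd -> +0.0, for max
  return 0;
}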
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + if (is_long) { + // Need to perform a subtract to get the sign right. + // op1 is already in the same location as the output. + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); + Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); + + // Spare register to compute the subtraction to set condition code. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + // Subtract off op2_low. + __ movl(temp, output_lo); + __ subl(temp, op2_lo); + + // Now use the same tempo and the borrow to finish the subtraction of op2_hi. + __ movl(temp, output_hi); + __ sbbl(temp, op2_hi); + + // Now the condition code is correct. + Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; + __ cmovl(cond, output_lo, op2_lo); + __ cmovl(cond, output_hi, op2_hi); + } else { + Register out = locations->Out().AsRegister<Register>(); + Register op2 = op2_loc.AsRegister<Register>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + __ cmpl(out, op2); + Condition cond = is_min ? Condition::kGreater : Condition::kLess; + __ cmovl(cond, out, op2); + } +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + // Register to use to perform a long subtract to set cc. 
+ locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { + CreateLongLongToLongLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { + CreateLongLongToLongLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, true, GetAssembler()); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + + GetAssembler()->sqrtsd(out, in); +} + +void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { + // The inputs plus one temp. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + // Needs to be EAX for the invoke. + locations->AddTemp(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + + // Location of reference to data array + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + // Starting offset within data array + const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value(); + // Start of char data with array_ + const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value(); + + Register obj = locations->InAt(0).AsRegister<Register>(); + Register idx = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); + + // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth + // the cost. + // TODO: For simplicity, the index parameter is requested in a register, so different from Quick + // we will not optimize the code for constants (which would save a register). 
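// The cmpl + j(kAboveEqual) pair emitted just below does the whole bounds
// check in one unsigned compare (sketch): a negative index wraps to a huge
// unsigned value, so it trips the same >= count test.
#include <cassert>
#include <cstdint>
static bool CharAtIndexOutOfBounds(int32_t idx, int32_t count) {
  return static_cast<uint32_t>(idx) >= static_cast<uint32_t>(count);
}
int main() {
  assert(CharAtIndexOutOfBounds(-1, 10));  // negative index -> slow path
  assert(CharAtIndexOutOfBounds(10, 10));  // one past the end -> slow path
  assert(!CharAtIndexOutOfBounds(9, 10));
  return 0;
}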
+
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp);
+  codegen_->AddSlowPath(slow_path);
+
+  X86Assembler* assembler = GetAssembler();
+
+  __ cmpl(idx, Address(obj, count_offset));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ j(kAboveEqual, slow_path->GetEntryLabel());
+
+  // Get the actual element.
+  __ movl(temp, idx);                          // temp := idx.
+  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
+  __ movl(out, Address(obj, value_offset));    // out := obj.array.
+  // out = out[2*temp].
+  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
+  // The inputs plus one temp.
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetOut(Location::RegisterLocation(EAX));
+  // Needs to be EAX for the invoke.
+  locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheck());
+
+  Register argument = locations->InAt(1).AsRegister<Register>();
+  __ testl(argument, argument);
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
+      invoke, locations->GetTemp(0).AsRegister<Register>());
+  codegen_->AddSlowPath(slow_path);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
+  __ Bind(slow_path->GetExitLabel());
+}
+
+static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
+  Register address = locations->InAt(0).AsRegisterPairLow<Register>();
+  Location out_loc = locations->Out();
+  // x86 allows unaligned access. We do not have to check the input or use specific instructions
+  // to avoid a SIGBUS.
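// In portable terms, each peek below is the memcpy idiom for an unaligned
// load, which x86 performs with a plain mov (sketch; note the 64-bit Java
// address is truncated to the 32-bit address space, matching the use of the
// register-pair low half above):
#include <cstdint>
#include <cstring>
static int32_t PeekIntNative(int64_t address) {
  int32_t value;
  std::memcpy(&value, reinterpret_cast<const void*>(static_cast<uintptr_t>(address)),
              sizeof(value));
  return value;
}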
+ switch (size) { + case Primitive::kPrimByte: + __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0)); + break; + case Primitive::kPrimShort: + __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0)); + break; + case Primitive::kPrimInt: + __ movl(out_loc.AsRegister<Register>(), Address(address, 0)); + break; + case Primitive::kPrimLong: + __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0)); + __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4)); + break; + default: + LOG(FATAL) << "Type not recognized for peek: " << size; + UNREACHABLE(); + } +} + +void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) { + CreateLongToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateLongToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateLongToLongLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateLongToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + +static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + HInstruction *value = invoke->InputAt(1); + if (size == Primitive::kPrimByte) { + locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value)); + } else { + locations->SetInAt(1, Location::RegisterOrConstant(value)); + } +} + +static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) { + Register address = locations->InAt(0).AsRegisterPairLow<Register>(); + Location value_loc = locations->InAt(1); + // x86 allows unaligned access. We do not have to check the input or use specific instructions + // to avoid a SIGBUS. 
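// The long case below splits the store into two 32-bit words, mirroring
// Low32Bits/High32Bits (sketch; the pair of movl stores is not atomic, which
// Memory.pokeLongNative does not require):
#include <cstdint>
static void StoreLongAsTwoWords(uint32_t* mem, int64_t value) {
  mem[0] = static_cast<uint32_t>(value);        // Low32Bits(value)
  mem[1] = static_cast<uint32_t>(value >> 32);  // High32Bits(value)
}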
+ switch (size) { + case Primitive::kPrimByte: + if (value_loc.IsConstant()) { + __ movb(Address(address, 0), + Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>()); + } + break; + case Primitive::kPrimShort: + if (value_loc.IsConstant()) { + __ movw(Address(address, 0), + Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ movw(Address(address, 0), value_loc.AsRegister<Register>()); + } + break; + case Primitive::kPrimInt: + if (value_loc.IsConstant()) { + __ movl(Address(address, 0), + Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ movl(Address(address, 0), value_loc.AsRegister<Register>()); + } + break; + case Primitive::kPrimLong: + if (value_loc.IsConstant()) { + int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue(); + __ movl(Address(address, 0), Immediate(Low32Bits(value))); + __ movl(Address(address, 4), Immediate(High32Bits(value))); + } else { + __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>()); + __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>()); + } + break; + default: + LOG(FATAL) << "Type not recognized for poke: " << size; + UNREACHABLE(); + } +} + +void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) { + CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) { + Register out = invoke->GetLocations()->Out().AsRegister<Register>(); + GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>())); +} + +static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, + bool is_volatile, X86Assembler* assembler) { + Register base = locations->InAt(1).AsRegister<Register>(); + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); + Location output = locations->Out(); + + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimNot: + __ movl(output.AsRegister<Register>(), Address(base, offset, ScaleFactor::TIMES_1, 0)); + break; + + case Primitive::kPrimLong: { + Register output_lo = 
output.AsRegisterPairLow<Register>();
+      Register output_hi = output.AsRegisterPairHigh<Register>();
+      if (is_volatile) {
+        // Need to use an XMM to read atomically.
+        XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+        __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ movd(output_lo, temp);
+        __ psrlq(temp, Immediate(32));
+        __ movd(output_hi, temp);
+      } else {
+        __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
+      }
+    }
+    break;
+
+    default:
+      LOG(FATAL) << "Unsupported op size " << type;
+      UNREACHABLE();
+  }
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
+                                          bool is_long, bool is_volatile) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  if (is_long) {
+    if (is_volatile) {
+      // Need to use XMM to read volatile.
+      locations->AddTemp(Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister());
+    } else {
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+    }
+  } else {
+    locations->SetOut(Location::RequiresRegister());
+  }
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+}
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
+}
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, true, false);
+}
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, true, true);
+}
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+}
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
+}
+
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
+}
+
+
+static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
+                                                       Primitive::Type type,
+                                                       HInvoke* invoke,
+                                                       bool is_volatile) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());  // Unused
receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. + locations->AddTemp(Location::RequiresRegister()); + // Ensure the value is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); + } else if (type == Primitive::kPrimLong && is_volatile) { + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false); +} +void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false); +} +void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, true); +} +void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false); +} +void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false); +} +void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, true); +} +void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false); +} +void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false); +} +void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, true); +} + +// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 +// memory model. 
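// Reference semantics for the volatile long path below, stated with C++
// atomics (sketch): one indivisible 64-bit store -- hence packing the halves
// with punpckldq and writing a single movsd -- followed by a full fence
// (mfence) for the volatile ordering.
#include <atomic>
#include <cstdint>
static void PutLongVolatile(std::atomic<int64_t>* address, int64_t value) {
  address->store(value, std::memory_order_seq_cst);
}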
+static void GenUnsafePut(LocationSummary* locations, + Primitive::Type type, + bool is_volatile, + CodeGeneratorX86* codegen) { + X86Assembler* assembler = reinterpret_cast<X86Assembler*>(codegen->GetAssembler()); + Register base = locations->InAt(1).AsRegister<Register>(); + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); + Location value_loc = locations->InAt(3); + + if (type == Primitive::kPrimLong) { + Register value_lo = value_loc.AsRegisterPairLow<Register>(); + Register value_hi = value_loc.AsRegisterPairHigh<Register>(); + if (is_volatile) { + XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + __ movd(temp1, value_lo); + __ movd(temp2, value_hi); + __ punpckldq(temp1, temp2); + __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1); + } else { + __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo); + __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi); + } + } else { + __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>()); + } + + if (is_volatile) { + __ mfence(); + } + + if (type == Primitive::kPrimNot) { + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + base, + value_loc.AsRegister<Register>()); + } +} + +void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_); +} +void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_); +} +void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); +} + +// Unimplemented intrinsics. 
+ +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(IntegerReverse) +UNIMPLEMENTED_INTRINSIC(LongReverse) +UNIMPLEMENTED_INTRINSIC(LongReverseBytes) +UNIMPLEMENTED_INTRINSIC(MathFloor) +UNIMPLEMENTED_INTRINSIC(MathCeil) +UNIMPLEMENTED_INTRINSIC(MathRint) +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) +UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) + +} // namespace x86 +} // namespace art diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h new file mode 100644 index 0000000000..e1e8260a5f --- /dev/null +++ b/compiler/optimizing/intrinsics_x86.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace x86 { + +class CodeGeneratorX86; +class X86Assembler; + +class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderX86(ArenaAllocator* arena) : arena_(arena) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86); +}; + +class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorX86(CodeGeneratorX86* codegen) : codegen_(codegen) {} + + // Define visitor methods. 
+ +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + X86Assembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorX86* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86); +}; + +} // namespace x86 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_ diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 2064b18138..736cea88cb 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -667,6 +667,34 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetOut(Location::RegisterLocation(RAX)); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheck()); + + CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>(); + __ testl(argument, argument); + SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(slow_path); + __ j(kEqual, slow_path->GetEntryLabel()); + + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true)); + __ Bind(slow_path->GetExitLabel()); +} + static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity. @@ -986,9 +1014,6 @@ UNIMPLEMENTED_INTRINSIC(MathCeil) UNIMPLEMENTED_INTRINSIC(MathRint) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MathRoundFloat) -UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should -UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here. -UNIMPLEMENTED_INTRINSIC(StringCompareTo) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 4f6565d315..dca612e6b7 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -287,39 +287,62 @@ bool HGraph::AnalyzeNaturalLoops() const { return true; } -void HGraph::AddConstant(HConstant* instruction) { - HInstruction* last_instruction = entry_block_->GetLastInstruction(); - if (last_instruction == nullptr || !last_instruction->IsControlFlow()) { - // Called from the builder. Insert at the end of the block. 
- entry_block_->AddInstruction(instruction); +void HGraph::InsertConstant(HConstant* constant) { + // New constants are inserted before the final control-flow instruction + // of the graph, or at its end if called from the graph builder. + if (entry_block_->EndsWithControlFlowInstruction()) { + entry_block_->InsertInstructionBefore(constant, entry_block_->GetLastInstruction()); } else { - // Entry block ends with control-flow. Insert before the last instruction. - entry_block_->InsertInstructionBefore(instruction, last_instruction); + entry_block_->AddInstruction(constant); } } HNullConstant* HGraph::GetNullConstant() { if (cached_null_constant_ == nullptr) { cached_null_constant_ = new (arena_) HNullConstant(); - AddConstant(cached_null_constant_); + InsertConstant(cached_null_constant_); } return cached_null_constant_; } -HIntConstant* HGraph::GetIntConstant0() { - if (cached_int_constant0_ == nullptr) { - cached_int_constant0_ = new (arena_) HIntConstant(0); - AddConstant(cached_int_constant0_); +template <class InstructionType, typename ValueType> +InstructionType* HGraph::CreateConstant(ValueType value, + ArenaSafeMap<ValueType, InstructionType*>* cache) { + // Try to find an existing constant of the given value. + InstructionType* constant = nullptr; + auto cached_constant = cache->find(value); + if (cached_constant != cache->end()) { + constant = cached_constant->second; } - return cached_int_constant0_; + + // If not found or previously deleted, create and cache a new instruction. + if (constant == nullptr || constant->GetBlock() == nullptr) { + constant = new (arena_) InstructionType(value); + cache->Overwrite(value, constant); + InsertConstant(constant); + } + return constant; } -HIntConstant* HGraph::GetIntConstant1() { - if (cached_int_constant1_ == nullptr) { - cached_int_constant1_ = new (arena_) HIntConstant(1); - AddConstant(cached_int_constant1_); +HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) { + switch (type) { + case Primitive::Type::kPrimBoolean: + DCHECK(IsUint<1>(value)); + FALLTHROUGH_INTENDED; + case Primitive::Type::kPrimByte: + case Primitive::Type::kPrimChar: + case Primitive::Type::kPrimShort: + case Primitive::Type::kPrimInt: + DCHECK(IsInt(Primitive::ComponentSize(type) * kBitsPerByte, value)); + return GetIntConstant(static_cast<int32_t>(value)); + + case Primitive::Type::kPrimLong: + return GetLongConstant(value); + + default: + LOG(FATAL) << "Unsupported constant type"; + UNREACHABLE(); } - return cached_int_constant1_; } void HLoopInformation::Add(HBasicBlock* block) { @@ -676,7 +699,7 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) { HConstant* HUnaryOperation::TryStaticEvaluation() const { if (GetInput()->IsIntConstant()) { int32_t value = Evaluate(GetInput()->AsIntConstant()->GetValue()); - return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value); + return GetBlock()->GetGraph()->GetIntConstant(value); } else if (GetInput()->IsLongConstant()) { // TODO: Implement static evaluation of long unary operations. 
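Aside: the cache lookup in CreateConstant above has one subtlety: a cached constant may have been deleted from the graph by a later pass, which is what the GetBlock() == nullptr test detects before Overwrite() replaces the stale entry with a fresh instruction. A standalone sketch with std::map and a toy Node type standing in for ArenaSafeMap and HIntConstant:

#include <cstdint>
#include <map>

struct Node {
  explicit Node(int32_t v) : value(v) {}
  int32_t value;
  bool in_graph = true;  // Stands in for GetBlock() != nullptr.
};

Node* GetIntConstantSketch(int32_t value, std::map<int32_t, Node*>* cache) {
  auto it = cache->find(value);
  if (it != cache->end() && it->second->in_graph) {
    return it->second;               // Reuse the cached, still-live constant.
  }
  Node* constant = new Node(value);  // Arena-allocated in the real code.
  (*cache)[value] = constant;        // Overwrite any stale entry.
  return constant;
}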
// @@ -692,15 +715,15 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const { if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) { int32_t value = Evaluate(GetLeft()->AsIntConstant()->GetValue(), GetRight()->AsIntConstant()->GetValue()); - return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value); + return GetBlock()->GetGraph()->GetIntConstant(value); } else if (GetLeft()->IsLongConstant() && GetRight()->IsLongConstant()) { int64_t value = Evaluate(GetLeft()->AsLongConstant()->GetValue(), GetRight()->AsLongConstant()->GetValue()); if (GetResultType() == Primitive::kPrimLong) { - return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value); + return GetBlock()->GetGraph()->GetLongConstant(value); } else { DCHECK_EQ(GetResultType(), Primitive::kPrimInt); - return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value); + return GetBlock()->GetGraph()->GetIntConstant(static_cast<int32_t>(value)); } } return nullptr; @@ -733,16 +756,6 @@ bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { return this == if_->GetPreviousDisregardingMoves(); } -HConstant* HConstant::NewConstant(ArenaAllocator* allocator, Primitive::Type type, int64_t val) { - if (type == Primitive::kPrimInt) { - DCHECK(IsInt<32>(val)); - return new (allocator) HIntConstant(val); - } else { - DCHECK_EQ(type, Primitive::kPrimLong); - return new (allocator) HLongConstant(val); - } -} - bool HInstruction::Equals(HInstruction* other) const { if (!InstructionTypeEquals(other)) return false; DCHECK_EQ(GetKind(), other->GetKind()); @@ -832,6 +845,10 @@ bool HBasicBlock::IsSingleGoto() const { && (loop_info == nullptr || !loop_info->IsBackEdge(*this)); } +bool HBasicBlock::EndsWithControlFlowInstruction() const { + return !GetInstructions().IsEmpty() && GetLastInstruction()->IsControlFlow(); +} + bool HBasicBlock::EndsWithIf() const { return !GetInstructions().IsEmpty() && GetLastInstruction()->IsIf(); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 9c751fb9c5..21ed3504f1 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_H_ #define ART_COMPILER_OPTIMIZING_NODES_H_ +#include "base/arena_containers.h" #include "base/arena_object.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" @@ -33,16 +34,20 @@ namespace art { class GraphChecker; class HBasicBlock; +class HDoubleConstant; class HEnvironment; +class HFloatConstant; +class HGraphVisitor; class HInstruction; class HIntConstant; class HInvoke; -class HGraphVisitor; +class HLongConstant; class HNullConstant; class HPhi; class HSuspendCheck; class LiveInterval; class LocationSummary; +class SsaBuilder; static const int kDefaultNumberOfBlocks = 8; static const int kDefaultNumberOfSuccessors = 2; @@ -115,7 +120,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { temporaries_vreg_slots_(0), has_array_accesses_(false), debuggable_(debuggable), - current_instruction_id_(start_instruction_id) {} + current_instruction_id_(start_instruction_id), + cached_null_constant_(nullptr), + cached_int_constants_(std::less<int32_t>(), arena->Adapter()), + cached_long_constants_(std::less<int64_t>(), arena->Adapter()) {} ArenaAllocator* GetArena() const { return arena_; } const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; } @@ -128,7 +136,6 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { void SetExitBlock(HBasicBlock* block) { exit_block_ = block; } void AddBlock(HBasicBlock* block); - void 
AddConstant(HConstant* instruction); // Try building the SSA form of this graph, with dominance computation and loop // recognition. Returns whether it was successful in doing all these steps. @@ -219,9 +226,17 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { bool IsDebuggable() const { return debuggable_; } + // Returns a constant of the given type and value. If it does not exist + // already, it is created and inserted into the graph. Only integral types + // are currently supported. + HConstant* GetConstant(Primitive::Type type, int64_t value); HNullConstant* GetNullConstant(); - HIntConstant* GetIntConstant0(); - HIntConstant* GetIntConstant1(); + HIntConstant* GetIntConstant(int32_t value) { + return CreateConstant(value, &cached_int_constants_); + } + HLongConstant* GetLongConstant(int64_t value) { + return CreateConstant(value, &cached_long_constants_); + } private: HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; @@ -235,6 +250,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited) const; + template <class InstType, typename ValueType> + InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache); + void InsertConstant(HConstant* instruction); + ArenaAllocator* const arena_; // List of blocks in insertion order. @@ -269,12 +288,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // The current id to assign to a newly added instruction. See HInstruction.id_. int32_t current_instruction_id_; - // Cached null constant that might be created when building SSA form. - HNullConstant* cached_null_constant_; - // Cached common constants often needed by optimization passes. - HIntConstant* cached_int_constant0_; - HIntConstant* cached_int_constant1_; + HNullConstant* cached_null_constant_; + ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_; + ArenaSafeMap<int64_t, HLongConstant*> cached_long_constants_; ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); DISALLOW_COPY_AND_ASSIGN(HGraph); @@ -602,6 +619,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { bool IsCatchBlock() const { return is_catch_block_; } void SetIsCatchBlock() { is_catch_block_ = true; } + bool EndsWithControlFlowInstruction() const; bool EndsWithIf() const; bool HasSinglePhi() const; @@ -1908,8 +1926,6 @@ class HConstant : public HExpression<0> { virtual bool IsZero() const { return false; } virtual bool IsOne() const { return false; } - static HConstant* NewConstant(ArenaAllocator* allocator, Primitive::Type type, int64_t val); - DECLARE_INSTRUCTION(Constant); private: @@ -1918,8 +1934,6 @@ class HConstant : public HExpression<0> { class HFloatConstant : public HConstant { public: - explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {} - float GetValue() const { return value_; } bool InstructionDataEquals(HInstruction* other) const OVERRIDE { @@ -1944,15 +1958,19 @@ class HFloatConstant : public HConstant { DECLARE_INSTRUCTION(FloatConstant); private: + explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {} + const float value_; + // Only the SsaBuilder can currently create floating-point constants. If we + // ever need to create them later in the pipeline, we will have to handle them + // the same way as integral constants. 
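Aside: a hypothetical caller, to illustrate the intended use of these accessors. Because values are now interned per graph, repeated requests return the identical node, so passes may compare constants by pointer. This assumes the surrounding ART headers:

void FoldToZeroSketch(HGraph* graph, HInstruction* instruction) {
  HConstant* zero = graph->GetConstant(Primitive::kPrimInt, 0);
  DCHECK_EQ(zero, graph->GetIntConstant(0));  // Same cached instruction.
  instruction->ReplaceWith(zero);
}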
+ friend class SsaBuilder; DISALLOW_COPY_AND_ASSIGN(HFloatConstant); }; class HDoubleConstant : public HConstant { public: - explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {} - double GetValue() const { return value_; } bool InstructionDataEquals(HInstruction* other) const OVERRIDE { @@ -1977,15 +1995,19 @@ class HDoubleConstant : public HConstant { DECLARE_INSTRUCTION(DoubleConstant); private: + explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {} + const double value_; + // Only the SsaBuilder can currently create floating-point constants. If we + // ever need to create them later in the pipeline, we will have to handle them + // the same way as integral constants. + friend class SsaBuilder; DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); }; class HNullConstant : public HConstant { public: - HNullConstant() : HConstant(Primitive::kPrimNot) {} - bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } @@ -1997,6 +2019,9 @@ class HNullConstant : public HConstant { DECLARE_INSTRUCTION(NullConstant); private: + HNullConstant() : HConstant(Primitive::kPrimNot) {} + + friend class HGraph; DISALLOW_COPY_AND_ASSIGN(HNullConstant); }; @@ -2004,8 +2029,6 @@ class HNullConstant : public HConstant { // synthesized (for example with the if-eqz instruction). class HIntConstant : public HConstant { public: - explicit HIntConstant(int32_t value) : HConstant(Primitive::kPrimInt), value_(value) {} - int32_t GetValue() const { return value_; } bool InstructionDataEquals(HInstruction* other) const OVERRIDE { @@ -2026,15 +2049,18 @@ class HIntConstant : public HConstant { DECLARE_INSTRUCTION(IntConstant); private: + explicit HIntConstant(int32_t value) : HConstant(Primitive::kPrimInt), value_(value) {} + const int32_t value_; + friend class HGraph; + ART_FRIEND_TEST(GraphTest, InsertInstructionBefore); + ART_FRIEND_TEST(ParallelMoveTest, ConstantLast); DISALLOW_COPY_AND_ASSIGN(HIntConstant); }; class HLongConstant : public HConstant { public: - explicit HLongConstant(int64_t value) : HConstant(Primitive::kPrimLong), value_(value) {} - int64_t GetValue() const { return value_; } bool InstructionDataEquals(HInstruction* other) const OVERRIDE { @@ -2050,8 +2076,11 @@ class HLongConstant : public HConstant { DECLARE_INSTRUCTION(LongConstant); private: + explicit HLongConstant(int64_t value) : HConstant(Primitive::kPrimLong), value_(value) {} + const int64_t value_; + friend class HGraph; DISALLOW_COPY_AND_ASSIGN(HLongConstant); }; @@ -3086,7 +3115,7 @@ class HLoadString : public HExpression<0> { class HClinitCheck : public HExpression<1> { public: explicit HClinitCheck(HLoadClass* constant, uint32_t dex_pc) - : HExpression(Primitive::kPrimNot, SideEffects::All()), + : HExpression(Primitive::kPrimNot, SideEffects::ChangesSomething()), dex_pc_(dex_pc) { SetRawInputAt(0, constant); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 5ce73baef2..b2f9c65153 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -583,8 +583,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, if (method != nullptr) { return method; } - return delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx, - class_loader, dex_file); + method = delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx, + class_loader, dex_file); + + if 
(method != nullptr) { + compilation_stats_.RecordStat(MethodCompilationStat::kCompiledQuick); + } + return method; } Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 22ec2a5167..b97a66719d 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -28,6 +28,7 @@ enum MethodCompilationStat { kAttemptCompilation = 0, kCompiledBaseline, kCompiledOptimized, + kCompiledQuick, kInlinedInvoke, kNotCompiledUnsupportedIsa, kNotCompiledPathological, @@ -65,16 +66,22 @@ class OptimizingCompilerStats { compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation]; size_t optimized_percent = compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation]; + size_t quick_percent = + compile_stats_[kCompiledQuick] * 100 / compile_stats_[kAttemptCompilation]; std::ostringstream oss; - oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: " - << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, " - << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized."; + oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: "; + + oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, "; + oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, "; + oss << quick_percent << "% (" << compile_stats_[kCompiledQuick] << ") quick."; + + LOG(INFO) << oss.str(); + for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { - oss << "\n" << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; + VLOG(compiler) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; } } - LOG(INFO) << oss.str(); } } @@ -84,6 +91,7 @@ class OptimizingCompilerStats { case kAttemptCompilation : return "kAttemptCompilation"; case kCompiledBaseline : return "kCompiledBaseline"; case kCompiledOptimized : return "kCompiledOptimized"; + case kCompiledQuick : return "kCompiledQuick"; case kInlinedInvoke : return "kInlinedInvoke"; case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; case kNotCompiledPathological : return "kNotCompiledPathological"; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index cecc210cbf..cf38bd3f8c 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -213,7 +213,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); temp_intervals_.Add(interval); - interval->AddRange(position, position + 1); + interval->AddTempUse(instruction, i); unhandled_core_intervals_.Add(interval); break; } @@ -222,7 +222,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); temp_intervals_.Add(interval); - interval->AddRange(position, position + 1); + interval->AddTempUse(instruction, i); if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { interval->AddHighInterval(true); LiveInterval* high = interval->GetHighInterval(); @@ -851,6 +851,23 @@ bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position return false; } +bool 
RegisterAllocator::PotentiallyRemoveOtherHalf(LiveInterval* interval, + GrowableArray<LiveInterval*>* intervals, + size_t index) { + if (interval->IsLowInterval()) { + DCHECK_EQ(intervals->Get(index), interval->GetHighInterval()); + intervals->DeleteAt(index); + return true; + } else if (interval->IsHighInterval()) { + DCHECK_GT(index, 0u); + DCHECK_EQ(intervals->Get(index - 1), interval->GetLowInterval()); + intervals->DeleteAt(index - 1); + return true; + } else { + return false; + } +} + // Find the register that is used the last, and spill the interval // that holds it. If the first use of `current` is after that register // we spill `current` instead. @@ -974,33 +991,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { if (active->GetRegister() == reg) { DCHECK(!active->IsFixed()); LiveInterval* split = Split(active, current->GetStart()); - active_.DeleteAt(i); if (split != active) { handled_.Add(active); } + active_.DeleteAt(i); + PotentiallyRemoveOtherHalf(active, &active_, i); AddSorted(unhandled_, split); - - if (active->IsLowInterval() || active->IsHighInterval()) { - LiveInterval* other_half = active->IsLowInterval() - ? active->GetHighInterval() - : active->GetLowInterval(); - // We also need to remove the other half from the list of actives. - bool found = false; - for (size_t j = 0; j < active_.Size(); ++j) { - if (active_.Get(j) == other_half) { - found = true; - active_.DeleteAt(j); - handled_.Add(other_half); - break; - } - } - DCHECK(found); - } break; } } - for (size_t i = 0, e = inactive_.Size(); i < e; ++i) { + for (size_t i = 0; i < inactive_.Size(); ++i) { LiveInterval* inactive = inactive_.Get(i); if (inactive->GetRegister() == reg) { if (!current->IsSplit() && !inactive->IsFixed()) { @@ -1024,29 +1025,14 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // If it's inactive, it must start before the current interval. DCHECK_NE(split, inactive); inactive_.DeleteAt(i); + if (PotentiallyRemoveOtherHalf(inactive, &inactive_, i) && inactive->IsHighInterval()) { + // We have removed an entry prior to `inactive`. So we need to decrement. + --i; + } + // Decrement because we have removed `inactive` from the list. --i; - --e; handled_.Add(inactive); AddSorted(unhandled_, split); - - if (inactive->IsLowInterval() || inactive->IsHighInterval()) { - LiveInterval* other_half = inactive->IsLowInterval() - ? inactive->GetHighInterval() - : inactive->GetLowInterval(); - - // We also need to remove the other half from the list of inactives. - bool found = false; - for (size_t j = 0; j < inactive_.Size(); ++j) { - if (inactive_.Get(j) == other_half) { - found = true; - inactive_.DeleteAt(j); - --e; - handled_.Add(other_half); - break; - } - } - DCHECK(found); - } } } } @@ -1695,8 +1681,6 @@ void RegisterAllocator::Resolve() { } // Assign temp locations. 
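Aside: stepping back to PotentiallyRemoveOtherHalf above, it relies on an ordering invariant of the active/inactive lists: a register pair's two halves are adjacent, with the high interval stored immediately after the low one. A standalone sketch over std::vector (GrowableArray in the real code); note that, as at the call sites above, the caller has already deleted `interval` itself at `index`:

#include <cstddef>
#include <vector>

struct Interval {
  bool is_low = false;
  bool is_high = false;
};

// Returns whether `interval` had another half (which is then also removed).
// Precondition: `interval` was already deleted at `index`.
bool RemoveOtherHalfSketch(Interval* interval,
                           std::vector<Interval*>* intervals,
                           size_t index) {
  if (interval->is_low) {
    intervals->erase(intervals->begin() + index);      // High half slid into `index`.
    return true;
  } else if (interval->is_high) {
    intervals->erase(intervals->begin() + index - 1);  // Low half sits just before.
    return true;  // Caller must then decrement its loop index, as done above.
  }
  return false;
}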
- HInstruction* current = nullptr; - size_t temp_index = 0; for (size_t i = 0; i < temp_intervals_.Size(); ++i) { LiveInterval* temp = temp_intervals_.Get(i); if (temp->IsHighInterval()) { @@ -1704,25 +1688,20 @@ void RegisterAllocator::Resolve() { continue; } HInstruction* at = liveness_.GetTempUser(temp); - if (at != current) { - temp_index = 0; - current = at; - } + size_t temp_index = liveness_.GetTempIndex(temp); LocationSummary* locations = at->GetLocations(); switch (temp->GetType()) { case Primitive::kPrimInt: - locations->SetTempAt( - temp_index++, Location::RegisterLocation(temp->GetRegister())); + locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister())); break; case Primitive::kPrimDouble: if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { Location location = Location::FpuRegisterPairLocation( temp->GetRegister(), temp->GetHighInterval()->GetRegister()); - locations->SetTempAt(temp_index++, location); + locations->SetTempAt(temp_index, location); } else { - locations->SetTempAt( - temp_index++, Location::FpuRegisterLocation(temp->GetRegister())); + locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister())); } break; diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index fcc61128a6..717be75533 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -144,6 +144,13 @@ class RegisterAllocator { size_t first_register_use, size_t* next_use); + // If `interval` has another half, remove it from the list of `intervals`. + // `index` holds the index at which `interval` is in `intervals`. + // Returns whether there is another half. + bool PotentiallyRemoveOtherHalf(LiveInterval* interval, + GrowableArray<LiveInterval*>* intervals, + size_t index); + ArenaAllocator* const allocator_; CodeGenerator* const codegen_; const SsaLivenessAnalysis& liveness_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 7a2d84b056..7c3a0357d6 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -644,11 +644,10 @@ static HGraph* BuildTwoSubs(ArenaAllocator* allocator, graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimInt); - HInstruction* constant1 = new (allocator) HIntConstant(0); - HInstruction* constant2 = new (allocator) HIntConstant(0); entry->AddInstruction(parameter); - entry->AddInstruction(constant1); - entry->AddInstruction(constant2); + + HInstruction* constant1 = graph->GetIntConstant(1); + HInstruction* constant2 = graph->GetIntConstant(2); HBasicBlock* block = new (allocator) HBasicBlock(graph); graph->AddBlock(block); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index ae6bf16f77..fcc4e69b37 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -353,7 +353,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { * is used for floating point operations. We create a floating-point equivalent * constant to make the operations correctly typed. */ -static HFloatConstant* GetFloatEquivalent(HIntConstant* constant) { +HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { // We place the floating point constant next to this constant. 
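Aside: the lookup below exploits a placement invariant rather than a side table: when the floating-point twin of an integral constant exists, it is always the very next instruction, so finding it is a single GetNext() hop. A simplified standalone sketch with a toy Insn type; like ART, it reinterprets the integer bits as a float:

#include <cstdint>
#include <cstring>

struct Insn {
  Insn* next = nullptr;
  bool is_float = false;
  float fvalue = 0.0f;
};

Insn* GetFloatEquivalentSketch(Insn* int_constant, int32_t int_value) {
  Insn* result = int_constant->next;
  if (result == nullptr || !result->is_float) {
    result = new Insn();
    std::memcpy(&result->fvalue, &int_value, sizeof(result->fvalue));  // bit_cast
    result->is_float = true;
    result->next = int_constant->next;
    int_constant->next = result;  // Keep the twin adjacent for later lookups.
  }
  return result;
}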
HFloatConstant* result = constant->GetNext()->AsFloatConstant(); if (result == nullptr) { @@ -375,7 +375,7 @@ static HFloatConstant* GetFloatEquivalent(HIntConstant* constant) { * is used for floating point operations. We create a floating-point equivalent * constant to make the operations correctly typed. */ -static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) { +HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) { // We place the floating point constant next to this constant. HDoubleConstant* result = constant->GetNext()->AsDoubleConstant(); if (result == nullptr) { @@ -398,7 +398,7 @@ static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) { * floating point registers and core registers), we need to create a copy of the * phi with a floating point / reference type. */ -static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) { +HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) { // We place the floating point /reference phi next to this phi. HInstruction* next = phi->GetNext(); if (next != nullptr diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 24dc449513..569b3e2223 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -85,6 +85,10 @@ class SsaBuilder : public HGraphVisitor { static constexpr const char* kSsaBuilderPassName = "ssa_builder"; private: + static HFloatConstant* GetFloatEquivalent(HIntConstant* constant); + static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant); + static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); + // Locals for the current block being visited. HEnvironment* current_locals_; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 56ccd717cf..0f3973e5fb 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -318,6 +318,8 @@ static int RegisterOrLowRegister(Location location) { int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { DCHECK(!IsHighInterval()); + if (IsTemp()) return kNoRegister; + if (GetParent() == this && defined_by_ != nullptr) { // This is the first interval for the instruction. Try to find // a register based on its definition. diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index b57029d1a7..bc78dc2e76 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -180,6 +180,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // This interval is the result of a split. bool IsSplit() const { return parent_ != this; } + void AddTempUse(HInstruction* instruction, size_t temp_index) { + DCHECK(IsTemp()); + DCHECK(first_use_ == nullptr) << "A temporary can only have one user"; + size_t position = instruction->GetLifetimePosition(); + first_use_ = new (allocator_) UsePosition( + instruction, temp_index, /* is_environment */ false, position, first_use_); + AddRange(position, position + 1); + } + void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) { // Set the use within the instruction. 
size_t position = instruction->GetLifetimePosition() + 1; @@ -856,7 +865,15 @@ class SsaLivenessAnalysis : public ValueObject { HInstruction* GetTempUser(LiveInterval* temp) const { // A temporary shares the same lifetime start as the instruction that requires it. DCHECK(temp->IsTemp()); - return GetInstructionFromPosition(temp->GetStart() / 2); + HInstruction* user = GetInstructionFromPosition(temp->GetStart() / 2); + DCHECK_EQ(user, temp->GetFirstUse()->GetUser()); + return user; + } + + size_t GetTempIndex(LiveInterval* temp) const { + // We use the input index to store the index of the temporary in the user's temporary list. + DCHECK(temp->IsTemp()); + return temp->GetFirstUse()->GetInputIndex(); } size_t GetMaxLifetimePosition() const { diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 5818a37a46..b77e60471b 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -27,6 +27,32 @@ namespace art { +// Helper to build art::StackMapStream::LocationCatalogEntriesIndices. +class LocationCatalogEntriesIndicesEmptyFn { + public: + void MakeEmpty(std::pair<DexRegisterLocation, size_t>& item) const { + item.first = DexRegisterLocation::None(); + } + bool IsEmpty(const std::pair<DexRegisterLocation, size_t>& item) const { + return item.first == DexRegisterLocation::None(); + } +}; + +// Hash function for art::StackMapStream::LocationCatalogEntriesIndices. +// This hash function does not create collisions. +class DexRegisterLocationHashFn { + public: + size_t operator()(DexRegisterLocation key) const { + // Concatenate `key`s fields to create a 64-bit value to be hashed. + int64_t kind_and_value = + (static_cast<int64_t>(key.kind_) << 32) | static_cast<int64_t>(key.value_); + return inner_hash_fn_(kind_and_value); + } + private: + std::hash<int64_t> inner_hash_fn_; +}; + + /** * Collects and builds stack maps for a method. All the stack maps * for a method are placed in a CodeInfo object. @@ -36,6 +62,7 @@ class StackMapStream : public ValueObject { explicit StackMapStream(ArenaAllocator* allocator) : allocator_(allocator), stack_maps_(allocator, 10), + location_catalog_entries_(allocator, 4), dex_register_locations_(allocator, 10 * 4), inline_infos_(allocator, 2), stack_mask_max_(-1), @@ -111,6 +138,7 @@ class StackMapStream : public ValueObject { size_t ComputeNeededSize() { size_t size = CodeInfo::kFixedSize + + ComputeDexRegisterLocationCatalogSize() + ComputeStackMapsSize() + ComputeDexRegisterMapsSize() + ComputeInlineInfoSize(); @@ -131,21 +159,39 @@ class StackMapStream : public ValueObject { native_pc_offset_max_); } - // Compute the size of the Dex register map of `entry`. + // Compute the size of the Dex register location catalog of `entry`. + size_t ComputeDexRegisterLocationCatalogSize() const { + size_t size = DexRegisterLocationCatalog::kFixedSize; + for (size_t location_catalog_entry_index = 0; + location_catalog_entry_index < location_catalog_entries_.Size(); + ++location_catalog_entry_index) { + DexRegisterLocation dex_register_location = + location_catalog_entries_.Get(location_catalog_entry_index); + size += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + return size; + } + size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const { + // Size of the map in bytes. size_t size = DexRegisterMap::kFixedSize; - // Add the bit mask for the dex register liveness. 
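Aside on DexRegisterLocationHashFn above: one caveat to the "does not create collisions" comment is that value_ is a signed 32-bit field, so static_cast<int64_t>(key.value_) sign-extends, and for a negative value the OR then sets every bit above the value, swamping the kind bits; two locations with different kinds but the same negative value (such as the constant -2 used throughout the tests below) hash identically. That stays correct, since the HashMap still resolves collisions through key equality, but a masked variant keeps the claim true. A standalone sketch:

#include <cstdint>
#include <functional>

size_t HashKindAndValue(int32_t kind, int32_t value) {
  // Mask the value so sign extension cannot clobber the kind bits.
  int64_t key = (static_cast<int64_t>(kind) << 32) |
                (static_cast<int64_t>(value) & INT64_C(0xFFFFFFFF));
  return std::hash<int64_t>()(key);
}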
- size += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); - for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; + // Add the live bit mask for the Dex register liveness. + size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers); + // Compute the size of the set of live Dex register entries. + size_t number_of_live_dex_registers = 0; + for (size_t dex_register_number = 0; dex_register_number < entry.num_dex_registers; ++dex_register_number) { if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - DexRegisterLocation dex_register_location = dex_register_locations_.Get( - entry.dex_register_locations_start_index + index_in_dex_register_locations); - size += DexRegisterMap::EntrySize(dex_register_location); - index_in_dex_register_locations++; + ++number_of_live_dex_registers; } } + size_t map_entries_size_in_bits = + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + * number_of_live_dex_registers; + size_t map_entries_size_in_bytes = + RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; + size += map_entries_size_in_bytes; return size; } @@ -168,8 +214,16 @@ class StackMapStream : public ValueObject { + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } + size_t ComputeDexRegisterLocationCatalogStart() const { + return CodeInfo::kFixedSize; + } + + size_t ComputeStackMapsStart() const { + return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize(); + } + size_t ComputeDexRegisterMapsStart() { - return CodeInfo::kFixedSize + ComputeStackMapsSize(); + return ComputeStackMapsStart() + ComputeStackMapsSize(); } size_t ComputeInlineInfoStart() { @@ -198,7 +252,25 @@ class StackMapStream : public ValueObject { inline_info_size, dex_register_map_size, dex_pc_max_, native_pc_offset_max_); code_info.SetNumberOfStackMaps(stack_maps_.Size()); code_info.SetStackMaskSize(stack_mask_size); - DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize()); + DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize()); + + // Set the Dex register location catalog. + code_info.SetNumberOfDexRegisterLocationCatalogEntries( + location_catalog_entries_.Size()); + MemoryRegion dex_register_location_catalog_region = region.Subregion( + ComputeDexRegisterLocationCatalogStart(), + ComputeDexRegisterLocationCatalogSize()); + DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); + // Offset in `dex_register_location_catalog` where to store the next + // register location. + size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; + for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { + DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); + location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + // Ensure we reached the end of the Dex registers location_catalog. + DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; @@ -234,25 +306,25 @@ class StackMapStream : public ValueObject { stack_map.SetDexRegisterMapOffset( code_info, register_region.start() - dex_register_locations_region.start()); - // Offset in `dex_register_map` where to store the next register entry. 
- size_t offset = DexRegisterMap::kFixedSize; - dex_register_map.SetLiveBitMask(offset, - entry.num_dex_registers, - *entry.live_dex_registers_mask); - offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); + // Set the live bit mask. + dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask); + + // Set the dex register location mapping data. for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; dex_register_number < entry.num_dex_registers; ++dex_register_number) { if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - DexRegisterLocation dex_register_location = dex_register_locations_.Get( - entry.dex_register_locations_start_index + index_in_dex_register_locations); - dex_register_map.SetRegisterInfo(offset, dex_register_location); - offset += DexRegisterMap::EntrySize(dex_register_location); + size_t location_catalog_entry_index = + dex_register_locations_.Get(entry.dex_register_locations_start_index + + index_in_dex_register_locations); + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + entry.num_dex_registers, + location_catalog_entries_.Size()); ++index_in_dex_register_locations; } } - // Ensure we reached the end of the Dex registers region. - DCHECK_EQ(offset, register_region.size()); } } @@ -282,12 +354,31 @@ class StackMapStream : public ValueObject { } void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) { + StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); + DCHECK_LT(dex_register, entry.num_dex_registers); + if (kind != DexRegisterLocation::Kind::kNone) { // Ensure we only use non-compressed location kind at this stage. DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << DexRegisterLocation::PrettyDescriptor(kind); - dex_register_locations_.Add(DexRegisterLocation(kind, value)); - StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); + DexRegisterLocation location(kind, value); + + // Look for Dex register `location` in the location catalog (using the + // companion hash map of locations to indices). Use its index if it + // is already in the location catalog. If not, insert it (in the + // location catalog and the hash map) and use the newly created index. + auto it = location_catalog_entries_indices_.Find(location); + if (it != location_catalog_entries_indices_.end()) { + // Retrieve the index from the hash map. + dex_register_locations_.Add(it->second); + } else { + // Create a new entry in the location catalog and the hash map. 
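Aside: the find-or-insert logic above is classic interning applied to register locations: each distinct [kind, value] pair is stored once in the catalog, and every per-register entry shrinks to a small index. A standalone sketch with std:: containers in place of ART's arena-backed GrowableArray and HashMap:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <vector>

struct Location {
  int32_t kind;
  int32_t value;
  bool operator==(const Location& other) const {
    return kind == other.kind && value == other.value;
  }
};

struct LocationHash {
  size_t operator()(const Location& l) const {
    return std::hash<int64_t>()((static_cast<int64_t>(l.kind) << 32) |
                                (static_cast<int64_t>(l.value) & INT64_C(0xFFFFFFFF)));
  }
};

// Returns the catalog index of `loc`, appending it on first sight.
size_t InternLocation(const Location& loc,
                      std::vector<Location>* catalog,
                      std::unordered_map<Location, size_t, LocationHash>* indices) {
  auto it = indices->find(loc);
  if (it != indices->end()) {
    return it->second;  // Already catalogued: reuse its index.
  }
  size_t index = catalog->size();
  catalog->push_back(loc);
  indices->emplace(loc, index);
  return index;
}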
+ size_t index = location_catalog_entries_.Size(); + location_catalog_entries_.Add(location); + dex_register_locations_.Add(index); + location_catalog_entries_indices_.Insert(std::make_pair(location, index)); + } + entry.live_dex_registers_mask->SetBit(dex_register); entry.dex_register_map_hash += (1 << dex_register); entry.dex_register_map_hash += static_cast<uint32_t>(value); @@ -354,9 +445,9 @@ return false; } if (a.live_dex_registers_mask->IsBitSet(i)) { - DexRegisterLocation a_loc = dex_register_locations_.Get( + size_t a_loc = dex_register_locations_.Get( a.dex_register_locations_start_index + index_in_dex_register_locations); - DexRegisterLocation b_loc = dex_register_locations_.Get( + size_t b_loc = dex_register_locations_.Get( b.dex_register_locations_start_index + index_in_dex_register_locations); if (a_loc != b_loc) { return false; @@ -369,7 +460,18 @@ ArenaAllocator* allocator_; GrowableArray<StackMapEntry> stack_maps_; - GrowableArray<DexRegisterLocation> dex_register_locations_; + + // A catalog of unique [location_kind, register_value] pairs (per method). + GrowableArray<DexRegisterLocation> location_catalog_entries_; + // Map from Dex register location catalog entries to their indices in the + // location catalog. + typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn, + DexRegisterLocationHashFn> LocationCatalogEntriesIndices; + LocationCatalogEntriesIndices location_catalog_entries_indices_; + + // A set of concatenated maps of Dex register locations indices to + // `location_catalog_entries_`. + GrowableArray<size_t> dex_register_locations_; GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; uint32_t dex_pc_max_; @@ -380,10 +482,6 @@ static constexpr uint32_t kNoSameDexMapFound = -1; - ART_FRIEND_TEST(StackMapTest, Test1); - ART_FRIEND_TEST(StackMapTest, Test2); - ART_FRIEND_TEST(StackMapTest, TestNonLiveDexRegisters); - DISALLOW_COPY_AND_ASSIGN(StackMapStream); }; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index e5a9790254..b9bf0165f3 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -31,6 +31,8 @@ static bool SameBits(MemoryRegion region, const BitVector& bit_vector) { return true; } +using Kind = DexRegisterLocation::Kind; + TEST(StackMapTest, Test1) { ArenaPool pool; ArenaAllocator arena(&pool); @@ -39,8 +41,8 @@ ArenaBitVector sp_mask(&arena, 0, false); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
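Aside: the size expectations asserted below, and in DexRegisterMapOffsetOverflow further down, all follow from one piece of arithmetic: each live register stores a catalog index just wide enough to count the catalog entries, and the packed indices round up to whole bytes. A back-of-the-envelope sketch with hypothetical helpers mirroring SingleEntrySizeInBits and the mapping-data sizing:

#include <cstddef>

// Bits needed per index; 0 when the catalog has a single entry, whose index
// is then implicit (compare TestNonLiveDexRegisters below).
size_t BitsPerCatalogIndex(size_t catalog_entries) {
  size_t bits = 0;
  while ((static_cast<size_t>(1) << bits) < catalog_entries) {
    ++bits;
  }
  return bits;
}

size_t MappingDataSizeInBytes(size_t live_registers, size_t catalog_entries) {
  size_t bits = live_registers * BitsPerCatalogIndex(catalog_entries);
  return (bits + 7) / 8;  // Round up to whole bytes.
}

// E.g. this test: 2 live registers, 2 catalog entries -> two 1-bit indices
// -> 1 byte; the overflow test: 1016 live registers, 2 entries -> 127 bytes,
// plus a 128-byte live bit mask = 255 bytes.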
size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); CodeInfo code_info(region); @@ -51,6 +53,16 @@ ASSERT_EQ(0u, code_info.GetStackMaskSize()); ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(2u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - one 1-byte short Dex register location, and + // - one 5-byte large Dex register location. + size_t expected_location_catalog_size = 1u + 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + StackMap stack_map = code_info.GetStackMapAt(0); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); @@ -62,14 +74,40 @@ ASSERT_TRUE(SameBits(stack_mask, sp_mask)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(7u, dex_registers.Size()); - DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 1-bit values.
+ size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(0u, index0); + ASSERT_EQ(1u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInStack, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); @@ -86,8 +124,8 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. stream.AddInlineInfoEntry(42); stream.AddInlineInfoEntry(82); @@ -95,8 +133,8 @@ TEST(StackMapTest, Test2) { sp_mask2.SetBit(3); sp_mask1.SetBit(8); stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 18); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kInFpuRegister, 3); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location. + stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -107,6 +145,16 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(1u, code_info.GetStackMaskSize()); ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(4u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - three 1-byte short Dex register locations, and + // - one 5-byte large Dex register location. + size_t expected_location_catalog_size = 3u * 1u + 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + // First stack map. 
{ StackMap stack_map = code_info.GetStackMapAt(0); @@ -120,17 +168,40 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(7u, dex_registers.Size()); - DexRegisterLocation location0 = - dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = - dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. + size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(0u, index0); + ASSERT_EQ(1u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInStack, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); @@ -154,17 +225,40 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(3u, dex_registers.Size()); - DexRegisterLocation location0 = - dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = - dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetInternalKind()); - 
ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetInternalKind()); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. + size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInRegister, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInFpuRegister, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInRegister, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInFpuRegister, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(18, dex_register_map.GetMachineRegister(0, number_of_dex_registers, code_info)); + ASSERT_EQ(3, dex_register_map.GetMachineRegister(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(2u, index0); + ASSERT_EQ(3u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInRegister, location0.GetKind()); + ASSERT_EQ(Kind::kInFpuRegister, location1.GetKind()); + ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind()); + ASSERT_EQ(Kind::kInFpuRegister, location1.GetInternalKind()); ASSERT_EQ(18, location0.GetValue()); ASSERT_EQ(3, location1.GetValue()); @@ -180,8 +274,8 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kNone, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -189,14 +283,62 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { stream.FillIn(region); CodeInfo code_info(region); + ASSERT_EQ(0u, code_info.GetStackMaskSize()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(1u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - one 5-byte large Dex register location. 
+ size_t expected_location_catalog_size = 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc(code_info)); + ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info)); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info)); + ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); - ASSERT_EQ(DexRegisterLocation::Kind::kNone, - dex_registers.GetLocationKind(0, number_of_dex_registers)); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, - dex_registers.GetLocationKind(1, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers)); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask. + // No space is allocated for the sole location catalog entry index, as it is useless. + size_t expected_dex_register_map_size = 1u + 0u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kNone, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kNone, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0); + ASSERT_EQ(0u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kNone, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kNone, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_EQ(0, location0.GetValue()); + ASSERT_EQ(-2, location1.GetValue()); + ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); } @@ -209,14 +351,21 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { StackMapStream stream(&arena); ArenaBitVector sp_mask(&arena, 0, false); - uint32_t number_of_dex_registers = 0xEA; + uint32_t number_of_dex_registers = 1024; + // Create the first stack map (and its Dex register map). 
   stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
-  for (uint32_t i = 0; i < number_of_dex_registers - 9; ++i) {
-    stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+  uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8;
+  for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) {
+    // Use two different Dex register locations to populate this map,
+    // as using a single value (in the whole CodeInfo object) would
+    // make this Dex register mapping data empty (see
+    // art::DexRegisterMap::SingleEntrySizeInBits).
+    stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2);  // Short location.
   }
+  // Create the second stack map (and its Dex register map).
   stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   for (uint32_t i = 0; i < number_of_dex_registers; ++i) {
-    stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+    stream.AddDexRegisterEntry(i, Kind::kConstant, 0);  // Short location.
   }
 
   size_t size = stream.ComputeNeededSize();
@@ -225,10 +374,36 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
   stream.FillIn(region);
 
   CodeInfo code_info(region);
-  StackMap stack_map = code_info.GetStackMapAt(1);
-  ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
-  ASSERT_NE(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap);
-  ASSERT_EQ(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMapSmallEncoding);
+  // The location catalog contains two entries (DexRegisterLocation(kConstant, 0)
+  // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index
+  // has a size of 1 bit.
+  uint32_t number_of_location_catalog_entries =
+      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  ASSERT_EQ(2u, number_of_location_catalog_entries);
+  ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries));
+
+  // The first Dex register map contains:
+  // - a live register bit mask for 1024 registers (that is, 128 bytes of
+  //   data); and
+  // - Dex register mapping information for 1016 1-bit Dex (live) register
+  //   locations (that is, 127 bytes of data).
+  // Hence it has a size of 255 bytes, and therefore...
+  ASSERT_EQ(128u, DexRegisterMap::GetLiveBitMaskSize(number_of_dex_registers));
+  StackMap stack_map0 = code_info.GetStackMapAt(0);
+  DexRegisterMap dex_register_map0 =
+      code_info.GetDexRegisterMapOf(stack_map0, number_of_dex_registers);
+  ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers,
+                                                               number_of_location_catalog_entries));
+  ASSERT_EQ(255u, dex_register_map0.Size());
+
+  StackMap stack_map1 = code_info.GetStackMapAt(1);
+  ASSERT_TRUE(stack_map1.HasDexRegisterMap(code_info));
+  // ...the offset of the second Dex register map (relative to the
+  // beginning of the Dex register maps region) is 255 (i.e.,
+  // kNoDexRegisterMapSmallEncoding).
+  ASSERT_NE(StackMap::kNoDexRegisterMap, stack_map1.GetDexRegisterMapOffset(code_info));
+  ASSERT_EQ(StackMap::kNoDexRegisterMapSmallEncoding,
+            stack_map1.GetDexRegisterMapOffset(code_info));
 }
 
 TEST(StackMapTest, TestShareDexRegisterMap) {
@@ -240,16 +415,16 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
   uint32_t number_of_dex_registers = 2;
   // First stack map.
   stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
-  stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0);
-  stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+  stream.AddDexRegisterEntry(0, Kind::kInRegister, 0);  // Short location.
+  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);   // Large location.
   // Second stack map, which should share the same dex register map.
   stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
-  stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0);
-  stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+  stream.AddDexRegisterEntry(0, Kind::kInRegister, 0);  // Short location.
+  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);   // Large location.
   // Third stack map (doesn't share the dex register map).
   stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
-  stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 2);
-  stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+  stream.AddDexRegisterEntry(0, Kind::kInRegister, 2);  // Short location.
+  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);   // Large location.
 
   size_t size = stream.ComputeNeededSize();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -260,20 +435,20 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
 
   // Verify first stack map.
   StackMap sm0 = ci.GetStackMapAt(0);
   DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers);
-  ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers));
-  ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers));
+  ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci));
+  ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci));
 
   // Verify second stack map.
   StackMap sm1 = ci.GetStackMapAt(1);
   DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers);
-  ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers));
-  ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers));
+  ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci));
+  ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci));
 
   // Verify third stack map.
   StackMap sm2 = ci.GetStackMapAt(2);
   DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers);
-  ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers));
-  ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers));
+  ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci));
+  ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci));
 
   // Verify dex register map offsets.
   ASSERT_EQ(sm0.GetDexRegisterMapOffset(ci), sm1.GetDexRegisterMapOffset(ci));
@@ -281,4 +456,39 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
   ASSERT_NE(sm1.GetDexRegisterMapOffset(ci), sm2.GetDexRegisterMapOffset(ci));
 }
 
+TEST(StackMapTest, TestNoDexRegisterMap) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  StackMapStream stream(&arena);
+
+  ArenaBitVector sp_mask(&arena, 0, false);
+  uint32_t number_of_dex_registers = 0;
+  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+
+  size_t size = stream.ComputeNeededSize();
+  void* memory = arena.Alloc(size, kArenaAllocMisc);
+  MemoryRegion region(memory, size);
+  stream.FillIn(region);
+
+  CodeInfo code_info(region);
+  ASSERT_EQ(0u, code_info.GetStackMaskSize());
+  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+
+  uint32_t number_of_location_catalog_entries =
+      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  ASSERT_EQ(0u, number_of_location_catalog_entries);
+  DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+  ASSERT_EQ(0u, location_catalog.Size());
+
+  StackMap stack_map = code_info.GetStackMapAt(0);
+  ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+  ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
+  ASSERT_EQ(0u, stack_map.GetDexPc(code_info));
+  ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info));
+  ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info));
+
+  ASSERT_FALSE(stack_map.HasDexRegisterMap(code_info));
+  ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
+}
+
 }  // namespace art
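Note on the size arithmetic exercised by the stack_map_test.cc hunks above: every expected-size assertion follows from the same encoding scheme. A Dex register map stores a byte-aligned live-register bit mask, followed by one location catalog entry index per live register, where each index takes ceil(log2(number of catalog entries)) bits (and zero bits when the catalog has at most one entry, matching the "no space is allocated for the sole location catalog entry index" case). The standalone sketch below reproduces the DexRegisterMapOffsetOverflow figures (128 + 127 = 255 bytes); the helper names are illustrative only and are not part of the ART API.

    #include <cstddef>
    #include <cstdio>

    // Bits needed to index one of 'n' catalog entries: ceil(log2(n)),
    // and 0 when there is at most one entry (mirroring the test's
    // "mapping data would be empty" remark). Illustrative helper, not
    // art::DexRegisterMap::SingleEntrySizeInBits itself.
    static size_t BitsPerCatalogIndex(size_t n) {
      size_t bits = 0;
      while ((static_cast<size_t>(1) << bits) < n) ++bits;
      return bits;
    }

    // Round a bit count up to whole bytes (bit masks and mapping data
    // are byte-aligned).
    static size_t BitsToBytes(size_t bits) { return (bits + 7) / 8; }

    int main() {
      // Figures from the DexRegisterMapOffsetOverflow hunk.
      const size_t number_of_dex_registers = 1024;
      const size_t live_registers = number_of_dex_registers - 8;  // 1016 live entries.
      const size_t catalog_entries = 2;  // (kConstant, 0) and (kConstant, 1).

      size_t live_bit_mask = BitsToBytes(number_of_dex_registers);  // 128 bytes.
      size_t mapping_data =
          BitsToBytes(live_registers * BitsPerCatalogIndex(catalog_entries));  // 127 bytes.
      size_t map_size = live_bit_mask + mapping_data;  // 255 bytes.

      // 255 is exactly the offset at which the second map would start,
      // i.e. the kNoDexRegisterMapSmallEncoding boundary the test
      // drives the stream into.
      std::printf("live bit mask: %zu B, mapping data: %zu B, total: %zu B\n",
                  live_bit_mask, mapping_data, map_size);
      return 0;
    }

The same arithmetic covers the other hunks: with catalog indices of 2 and 3, each index needs 2 bits, so the two live registers of the first hunk fit their indices in a single byte of mapping data (the 1u + 1u expected map size), and a catalog with a single entry yields 0-bit indices (the 1u + 0u case in TestNonLiveDexRegisters).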