Diffstat (limited to 'compiler/optimizing')
30 files changed, 1671 insertions, 518 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index bcc32403d3..cca0baf274 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1169,8 +1169,10 @@ class BCEVisitor : public HGraphVisitor { // Return the range resulting from induction variable analysis of "instruction" when the value // is used from "context", for example, an index used from a bounds-check inside a loop body. ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) { - InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction); - InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction); + InductionVarRange::Value v1; + InductionVarRange::Value v2; + bool needs_finite_test = false; + induction_range_.GetInductionRange(context, instruction, &v1, &v2, &needs_finite_test); if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) && v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index ed193c7b61..167c35d075 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -359,18 +359,10 @@ void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) // need a strategy for splitting exceptional edges. We split the block // after the move-exception (if present) and mark the first part not // throwing. The normal-flow edge between them will be split later. - HInstruction* first_insn = block->GetFirstInstruction(); - if (first_insn->IsLoadException()) { - // Catch block starts with a LoadException. Split the block after - // the StoreLocal and ClearException which must come after the load. - DCHECK(first_insn->GetNext()->IsStoreLocal()); - DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); - throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext()); - } else { - // Catch block does not load the exception. Split at the beginning - // to create an empty catch block. - throwing_block = block->SplitBefore(first_insn); - } + throwing_block = block->SplitCatchBlockAfterMoveException(); + // Move-exception does not throw and the block has throwing instructions + // so it must have been possible to split it.
+ DCHECK(throwing_block != nullptr); } try_block_info.Put(throwing_block->GetBlockId(), @@ -1006,7 +998,9 @@ bool HGraphBuilder::SetupInvokeArguments(HInvoke* invoke, return false; } - if (invoke->IsInvokeStaticOrDirect()) { + if (invoke->IsInvokeStaticOrDirect() && + HInvokeStaticOrDirect::NeedsCurrentMethodInput( + invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) { invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod()); (*argument_index)++; } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index a1bb5e0838..ce92470868 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -42,7 +42,7 @@ #include "compiled_method.h" #include "dex/verified_method.h" -#include "driver/dex_compilation_unit.h" +#include "driver/compiler_driver.h" #include "gc_map_builder.h" #include "graph_visualizer.h" #include "intrinsics.h" @@ -787,9 +787,10 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, } void CodeGenerator::BuildNativeGCMap( - ArenaVector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const { + ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const { const std::vector<uint8_t>& gc_map_raw = - dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap(); + compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx()) + ->GetDexGcMap(); verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]); uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset(); @@ -911,19 +912,22 @@ void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const { vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); } -void CodeGenerator::BuildStackMaps(ArenaVector<uint8_t>* data) { - uint32_t size = stack_map_stream_.PrepareForFillIn(); - data->resize(size); - MemoryRegion region(data->data(), size); +size_t CodeGenerator::ComputeStackMapsSize() { + return stack_map_stream_.PrepareForFillIn(); +} + +void CodeGenerator::BuildStackMaps(MemoryRegion region) { stack_map_stream_.FillIn(region); } void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end) { - if (src_map_ != nullptr && dex_pc != kNoDexPc && native_pc_begin != native_pc_end) { - src_map_->push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin), - static_cast<int32_t>(dex_pc)})); + if (compiler_options_.GetGenerateDebugInfo() && + dex_pc != kNoDexPc && + native_pc_begin != native_pc_end) { + src_map_.push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin), + static_cast<int32_t>(dex_pc)})); } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 47b6f30450..a92014dc79 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -22,6 +22,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/bit_field.h" +#include "compiled_method.h" #include "driver/compiler_options.h" #include "globals.h" #include "graph_visualizer.h" @@ -51,13 +52,9 @@ static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); class Assembler; class CodeGenerator; -class DexCompilationUnit; +class CompilerDriver; class LinkerPatch; class ParallelMoveResolver; -class SrcMapElem; -template <class Alloc> -class SrcMap; -using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>; class CodeAllocator { public: @@ -284,13 +281,12 @@ class CodeGenerator { slow_paths_.push_back(slow_path); } - void SetSrcMap(DefaultSrcMap* src_map) { 
src_map_ = src_map; } - void BuildMappingTable(ArenaVector<uint8_t>* vector) const; void BuildVMapTable(ArenaVector<uint8_t>* vector) const; void BuildNativeGCMap( - ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; - void BuildStackMaps(ArenaVector<uint8_t>* vector); + ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const; + void BuildStackMaps(MemoryRegion region); + size_t ComputeStackMapsSize(); bool IsBaseline() const { return is_baseline_; @@ -446,6 +442,10 @@ class CodeGenerator { // Copy the result of a call into the given target. virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; + const ArenaVector<SrcMapElem>& GetSrcMappingTable() const { + return src_map_; + } + protected: // Method patch info used for recording locations of required linker patches and // target methods. The target method can be used for various purposes, whether for @@ -488,7 +488,7 @@ class CodeGenerator { stats_(stats), graph_(graph), compiler_options_(compiler_options), - src_map_(nullptr), + src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), current_block_index_(0), is_leaf_(true), @@ -602,7 +602,7 @@ class CodeGenerator { const CompilerOptions& compiler_options_; // Native to dex_pc map used for native debugging/profiling tools. - DefaultSrcMap* src_map_; + ArenaVector<SrcMapElem> src_map_; ArenaVector<SlowPathCode*> slow_paths_; // The current block index in `block_order_` of the block diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3dc3b7fba0..6d05293277 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1300,20 +1300,29 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio DCHECK_EQ(cond_value, 0); } } else { - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - // Condition has been materialized, compare the output to 0 + // Can we optimize the jump if we know that the next block is the true case? + HCondition* condition = cond->AsCondition(); + bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition); + if (condition == nullptr || condition->NeedsMaterialization()) { + // Condition has been materialized, compare the output to 0. DCHECK(instruction->GetLocations()->InAt(0).IsRegister()); + if (can_jump_to_false) { + __ CompareAndBranchIfZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(), + false_target); + return; + } __ CompareAndBranchIfNonZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(), true_target); } else { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. - Primitive::Type type = - cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; + Primitive::Type type = (condition != nullptr) + ? cond->InputAt(0)->GetType() + : Primitive::kPrimInt; // Is this a long or FP comparison that has been folded into the HCondition? if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { // Generate the comparison directly. 
- GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(), + GenerateCompareTestAndBranch(instruction->AsIf(), condition, true_target, false_target, always_true_target); return; } @@ -1328,7 +1337,12 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio DCHECK(right.IsConstant()); GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); } - __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition())); + if (can_jump_to_false) { + __ b(false_target, ARMCondition(condition->GetOppositeCondition())); + return; + } + + __ b(true_target, ARMCondition(condition->GetCondition())); } } if (false_target != nullptr) { diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 8106499c02..959adb4238 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -40,12 +40,8 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; // We need extra temporary/scratch registers (in addition to AT) in some cases. -static constexpr Register TMP = T8; static constexpr FRegister FTMP = F8; -// ART Thread Register. -static constexpr Register TR = S1; - Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 55efd5f9de..b36a04216d 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -16,13 +16,13 @@ #include "code_generator_mips64.h" +#include "art_method.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_mips64.h" -#include "art_method.h" -#include "code_generator_utils.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "offsets.h" @@ -666,9 +666,19 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, gpr = destination.AsRegister<GpuRegister>(); } if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) { - __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant())); + int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant()); + if (Primitive::IsFloatingPointType(dst_type) && value == 0) { + gpr = ZERO; + } else { + __ LoadConst32(gpr, value); + } } else { - __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant())); + int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant()); + if (Primitive::IsFloatingPointType(dst_type) && value == 0) { + gpr = ZERO; + } else { + __ LoadConst64(gpr, value); + } } if (dst_type == Primitive::kPrimFloat) { __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>()); @@ -734,12 +744,22 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, // Move to stack from constant HConstant* src_cst = source.GetConstant(); StoreOperandType store_type = destination.IsStackSlot() ? 
kStoreWord : kStoreDoubleword; + GpuRegister gpr = ZERO; if (destination.IsStackSlot()) { - __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant())); + int32_t value = GetInt32ValueOf(src_cst->AsConstant()); + if (value != 0) { + gpr = TMP; + __ LoadConst32(gpr, value); + } } else { - __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant())); + DCHECK(destination.IsDoubleStackSlot()); + int64_t value = GetInt64ValueOf(src_cst->AsConstant()); + if (value != 0) { + gpr = TMP; + __ LoadConst64(gpr, value); + } } - __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex()); + __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex()); } else { DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot()); @@ -755,9 +775,7 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, } } -void CodeGeneratorMIPS64::SwapLocations(Location loc1, - Location loc2, - Primitive::Type type ATTRIBUTE_UNUSED) { +void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) { DCHECK(!loc1.IsConstant()); DCHECK(!loc2.IsConstant()); @@ -781,12 +799,16 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, // Swap 2 FPRs FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>(); FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>(); - // TODO: Can MOV.S/MOV.D be used here to save one instruction? - // Need to distinguish float from double, right? - __ Dmfc1(TMP, r2); - __ Dmfc1(AT, r1); - __ Dmtc1(TMP, r1); - __ Dmtc1(AT, r2); + if (type == Primitive::kPrimFloat) { + __ MovS(FTMP, r1); + __ MovS(r1, r2); + __ MovS(r2, FTMP); + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + __ MovD(FTMP, r1); + __ MovD(r1, r2); + __ MovD(r2, FTMP); + } } else if (is_slot1 != is_slot2) { // Swap GPR/FPR and stack slot Location reg_loc = is_slot1 ? 
loc2 : loc1; @@ -800,7 +822,6 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, reg_loc.AsFpuRegister<FpuRegister>(), SP, mem_loc.GetStackIndex()); - // TODO: review this MTC1/DMTC1 move if (mem_loc.IsStackSlot()) { __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>()); } else { @@ -845,12 +866,22 @@ void CodeGeneratorMIPS64::Move(HInstruction* instruction, } else { DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot()); // Move to stack from constant + GpuRegister gpr = ZERO; if (location.IsStackSlot()) { - __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant())); - __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex()); + int32_t value = GetInt32ValueOf(instruction->AsConstant()); + if (value != 0) { + gpr = TMP; + __ LoadConst32(gpr, value); + } + __ StoreToOffset(kStoreWord, gpr, SP, location.GetStackIndex()); } else { - __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue()); - __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex()); + DCHECK(location.IsDoubleStackSlot()); + int64_t value = instruction->AsLongConstant()->GetValue(); + if (value != 0) { + gpr = TMP; + __ LoadConst64(gpr, value); + } + __ StoreToOffset(kStoreDoubleword, gpr, SP, location.GetStackIndex()); } } } else if (instruction->IsTemporary()) { @@ -1198,7 +1229,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } default: @@ -1707,7 +1738,7 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { switch (in_type) { case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -1736,8 +1767,18 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { case Primitive::kPrimLong: { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - // TODO: more efficient (direct) comparison with a constant + Location rhs_location = locations->InAt(1); + bool use_imm = rhs_location.IsConstant(); + GpuRegister rhs = ZERO; + if (use_imm) { + int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant()); + if (value != 0) { + rhs = AT; + __ LoadConst64(rhs, value); + } + } else { + rhs = rhs_location.AsRegister<GpuRegister>(); + } __ Slt(TMP, lhs, rhs); __ Slt(dst, rhs, lhs); __ Subu(dst, dst, TMP); @@ -1902,6 +1943,252 @@ void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) { } } +void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(imm == 1 || imm == -1); 
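
The VisitCompare change above materializes a long comparison against a register or constant using the classic two-slt idiom. Written out in plain C++, the emitted Slt/Slt/Subu sequence computes the following (a sketch; SltEmu stands in for the MIPS slt instruction and is not an ART API):

```cpp
#include <cstdint>

// Emulates MIPS slt: 1 if a < b, else 0.
int32_t SltEmu(int64_t a, int64_t b) { return a < b ? 1 : 0; }

// Three-instruction long compare: returns -1, 0, or +1.
int32_t CompareLong(int64_t lhs, int64_t rhs) {
  int32_t tmp = SltEmu(lhs, rhs);  // __ Slt(TMP, lhs, rhs);
  int32_t dst = SltEmu(rhs, lhs);  // __ Slt(dst, rhs, lhs);
  return dst - tmp;                // __ Subu(dst, dst, TMP);
}
```
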
+ + if (instruction->IsRem()) { + __ Move(out, ZERO); + } else { + if (imm == -1) { + if (type == Primitive::kPrimInt) { + __ Subu(out, ZERO, dividend); + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + __ Dsubu(out, ZERO, dividend); + } + } else if (out != dividend) { + __ Move(out, dividend); + } + } +} + +void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); + DCHECK(IsPowerOfTwo(abs_imm)); + int ctz_imm = CTZ(abs_imm); + + if (instruction->IsDiv()) { + if (type == Primitive::kPrimInt) { + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Srl(TMP, dividend, 31); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + } + __ Addu(out, dividend, TMP); + __ Sra(out, out, ctz_imm); + if (imm < 0) { + __ Subu(out, ZERO, out); + } + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Dsrl32(TMP, dividend, 31); + } else { + __ Dsra32(TMP, dividend, 31); + if (ctz_imm > 32) { + __ Dsrl(TMP, TMP, 64 - ctz_imm); + } else { + __ Dsrl32(TMP, TMP, 32 - ctz_imm); + } + } + __ Daddu(out, dividend, TMP); + if (ctz_imm < 32) { + __ Dsra(out, out, ctz_imm); + } else { + __ Dsra32(out, out, ctz_imm - 32); + } + if (imm < 0) { + __ Dsubu(out, ZERO, out); + } + } + } else { + if (type == Primitive::kPrimInt) { + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. + __ Sra(TMP, dividend, 31); + __ Subu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Addu(out, out, TMP); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + __ Addu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); + } else { + __ Sll(out, out, 32 - ctz_imm); + __ Srl(out, out, 32 - ctz_imm); + } + __ Subu(out, out, TMP); + } + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. 
+ __ Dsra32(TMP, dividend, 31); + __ Dsubu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Daddu(out, out, TMP); + } else { + __ Dsra32(TMP, dividend, 31); + if (ctz_imm > 32) { + __ Dsrl(TMP, TMP, 64 - ctz_imm); + } else { + __ Dsrl32(TMP, TMP, 32 - ctz_imm); + } + __ Daddu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); + } else { + if (ctz_imm > 32) { + __ Dsll(out, out, 64 - ctz_imm); + __ Dsrl(out, out, 64 - ctz_imm); + } else { + __ Dsll32(out, out, 32 - ctz_imm); + __ Dsrl32(out, out, 32 - ctz_imm); + } + } + __ Dsubu(out, out, TMP); + } + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + + Primitive::Type type = instruction->GetResultType(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type; + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, + (type == Primitive::kPrimLong), + &magic, + &shift); + + if (type == Primitive::kPrimInt) { + __ LoadConst32(TMP, magic); + __ MuhR6(TMP, dividend, TMP); + + if (imm > 0 && magic < 0) { + __ Addu(TMP, TMP, dividend); + } else if (imm < 0 && magic > 0) { + __ Subu(TMP, TMP, dividend); + } + + if (shift != 0) { + __ Sra(TMP, TMP, shift); + } + + if (instruction->IsDiv()) { + __ Sra(out, TMP, 31); + __ Subu(out, TMP, out); + } else { + __ Sra(AT, TMP, 31); + __ Subu(AT, TMP, AT); + __ LoadConst32(TMP, imm); + __ MulR6(TMP, AT, TMP); + __ Subu(out, dividend, TMP); + } + } else { + __ LoadConst64(TMP, magic); + __ Dmuh(TMP, dividend, TMP); + + if (imm > 0 && magic < 0) { + __ Daddu(TMP, TMP, dividend); + } else if (imm < 0 && magic > 0) { + __ Dsubu(TMP, TMP, dividend); + } + + if (shift >= 32) { + __ Dsra32(TMP, TMP, shift - 32); + } else if (shift > 0) { + __ Dsra(TMP, TMP, shift); + } + + if (instruction->IsDiv()) { + __ Dsra32(out, TMP, 31); + __ Dsubu(out, TMP, out); + } else { + __ Dsra32(AT, TMP, 31); + __ Dsubu(AT, TMP, AT); + __ LoadConst64(TMP, imm); + __ Dmul(TMP, AT, TMP); + __ Dsubu(out, dividend, TMP); + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type; + + LocationSummary* locations = instruction->GetLocations(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + Location second = locations->InAt(1); + + if (second.IsConstant()) { + int64_t imm = Int64FromConstant(second.GetConstant()); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code from being executed.
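
The DivRemByPowerOfTwo paths above all rely on the standard bias trick for signed division: a bias of 2^k - 1, derived from the sign bit, is added to negative dividends so the arithmetic shift rounds toward zero instead of toward negative infinity; for a negative divisor the quotient is then negated. A minimal 32-bit sketch, assuming k is in [1, 31]:

```cpp
#include <cstdint>

// Signed division by 2^k with round-toward-zero semantics, mirroring
// the Sra/Srl/Addu/Sra sequence emitted for the int case above.
int32_t DivByPowerOfTwo(int32_t dividend, int k) {
  int32_t sign = dividend >> 31;  // all ones if negative, else zero
  // Bias is 2^k - 1 for a negative dividend, 0 otherwise.
  int32_t bias = static_cast<int32_t>(static_cast<uint32_t>(sign) >> (32 - k));
  return (dividend + bias) >> k;  // biased shift now rounds toward zero
}
```

For k == 1 the bias is simply the sign bit itself, which is why the fast path for division by +/-2 needs only a single Srl(TMP, dividend, 31).
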
+ } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(std::abs(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister divisor = second.AsRegister<GpuRegister>(); + if (instruction->IsDiv()) { + if (type == Primitive::kPrimInt) + __ DivR6(out, dividend, divisor); + else + __ Ddiv(out, dividend, divisor); + } else { + if (type == Primitive::kPrimInt) + __ ModR6(out, dividend, divisor); + else + __ Dmod(out, dividend, divisor); + } + } +} + void LocationsBuilderMIPS64::VisitDiv(HDiv* div) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall); @@ -1909,7 +2196,7 @@ void LocationsBuilderMIPS64::VisitDiv(HDiv* div) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -1931,16 +2218,9 @@ void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) { switch (type) { case Primitive::kPrimInt: - case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - if (type == Primitive::kPrimInt) - __ DivR6(dst, lhs, rhs); - else - __ Ddiv(dst, lhs, rhs); + case Primitive::kPrimLong: + GenerateDivRemIntegral(instruction); break; - } case Primitive::kPrimFloat: case Primitive::kPrimDouble: { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); @@ -2512,10 +2792,12 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in // allocation of a register for the current method pointer like on x86 baseline. // TODO: remove this once all the issues with register saving/restoring are // sorted out. 
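
GenerateDivRemWithAnyConstant implements the multiply-by-magic-number division from Hacker's Delight, with constants supplied by the existing CalculateMagicAndShiftForDivRem helper. As a concrete illustration, signed 32-bit division by 7 written out with the usual magic constants for that divisor (illustrative values, not taken from this patch):

```cpp
#include <cstdint>

// Division by 7 via multiply-high, mirroring the MuhR6/Addu/Sra/Subu
// sequence above. 0x92492493 and shift 2 are the standard Hacker's
// Delight constants for divisor 7.
int32_t DivBy7(int32_t x) {
  const int32_t kMagic = static_cast<int32_t>(0x92492493u);
  const int kShift = 2;
  // High 32 bits of the 64-bit product (MuhR6 on MIPS64R6).
  int32_t t = static_cast<int32_t>((static_cast<int64_t>(kMagic) * x) >> 32);
  t += x;                // imm > 0 && magic < 0: add the dividend back
  t >>= kShift;          // arithmetic shift
  return t - (t >> 31);  // add 1 for negative t, rounding toward zero
}
```

For example, DivBy7(100) yields 14 and DivBy7(-100) yields -14. The remainder path simply multiplies the quotient back by the divisor and subtracts it from the dividend.
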
- LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); - if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + if (invoke->HasCurrentMethodInput()) { + LocationSummary* locations = invoke->GetLocations(); + Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { + locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + } } } @@ -2659,14 +2941,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } -void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - if (TryGenerateIntrinsicCode(invoke, codegen_)) { - return; - } - +void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); - GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); + GpuRegister temp = temp_location.AsRegister<GpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kMips64PointerSize).SizeValue(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -2675,13 +2953,21 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) // temp = object->GetClass(); DCHECK(receiver.IsRegister()); __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset); - codegen_->MaybeRecordImplicitNullCheck(invoke); + MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); +} + +void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -3108,7 +3394,7 @@ void LocationsBuilderMIPS64::VisitRem(HRem* rem) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -3128,20 +3414,12 @@ void LocationsBuilderMIPS64::VisitRem(HRem* rem) { void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { Primitive::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); switch (type) { case Primitive::kPrimInt: - case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - if (type == Primitive::kPrimInt) - __ ModR6(dst, lhs, rhs); - else - __ Dmod(dst, lhs, rhs); + case Primitive::kPrimLong: + GenerateDivRemIntegral(instruction); break; - } case Primitive::kPrimFloat: case Primitive::kPrimDouble: { diff --git 
a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 9bbd02759a..58c6e0fa83 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -230,6 +230,10 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { Label* true_target, Label* false_target, Label* always_true_target); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); Mips64Assembler* const assembler_; @@ -333,10 +337,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { MethodReference target_method) OVERRIDE; void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED, - Location temp ATTRIBUTE_UNUSED) OVERRIDE { - UNIMPLEMENTED(FATAL); - } + void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc index 921c1d86c2..bf354e7ee2 100644 --- a/compiler/optimizing/code_generator_utils.cc +++ b/compiler/optimizing/code_generator_utils.cc @@ -15,6 +15,7 @@ */ #include "code_generator_utils.h" +#include "nodes.h" #include "base/logging.h" @@ -94,4 +95,19 @@ void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, *shift = is_long ? p - 64 : p - 32; } +// Is it valid to reverse the condition? Uses the values supplied to +// GenerateTestAndBranch() in instruction generators. +bool CanReverseCondition(Label* always_true_target, + Label* false_target, + HCondition* condition) { + // 'always_true_target' is null when the 'true' path is to the next + // block to be generated. Check the type of the condition to ensure that + // FP conditions are not swapped. This is for future fusing of HCompare and + // HCondition. + // Note: If the condition is nullptr, then it is always okay to reverse. + return always_true_target == nullptr && false_target != nullptr && + (condition == nullptr || + !Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())); +} + } // namespace art diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h index 59b495c2c9..628eee8885 100644 --- a/compiler/optimizing/code_generator_utils.h +++ b/compiler/optimizing/code_generator_utils.h @@ -21,10 +21,19 @@ namespace art { +class Label; +class HCondition; + // Computes the magic number and the shift needed in the div/rem by constant algorithm, as out // arguments `magic` and `shift` void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift); +// Is it valid to reverse the condition? Uses the values supplied to +// GenerateTestAndBranch() in instruction generators. 
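
CanReverseCondition captures the layout optimization shared by the ARM, x86, and x86-64 GenerateTestAndBranch changes in this patch: when always_true_target is null, the true block is the next one to be emitted, so branching to the false target on the opposite condition lets the code fall through and saves an unconditional jump. A toy-emitter sketch of the idea (the names here are illustrative, not ART APIs):

```cpp
#include <cstdio>

enum Cond { kEqual, kNotEqual };
Cond Opposite(Cond c) { return c == kEqual ? kNotEqual : kEqual; }
const char* Mnemonic(Cond c) { return c == kEqual ? "je" : "jne"; }

void EmitTestAndBranch(bool true_block_is_next, Cond cond,
                       const char* true_target, const char* false_target) {
  if (true_block_is_next && false_target != nullptr) {
    // Reversed branch: jump on the opposite condition and fall
    // through into the true block.
    std::printf("%s %s\n", Mnemonic(Opposite(cond)), false_target);
    return;
  }
  std::printf("%s %s\n", Mnemonic(cond), true_target);
  std::printf("jmp %s\n", false_target);  // wasted when it targets the next block
}
```

Floating-point conditions are excluded by the helper because reversing one is not a simple negation once unordered (NaN) results are involved.
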
+bool CanReverseCondition(Label* always_true_target, + Label* false_target, + HCondition* condition); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0df7e3b30a..8308d9ee20 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1216,16 +1216,21 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio DCHECK_EQ(cond_value, 0); } } else { + HCondition* condition = cond->AsCondition(); bool is_materialized = - !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization(); + condition == nullptr || condition->NeedsMaterialization(); // Moves do not affect the eflags register, so if the condition is // evaluated just before the if, we don't need to evaluate it // again. We can't use the eflags on long/FP conditions if they are // materialized due to the complex branching. - Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; - bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction) + Primitive::Type type = (condition != nullptr) + ? cond->InputAt(0)->GetType() + : Primitive::kPrimInt; + bool eflags_set = condition != nullptr + && condition->IsBeforeWhenDisregardMoves(instruction) && (type != Primitive::kPrimLong && !Primitive::IsFloatingPointType(type)); + // Can we optimize the jump if we know that the next block is the true case? + bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition); if (is_materialized) { if (!eflags_set) { // Materialized condition, compare against 0. @@ -1235,9 +1240,17 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio } else { __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); } + if (can_jump_to_false) { + __ j(kEqual, false_target); + return; + } __ j(kNotEqual, true_target); } else { - __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); + if (can_jump_to_false) { + __ j(X86Condition(condition->GetOppositeCondition()), false_target); + return; + } + __ j(X86Condition(condition->GetCondition()), true_target); } } else { // Condition has not been materialized, use its inputs as the @@ -1247,7 +1260,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { // Generate the comparison directly. GenerateCompareTestAndBranch(instruction->AsIf(), - cond->AsCondition(), + condition, true_target, false_target, always_true_target); @@ -1270,7 +1283,13 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } - __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); + + if (can_jump_to_false) { + __ j(X86Condition(condition->GetOppositeCondition()), false_target); + return; + } + + __ j(X86Condition(condition->GetCondition()), true_target); } } if (false_target != nullptr) { @@ -4043,16 +4062,16 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI // Ensure the value is in a byte register. 
locations->SetInAt(1, Location::RegisterLocation(EAX)); } else if (Primitive::IsFloatingPointType(field_type)) { - locations->SetInAt(1, Location::RequiresFpuRegister()); - } else { + if (is_volatile && field_type == Primitive::kPrimDouble) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); + } + } else if (is_volatile && field_type == Primitive::kPrimLong) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. locations->SetInAt(1, Location::RequiresRegister()); - } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); - } else if (is_volatile && (field_type == Primitive::kPrimLong)) { + // A 64-bit value can be atomically written to an address with movsd and an XMM register. // We need two XMM registers because there's no easier way to (bit) copy a register pair // into a single XMM register (we copy each pair part into the XMMs and then interleave them). @@ -4060,6 +4079,15 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI // isolated cases when we need this it isn't worth adding the extra complexity. locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too. + // Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX)); + } } } @@ -4081,6 +4109,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } + bool maybe_record_implicit_null_check_done = false; + switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { @@ -4090,7 +4120,12 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimShort: case Primitive::kPrimChar: { - __ movw(Address(base, offset), value.AsRegister<Register>()); + if (value.IsConstant()) { + int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movw(Address(base, offset), Immediate(v)); + } else { + __ movw(Address(base, offset), value.AsRegister<Register>()); + } break; } @@ -4105,6 +4140,9 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, __ movl(temp, value.AsRegister<Register>()); __ PoisonHeapReference(temp); __ movl(Address(base, offset), temp); + } else if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(v)); } else { __ movl(Address(base, offset), value.AsRegister<Register>()); } @@ -4120,21 +4158,40 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, __ punpckldq(temp1, temp2); __ movsd(Address(base, offset), temp1); codegen_->MaybeRecordImplicitNullCheck(instruction); + } else if (value.IsConstant()) { + int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(Low32Bits(v))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v))); } else { __ movl(Address(base, offset), value.AsRegisterPairLow<Register>()); codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); } + maybe_record_implicit_null_check_done = true; break; } case Primitive::kPrimFloat: { - __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(v)); + } else { + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } case Primitive::kPrimDouble: { - __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(Low32Bits(v))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v))); + maybe_record_implicit_null_check_done = true; + } else { + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } @@ -4143,8 +4200,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - // Longs are handled in the switch. - if (field_type != Primitive::kPrimLong) { + if (!maybe_record_implicit_null_check_done) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4481,7 +4537,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Ensure the value is in a byte register. 
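
Several of the new x86 constant paths above turn a float or double constant into plain integer immediate stores: the constant's bits are reinterpreted as an integer, and a double is written as two 32-bit movl halves, with the implicit null check recorded on the first store since that is the access that can fault. A sketch of the bit reinterpretation and the split, assuming the usual memcpy-based bit_cast:

```cpp
#include <cstdint>
#include <cstring>

// Reinterpret a float's bits as an int32 (what bit_cast<int32_t, float>
// does in the code above).
int32_t FloatBits(float f) {
  int32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return bits;  // e.g. FloatBits(1.0f) == 0x3f800000
}

int64_t DoubleBits(double d) {
  int64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));
  return bits;
}

// The two movl immediates used for a double field or array slot.
uint32_t Low32Bits(int64_t v) { return static_cast<uint32_t>(v); }
uint32_t High32Bits(int64_t v) {
  return static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32);
}
```
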
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); } else if (Primitive::IsFloatingPointType(value_type)) { - locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); } else { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } @@ -4667,8 +4723,14 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<Register>(), TIMES_4, offset); - DCHECK(value.IsFpuRegister()); - __ movss(address, value.AsFpuRegister<XmmRegister>()); + if (value.IsFpuRegister()) { + __ movss(address, value.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(value.IsConstant()); + int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + __ movl(address, Immediate(v)); + } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4677,8 +4739,19 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) : Address(array, index.AsRegister<Register>(), TIMES_8, offset); - DCHECK(value.IsFpuRegister()); - __ movsd(address, value.AsFpuRegister<XmmRegister>()); + if (value.IsFpuRegister()) { + __ movsd(address, value.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(value.IsConstant()); + Address address_hi = index.IsConstant() ? + Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + + offset + kX86WordSize) : + Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize); + int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + __ movl(address, Immediate(Low32Bits(v))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(address_hi, Immediate(High32Bits(v))); + } break; } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5218d70995..ee8a299c5e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1183,16 +1183,20 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc DCHECK_EQ(cond_value, 0); } } else { - bool is_materialized = - !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization(); + HCondition* condition = cond->AsCondition(); + bool is_materialized = condition == nullptr || condition->NeedsMaterialization(); // Moves do not affect the eflags register, so if the condition is // evaluated just before the if, we don't need to evaluate it // again. We can't use the eflags on FP conditions if they are // materialized due to the complex branching. - Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; - bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction) + Primitive::Type type = (condition != nullptr) + ? cond->InputAt(0)->GetType() + : Primitive::kPrimInt; + bool eflags_set = condition != nullptr + && condition->IsBeforeWhenDisregardMoves(instruction) && !Primitive::IsFloatingPointType(type); + // Can we optimize the jump if we know that the next block is the true case? 
+ bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition); if (is_materialized) { if (!eflags_set) { @@ -1204,9 +1208,17 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); } + if (can_jump_to_false) { + __ j(kEqual, false_target); + return; + } __ j(kNotEqual, true_target); } else { - __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); + if (can_jump_to_false) { + __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); + return; + } + __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); } } else { // Condition has not been materialized, use its inputs as the @@ -1215,7 +1227,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc // Is this a long or FP comparison that has been folded into the HCondition? if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { // Generate the comparison directly. - GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(), + GenerateCompareTestAndBranch(instruction->AsIf(), condition, true_target, false_target, always_true_target); return; } @@ -1235,7 +1247,13 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } - __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); + + if (can_jump_to_false) { + __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); + return; + } + + __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); } } if (false_target != nullptr) { @@ -2562,7 +2580,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); // We can use a leaq or addq if the constant can fit in an immediate. - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -2682,7 +2700,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -3755,14 +3773,25 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); Primitive::Type field_type = field_info.GetFieldType(); + bool is_volatile = field_info.IsVolatile(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { - locations->SetInAt(1, Location::RequiresFpuRegister()); + if (is_volatile) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. 
+ locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1))); + } else { + locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); + } } else { - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1))); + if (is_volatile) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. + locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1))); + } else { + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + } } if (needs_write_barrier) { // Temporary registers for the write barrier. @@ -3790,11 +3819,13 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } + bool maybe_record_implicit_null_check_done = false; + switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { if (value.IsConstant()) { - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); __ movb(Address(base, offset), Immediate(v)); } else { __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); @@ -3805,7 +3836,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimShort: case Primitive::kPrimChar: { if (value.IsConstant()) { - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); __ movw(Address(base, offset), Immediate(v)); } else { __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); @@ -3838,9 +3869,11 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimLong: { if (value.IsConstant()) { int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(v)); - int32_t v_32 = v; - __ movq(Address(base, offset), Immediate(v_32)); + codegen_->MoveInt64ToAddress(Address(base, offset), + Address(base, offset + sizeof(int32_t)), + v, + instruction); + maybe_record_implicit_null_check_done = true; } else { __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); } @@ -3848,12 +3881,28 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, } case Primitive::kPrimFloat: { - __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int32_t v = + bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + __ movl(Address(base, offset), Immediate(v)); + } else { + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } case Primitive::kPrimDouble: { - __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int64_t v = + bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + codegen_->MoveInt64ToAddress(Address(base, offset), + Address(base, offset + sizeof(int32_t)), + v, + instruction); + maybe_record_implicit_null_check_done = true; + } else { + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } @@ -3862,7 +3911,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); + if (!maybe_record_implicit_null_check_done) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { CpuRegister 
temp = locations->GetTemp(0).AsRegister<CpuRegister>(); @@ -4170,13 +4221,9 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1))); - locations->SetInAt(2, Location::RequiresRegister()); - if (value_type == Primitive::kPrimLong) { - locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2))); - } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) { - locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(value_type)) { + locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); } else { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } @@ -4330,13 +4377,15 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); if (value.IsRegister()) { __ movq(address, value.AsRegister<CpuRegister>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); } else { int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(v)); - int32_t v_32 = v; - __ movq(address, Immediate(v_32)); + Address address_high = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + + offset + sizeof(int32_t)) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t)); + codegen_->MoveInt64ToAddress(address, address_high, v, instruction); } - codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4345,8 +4394,14 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); - DCHECK(value.IsFpuRegister()); - __ movss(address, value.AsFpuRegister<XmmRegister>()); + if (value.IsFpuRegister()) { + __ movss(address, value.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(value.IsConstant()); + int32_t v = + bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + __ movl(address, Immediate(v)); + } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4356,9 +4411,18 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); - DCHECK(value.IsFpuRegister()); - __ movsd(address, value.AsFpuRegister<XmmRegister>()); - codegen_->MaybeRecordImplicitNullCheck(instruction); + if (value.IsFpuRegister()) { + __ movsd(address, value.AsFpuRegister<XmmRegister>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } else { + int64_t v = + bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + Address address_high = index.IsConstant() + ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + + offset + sizeof(int32_t)) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t)); + codegen_->MoveInt64ToAddress(address, address_high, v, instruction); + } break; } @@ -5564,6 +5628,24 @@ Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) { return Address::RIP(table_fixup); } +void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low, + const Address& addr_high, + int64_t v, + HInstruction* instruction) { + if (IsInt<32>(v)) { + int32_t v_32 = v; + __ movq(addr_low, Immediate(v_32)); + MaybeRecordImplicitNullCheck(instruction); + } else { + // Didn't fit in a register. Do it in pieces. + int32_t low_v = Low32Bits(v); + int32_t high_v = High32Bits(v); + __ movl(addr_low, Immediate(low_v)); + MaybeRecordImplicitNullCheck(instruction); + __ movl(addr_high, Immediate(high_v)); + } +} + #undef __ } // namespace x86_64 diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index fc485f5bb6..7a52473408 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -368,6 +368,12 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. void Store64BitValueToStack(Location dest, int64_t value); + // Assign a 64 bit constant to an address. + void MoveInt64ToAddress(const Address& addr_low, + const Address& addr_high, + int64_t v, + HInstruction* instruction); + private: struct PcRelativeDexCacheAccessInfo { PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h new file mode 100644 index 0000000000..b459d24d7c --- /dev/null +++ b/compiler/optimizing/common_dominator.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ +#define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ + +#include "nodes.h" + +namespace art { + +// Helper class for finding common dominators of two or more blocks in a graph. +// The domination information of a graph must not be modified while there is +// a CommonDominator object as its internal state could become invalid. +class CommonDominator { + public: + // Convenience function to find the common dominator of 2 blocks. + static HBasicBlock* ForPair(HBasicBlock* block1, HBasicBlock* block2) { + CommonDominator finder(block1); + finder.Update(block2); + return finder.Get(); + } + + // Create a finder starting with a given block. + explicit CommonDominator(HBasicBlock* block) + : dominator_(block), chain_length_(ChainLength(block)) { + DCHECK(block != nullptr); + } + + // Update the common dominator with another block.
+ void Update(HBasicBlock* block) { + DCHECK(block != nullptr); + HBasicBlock* block2 = dominator_; + DCHECK(block2 != nullptr); + if (block == block2) { + return; + } + size_t chain_length = ChainLength(block); + size_t chain_length2 = chain_length_; + // Equalize the chain lengths + for ( ; chain_length > chain_length2; --chain_length) { + block = block->GetDominator(); + DCHECK(block != nullptr); + } + for ( ; chain_length2 > chain_length; --chain_length2) { + block2 = block2->GetDominator(); + DCHECK(block2 != nullptr); + } + // Now run up the chain until we hit the common dominator. + while (block != block2) { + --chain_length; + block = block->GetDominator(); + DCHECK(block != nullptr); + block2 = block2->GetDominator(); + DCHECK(block2 != nullptr); + } + dominator_ = block; + chain_length_ = chain_length; + } + + HBasicBlock* Get() const { + return dominator_; + } + + private: + static size_t ChainLength(HBasicBlock* block) { + size_t result = 0; + while (block != nullptr) { + ++result; + block = block->GetDominator(); + } + return result; + } + + HBasicBlock* dominator_; + size_t chain_length_; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 3de96b5d84..0d7c796837 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -188,6 +188,21 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { VisitInstruction(try_boundary); } +void GraphChecker::VisitLoadException(HLoadException* load) { + // Ensure that LoadException is the first instruction in a catch block. + if (!load->GetBlock()->IsCatchBlock()) { + AddError(StringPrintf("%s:%d is in a non-catch block %d.", + load->DebugName(), + load->GetId(), + load->GetBlock()->GetBlockId())); + } else if (load->GetBlock()->GetFirstInstruction() != load) { + AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.", + load->DebugName(), + load->GetId(), + load->GetBlock()->GetBlockId())); + } +} + void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { AddError(StringPrintf("Instruction id %d is duplicate in graph.", @@ -242,10 +257,11 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } size_t use_index = use_it.Current()->GetIndex(); if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) { - AddError(StringPrintf("User %s:%d of instruction %d has a wrong " + AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong " "UseListNode index.", use->DebugName(), use->GetId(), + instruction->DebugName(), instruction->GetId())); } } @@ -445,12 +461,18 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); HLoopInformation* loop_information = loop_header->GetLoopInformation(); - // Ensure the pre-header block is first in the list of - // predecessors of a loop header. + // Ensure the pre-header block is first in the list of predecessors of a loop + // header and that the header block is its only successor. 
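+ // Loop optimizations insert new instructions at the end of the pre-header
+ // (see Insert() in induction_var_range.cc), which is only safe when the
+ // loop header is the pre-header's sole successor.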
if (!loop_header->IsLoopPreHeaderFirstPredecessor()) { AddError(StringPrintf( "Loop pre-header is not the first predecessor of the loop header %d.", id)); + } else if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) { + AddError(StringPrintf( + "Loop pre-header %d of loop defined by header %d has %zu successors.", + loop_information->GetPreHeader()->GetBlockId(), + id, + loop_information->GetPreHeader()->GetSuccessors().size())); } // Ensure the loop header has only one incoming branch and the remaining @@ -493,6 +515,13 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { "Loop defined by header %d has an invalid back edge %d.", id, back_edge_id)); + } else if (back_edge->GetLoopInformation() != loop_information) { + AddError(StringPrintf( + "Back edge %d of loop defined by header %d belongs to nested loop " + "with header %d.", + back_edge_id, + id, + back_edge->GetLoopInformation()->GetHeader()->GetBlockId())); } } } @@ -531,10 +560,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); if (!use->IsPhi() && !instruction->StrictlyDominates(use)) { - AddError(StringPrintf("Instruction %d in block %d does not dominate " - "use %d in block %d.", - instruction->GetId(), current_block_->GetBlockId(), - use->GetId(), use->GetBlock()->GetBlockId())); + AddError(StringPrintf("Instruction %s:%d in block %d does not dominate " + "use %s:%d in block %d.", + instruction->DebugName(), + instruction->GetId(), + current_block_->GetBlockId(), + use->DebugName(), + use->GetId(), + use->GetBlock()->GetBlockId())); } } diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index abf3659d91..d5ddbabc8c 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -50,6 +50,9 @@ class GraphChecker : public HGraphDelegateVisitor { // Check successors of blocks ending in TryBoundary. void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; + // Check that LoadException is the first instruction in a catch block. + void VisitLoadException(HLoadException* load) OVERRIDE; + // Check that HCheckCast and HInstanceOf have HLoadClass as second input. void VisitCheckCast(HCheckCast* check) OVERRIDE; void VisitInstanceOf(HInstanceOf* check) OVERRIDE; diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index b7262f6b29..5de94f43c9 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -69,10 +69,13 @@ class InductionVarAnalysisTest : public testing::Test { entry_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(entry_); BuildForLoop(0, n); + return_ = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(return_); exit_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(exit_); entry_->AddSuccessor(loop_preheader_[0]); - loop_header_[0]->AddSuccessor(exit_); + loop_header_[0]->AddSuccessor(return_); + return_->AddSuccessor(exit_); graph_->SetEntryBlock(entry_); graph_->SetExitBlock(exit_); @@ -91,6 +94,7 @@ class InductionVarAnalysisTest : public testing::Test { entry_->AddInstruction(new (&allocator_) HStoreLocal(tmp_, constant100_)); dum_ = new (&allocator_) HLocal(n + 2); entry_->AddInstruction(dum_); + return_->AddInstruction(new (&allocator_) HReturnVoid()); exit_->AddInstruction(new (&allocator_) HExit()); // Provide loop instructions. 
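The test fixtures in this change consistently route the loop exit through a dedicated return block, leaving the exit block with nothing but HExit. A condensed sketch of the wiring built above (fixture names as in the diff; shown for illustration only, not part of the patch):

  // entry -> preheader -> header <-> body, plus header -> return -> exit.
  entry_->AddSuccessor(loop_preheader_[0]);
  loop_header_[0]->AddSuccessor(return_);                    // normal loop exit edge
  return_->AddSuccessor(exit_);
  return_->AddInstruction(new (&allocator_) HReturnVoid());  // return lives in its own block
  exit_->AddInstruction(new (&allocator_) HExit());          // exit block holds HExit only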
@@ -177,6 +181,7 @@ class InductionVarAnalysisTest : public testing::Test { // Fixed basic blocks and instructions. HBasicBlock* entry_; + HBasicBlock* return_; HBasicBlock* exit_; HInstruction* parameter_; // "this" HInstruction* constant0_; diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 5530d261d2..b40ef5aa41 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -75,10 +75,12 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { return v; } -static HInstruction* Insert(HBasicBlock* preheader, HInstruction* instruction) { - DCHECK(preheader != nullptr); +/** Helper method to insert an instruction. */ +static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { + DCHECK(block != nullptr); + DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId(); DCHECK(instruction != nullptr); - preheader->InsertInstructionBefore(instruction, preheader->GetLastInstruction()); + block->InsertInstructionBefore(instruction, block->GetLastInstruction()); return instruction; } @@ -91,48 +93,98 @@ InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis) DCHECK(induction_analysis != nullptr); } -InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context, - HInstruction* instruction) { - return GetInduction(context, instruction, /* is_min */ true); -} - -InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context, - HInstruction* instruction) { - return SimplifyMax(GetInduction(context, instruction, /* is_min */ false)); +void InductionVarRange::GetInductionRange(HInstruction* context, + HInstruction* instruction, + /*out*/Value* min_val, + /*out*/Value* max_val, + /*out*/bool* needs_finite_test) { + HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (loop != nullptr) { + // Set up loop information. + HBasicBlock* header = loop->GetHeader(); + bool in_body = context->GetBlock() != header; + HInductionVarAnalysis::InductionInfo* info = + induction_analysis_->LookupInfo(loop, instruction); + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); + // Find range. + *min_val = GetVal(info, trip, in_body, /* is_min */ true); + *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); + *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + } else { + // No loop to analyze. 
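+ // A default-constructed Value is unknown (is_known == false), and no
+ // finite-test is required outside a loop.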
+ *min_val = Value(); + *max_val = Value(); + *needs_finite_test = false; + } } bool InductionVarRange::CanGenerateCode(HInstruction* context, HInstruction* instruction, - /*out*/bool* top_test) { - return GenerateCode(context, instruction, nullptr, nullptr, nullptr, nullptr, top_test); + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test) { + return GenerateCode(context, + instruction, + nullptr, nullptr, nullptr, nullptr, nullptr, // nothing generated yet + needs_finite_test, + needs_taken_test); } -bool InductionVarRange::GenerateCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** lower, - /*out*/HInstruction** upper) { - return GenerateCode(context, instruction, graph, block, lower, upper, nullptr); +void InductionVarRange::GenerateRangeCode(HInstruction* context, + HInstruction* instruction, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** lower, + /*out*/HInstruction** upper) { + bool b1, b2; // unused + if (!GenerateCode(context, instruction, graph, block, lower, upper, nullptr, &b1, &b2)) { + LOG(FATAL) << "Failed precondition: GenerateCode()"; + } +} + +void InductionVarRange::GenerateTakenTest(HInstruction* context, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** taken_test) { + bool b1, b2; // unused + if (!GenerateCode(context, context, graph, block, nullptr, nullptr, taken_test, &b1, &b2)) { + LOG(FATAL) << "Failed precondition: GenerateCode()"; + } } // // Private class methods. // -InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context, - HInstruction* instruction, - bool is_min) { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop != nullptr) { - HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - return GetVal(induction_analysis_->LookupInfo(loop, instruction), - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()), - in_body, - is_min); +bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) { + if (info != nullptr) { + if (info->induction_class == HInductionVarAnalysis::kLinear) { + return true; + } else if (info->induction_class == HInductionVarAnalysis::kWrapAround) { + return NeedsTripCount(info->op_b); + } } - return Value(); + return false; +} + +bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) { + if (trip != nullptr) { + if (trip->induction_class == HInductionVarAnalysis::kInvariant) { + return trip->operation == HInductionVarAnalysis::kTripCountInBody || + trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe; + } + } + return false; +} + +bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) { + if (trip != nullptr) { + if (trip->induction_class == HInductionVarAnalysis::kInvariant) { + return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe || + trip->operation == HInductionVarAnalysis::kTripCountInLoopUnsafe; + } + } + return false; } InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, @@ -184,11 +236,13 @@ InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::Induct case HInductionVarAnalysis::kFetch: return GetFetch(info->fetch, trip, in_body, is_min); case HInductionVarAnalysis::kTripCountInLoop: + case HInductionVarAnalysis::kTripCountInLoopUnsafe: if (!in_body && !is_min) { // one extra! 
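// In the loop header the induction is evaluated once more than in the body,
// so the maximum reaches the full trip count rather than one less.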
return GetVal(info->op_a, trip, in_body, is_min); } FALLTHROUGH_INTENDED; case HInductionVarAnalysis::kTripCountInBody: + case HInductionVarAnalysis::kTripCountInBodyUnsafe: if (is_min) { return Value(0); } else if (in_body) { @@ -356,25 +410,42 @@ bool InductionVarRange::GenerateCode(HInstruction* context, HBasicBlock* block, /*out*/HInstruction** lower, /*out*/HInstruction** upper, - /*out*/bool* top_test) { + /*out*/HInstruction** taken_test, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test) { HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop if (loop != nullptr) { + // Set up loop information. HBasicBlock* header = loop->GetHeader(); bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); + HInductionVarAnalysis::InductionInfo* info = + induction_analysis_->LookupInfo(loop, instruction); + if (info == nullptr) { + return false; // nothing to analyze + } HInductionVarAnalysis::InductionInfo* trip = induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - if (info != nullptr && trip != nullptr) { - if (top_test != nullptr) { - *top_test = trip->operation != HInductionVarAnalysis::kTripCountInLoop; + // Determine what tests are needed. + *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + *needs_taken_test = NeedsTripCount(info) && IsBodyTripCount(trip); + // Code generation for taken test: generate the code when requested or otherwise analyze + // if code generation is feasible when taken test is needed. + if (taken_test != nullptr) { + return GenerateCode( + trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false); + } else if (*needs_taken_test) { + if (!GenerateCode( + trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) { + return false; } - return + } + // Code generation for lower and upper. + return // Success on lower if invariant (not set), or code can be generated. ((info->induction_class == HInductionVarAnalysis::kInvariant) || GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) && // And success on upper. GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); - } } return false; } @@ -387,19 +458,38 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, bool in_body, bool is_min) { if (info != nullptr) { + // Handle current operation. Primitive::Type type = Primitive::kPrimInt; HInstruction* opa = nullptr; HInstruction* opb = nullptr; - int32_t value = 0; switch (info->induction_class) { case HInductionVarAnalysis::kInvariant: // Invariants. 
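// Generate code for both operands recursively, then combine the results with
// the HIR node that matches the invariant operation below.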
switch (info->operation) { case HInductionVarAnalysis::kAdd: + case HInductionVarAnalysis::kLT: + case HInductionVarAnalysis::kLE: + case HInductionVarAnalysis::kGT: + case HInductionVarAnalysis::kGE: if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) && GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { if (graph != nullptr) { - *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb)); + HInstruction* operation = nullptr; + switch (info->operation) { + case HInductionVarAnalysis::kAdd: + operation = new (graph->GetArena()) HAdd(type, opa, opb); break; + case HInductionVarAnalysis::kLT: + operation = new (graph->GetArena()) HLessThan(opa, opb); break; + case HInductionVarAnalysis::kLE: + operation = new (graph->GetArena()) HLessThanOrEqual(opa, opb); break; + case HInductionVarAnalysis::kGT: + operation = new (graph->GetArena()) HGreaterThan(opa, opb); break; + case HInductionVarAnalysis::kGE: + operation = new (graph->GetArena()) HGreaterThanOrEqual(opa, opb); break; + default: + LOG(FATAL) << "unknown operation"; + } + *result = Insert(block, operation); } return true; } @@ -427,11 +517,13 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, } return true; case HInductionVarAnalysis::kTripCountInLoop: + case HInductionVarAnalysis::kTripCountInLoopUnsafe: if (!in_body && !is_min) { // one extra! return GenerateCode(info->op_a, trip, graph, block, result, in_body, is_min); } FALLTHROUGH_INTENDED; case HInductionVarAnalysis::kTripCountInBody: + case HInductionVarAnalysis::kTripCountInBodyUnsafe: if (is_min) { if (graph != nullptr) { *result = graph->GetIntConstant(0); @@ -452,23 +544,31 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, break; } break; - case HInductionVarAnalysis::kLinear: - // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only - // to avoid arithmetic wrap-around situations that are hard to guard against. - if (GetConstant(info->op_a, &value)) { - if (value == 1 || value == -1) { - const bool is_min_a = value == 1 ? is_min : !is_min; - if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && - GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { - if (graph != nullptr) { - *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb)); + case HInductionVarAnalysis::kLinear: { + // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only + // to avoid arithmetic wrap-around situations that are hard to guard against. + int32_t stride_value = 0; + if (GetConstant(info->op_a, &stride_value)) { + if (stride_value == 1 || stride_value == -1) { + const bool is_min_a = stride_value == 1 ? 
is_min : !is_min; + if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && + GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { + if (graph != nullptr) { + HInstruction* oper; + if (stride_value == 1) { + oper = new (graph->GetArena()) HAdd(type, opa, opb); + } else { + oper = new (graph->GetArena()) HSub(type, opb, opa); + } + *result = Insert(block, oper); + } + return true; + } - } } } break; - default: // TODO(ajcbik): add more cases + default: break; } } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 7fa5a26dce..7984871b08 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -57,29 +57,33 @@ class InductionVarRange { explicit InductionVarRange(HInductionVarAnalysis* induction); /** - * Given a context denoted by the first instruction, returns a, - * possibly conservative, lower bound on the instruction's value. + * Given a context denoted by the first instruction, returns possibly conservative + * lower and upper bounds on the instruction's value in the output parameters min_val + * and max_val, respectively. The needs_finite_test flag denotes whether an additional finite-test + * is needed to protect the range evaluation inside its loop. */ - Value GetMinInduction(HInstruction* context, HInstruction* instruction); + void GetInductionRange(HInstruction* context, + HInstruction* instruction, + /*out*/Value* min_val, + /*out*/Value* max_val, + /*out*/bool* needs_finite_test); /** - * Given a context denoted by the first instruction, returns a, - * possibly conservative, upper bound on the instruction's value. + * Returns true if range analysis is able to generate code for the lower and upper + * bound expressions on the instruction in the given context. The needs_finite_test + * and needs_taken_test flags denote whether an additional finite-test and/or taken-test + * are needed to protect the range evaluation inside its loop. */ - Value GetMaxInduction(HInstruction* context, HInstruction* instruction); - - /** - * Returns true if range analysis is able to generate code for the lower and upper bound - * expressions on the instruction in the given context. Output parameter top_test denotes - * whether a top test is needed to protect the trip-count expression evaluation. - */ - bool CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* top_test); + bool CanGenerateCode(HInstruction* context, + HInstruction* instruction, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test); /** * Generates the actual code in the HIR for the lower and upper bound expressions on the * instruction in the given context. Code for the lower and upper bound expression are - * generated in given block and graph and are returned in lower and upper, respectively. - * For a loop invariant, lower is not set. + * generated in given block and graph and are returned in the output parameters lower and + * upper, respectively. For a loop invariant, lower is not set. * * For example, given expression x+i with range [0, 5] for i, calling this method * will generate the following sequence: @@ -87,20 +91,35 @@ class InductionVarRange { * block: * lower: add x, 0 * upper: add x, 5 + * + * Precondition: CanGenerateCode() returns true. 
*/ - bool GenerateCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** lower, - /*out*/HInstruction** upper); + void GenerateRangeCode(HInstruction* context, + HInstruction* instruction, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** lower, + /*out*/HInstruction** upper); + + /** + * Generates explicit taken-test for the loop in the given context. Code is generated in + * given block and graph. The taken-test is returned in parameter test. + * + * Precondition: CanGenerateCode() returns true and needs_taken_test is set. + */ + void GenerateTakenTest(HInstruction* context, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** taken_test); private: // // Private helper methods. // - Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min); + static bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info); + static bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip); + static bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip); static Value GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, @@ -130,8 +149,8 @@ class InductionVarRange { static Value MergeVal(Value v1, Value v2, bool is_min); /** - * Generates code for lower/upper expression in the HIR. Returns true on success. - * With graph == nullptr, the method can be used to determine if code generation + * Generates code for lower/upper/taken-test in the HIR. Returns true on success. + * With values nullptr, the method can be used to determine if code generation * would be successful without generating actual code yet. */ bool GenerateCode(HInstruction* context, @@ -140,7 +159,9 @@ class InductionVarRange { HBasicBlock* block, /*out*/HInstruction** lower, /*out*/HInstruction** upper, - bool* top_test); + /*out*/HInstruction** taken_test, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test); static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip, diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index ce8926ad72..c2ba157ed8 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -46,6 +46,10 @@ class InductionVarRangeTest : public testing::Test { EXPECT_EQ(v1.is_known, v2.is_known); } + // + // Construction methods. + // + /** Constructs bare minimum graph. */ void BuildGraph() { graph_->SetNumberOfVRegs(1); @@ -58,7 +62,7 @@ class InductionVarRangeTest : public testing::Test { } /** Constructs loop with given upper bound. */ - void BuildLoop(HInstruction* upper) { + void BuildLoop(int32_t lower, HInstruction* upper, int32_t stride) { // Control flow. loop_preheader_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(loop_preheader_); @@ -66,29 +70,37 @@ class InductionVarRangeTest : public testing::Test { graph_->AddBlock(loop_header); HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(loop_body); + HBasicBlock* return_block = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(return_block); entry_block_->AddSuccessor(loop_preheader_); loop_preheader_->AddSuccessor(loop_header); loop_header->AddSuccessor(loop_body); - loop_header->AddSuccessor(exit_block_); + loop_header->AddSuccessor(return_block); loop_body->AddSuccessor(loop_header); + return_block->AddSuccessor(exit_block_); // Instructions. 
HLocal* induc = new (&allocator_) HLocal(0); entry_block_->AddInstruction(induc); loop_preheader_->AddInstruction( - new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(0))); // i = 0 + new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(lower))); // i = l loop_preheader_->AddInstruction(new (&allocator_) HGoto()); HInstruction* load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt); loop_header->AddInstruction(load); - condition_ = new (&allocator_) HLessThan(load, upper); + if (stride > 0) { + condition_ = new (&allocator_) HLessThan(load, upper); // i < u + } else { + condition_ = new (&allocator_) HGreaterThan(load, upper); // i > u + } loop_header->AddInstruction(condition_); - loop_header->AddInstruction(new (&allocator_) HIf(condition_)); // i < u + loop_header->AddInstruction(new (&allocator_) HIf(condition_)); load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt); loop_body->AddInstruction(load); - increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(1)); + increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(stride)); loop_body->AddInstruction(increment_); - loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_)); // i++ + loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_)); // i += s loop_body->AddInstruction(new (&allocator_) HGoto()); - exit_block_->AddInstruction(new (&allocator_) HReturnVoid()); + return_block->AddInstruction(new (&allocator_) HReturnVoid()); + exit_block_->AddInstruction(new (&allocator_) HExit()); } /** Performs induction variable analysis. */ @@ -124,8 +136,20 @@ class InductionVarRangeTest : public testing::Test { } /** Constructs a trip-count. */ - HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) { - return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr); + HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) { + if (in_loop && safe) { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr); + } else if (in_loop) { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr); + } else if (safe) { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr); + } else { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr); + } } /** Constructs a linear a * i + b induction. */ @@ -139,16 +163,34 @@ class InductionVarRangeTest : public testing::Test { HInductionVarAnalysis::kPeriodic, CreateConst(lo), CreateConst(hi)); } + /** Constructs a wrap-around induction consisting of a constant, followed by the given induction info. */ + HInductionVarAnalysis::InductionInfo* CreateWrapAround( + int32_t initial, + HInductionVarAnalysis::InductionInfo* info) { + return iva_->CreateInduction(HInductionVarAnalysis::kWrapAround, CreateConst(initial), info); + } + /** Constructs a wrap-around induction consisting of a constant, followed by a range. */ HInductionVarAnalysis::InductionInfo* CreateWrapAround(int32_t initial, int32_t lo, int32_t hi) { - return iva_->CreateInduction( - HInductionVarAnalysis::kWrapAround, CreateConst(initial), CreateRange(lo, hi)); + return CreateWrapAround(initial, CreateRange(lo, hi)); } // // Relay methods. 
// + bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) { + return InductionVarRange::NeedsTripCount(info); + } + + bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) { + return InductionVarRange::IsBodyTripCount(trip); + } + + bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) { + return InductionVarRange::IsUnsafeTripCount(trip); + } + Value GetMin(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* induc) { return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true); @@ -202,6 +244,26 @@ class InductionVarRangeTest : public testing::Test { // Tests on static methods. // +TEST_F(InductionVarRangeTest, TripCountProperties) { + EXPECT_FALSE(NeedsTripCount(nullptr)); + EXPECT_FALSE(NeedsTripCount(CreateConst(1))); + EXPECT_TRUE(NeedsTripCount(CreateLinear(1, 1))); + EXPECT_FALSE(NeedsTripCount(CreateWrapAround(1, 2, 3))); + EXPECT_TRUE(NeedsTripCount(CreateWrapAround(1, CreateLinear(1, 1)))); + + EXPECT_FALSE(IsBodyTripCount(nullptr)); + EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, true))); + EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, false))); + EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, true))); + EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, false))); + + EXPECT_FALSE(IsUnsafeTripCount(nullptr)); + EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, true, true))); + EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, true, false))); + EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, false, true))); + EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, false, false))); +} + TEST_F(InductionVarRangeTest, GetMinMaxNull) { ExpectEqual(Value(), GetMin(nullptr, nullptr)); ExpectEqual(Value(), GetMax(nullptr, nullptr)); @@ -279,10 +341,10 @@ TEST_F(InductionVarRangeTest, GetMinMaxFetch) { } TEST_F(InductionVarRangeTest, GetMinMaxLinear) { - ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100))); - ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100))); - ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100))); - ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100))); + ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100, true, true))); + ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100, true, true))); + ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100, true, true))); + ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100, true, true))); } TEST_F(InductionVarRangeTest, GetMinMaxWrapAround) { @@ -398,61 +460,98 @@ TEST_F(InductionVarRangeTest, MaxValue) { // Tests on instance methods. // -TEST_F(InductionVarRangeTest, FindRangeConstantTripCount) { - BuildLoop(graph_->GetIntConstant(1000)); +TEST_F(InductionVarRangeTest, ConstantTripCountUp) { + BuildLoop(0, graph_->GetIntConstant(1000), 1); PerformInductionVarAnalysis(); InductionVarRange range(iva_); + Value v1, v2; + bool needs_finite_test = true; + // In context of header: known. - ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0))); - ExpectEqual(Value(1000), range.GetMaxInduction(condition_, condition_->InputAt(0))); + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(1000), v2); // In context of loop-body: known. 
- ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(999), range.GetMaxInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_)); - ExpectEqual(Value(1000), range.GetMaxInduction(increment_, increment_)); + range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(999), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(1000), v2); } -TEST_F(InductionVarRangeTest, FindRangeSymbolicTripCount) { - HInstruction* parameter = new (&allocator_) HParameterValue( - graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); - entry_block_->AddInstruction(parameter); - BuildLoop(parameter); +TEST_F(InductionVarRangeTest, ConstantTripCountDown) { + BuildLoop(1000, graph_->GetIntConstant(0), -1); PerformInductionVarAnalysis(); InductionVarRange range(iva_); - // In context of header: full range unknown. - ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0))); - ExpectEqual(Value(), range.GetMaxInduction(condition_, condition_->InputAt(0))); + Value v1, v2; + bool needs_finite_test = true; + + // In context of header: known. + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(1000), v2); // In context of loop-body: known. - ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(parameter, 1, -1), range.GetMaxInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_)); - ExpectEqual(Value(parameter, 1, 0), range.GetMaxInduction(increment_, increment_)); + range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(1000), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(999), v2); } -TEST_F(InductionVarRangeTest, CodeGeneration) { +TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { HInstruction* parameter = new (&allocator_) HParameterValue( graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); entry_block_->AddInstruction(parameter); - BuildLoop(parameter); + BuildLoop(0, parameter, 1); PerformInductionVarAnalysis(); InductionVarRange range(iva_); + Value v1, v2; + bool needs_finite_test = true; + bool needs_taken_test = true; + + // In context of header: upper unknown. + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(), v2); + + // In context of loop-body: known. 
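+ // For i in [0, n) with symbolic n: the body sees i in [0, n-1] and the
+ // incremented value i+1 in [1, n].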
+ range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(parameter, 1, -1), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(parameter, 1, 0), v2); + HInstruction* lower = nullptr; HInstruction* upper = nullptr; - bool top_test = false; + HInstruction* taken = nullptr; // Can generate code in context of loop-body only. - EXPECT_FALSE(range.CanGenerateCode(condition_, condition_->InputAt(0), &top_test)); - ASSERT_TRUE(range.CanGenerateCode(increment_, condition_->InputAt(0), &top_test)); - EXPECT_TRUE(top_test); + EXPECT_FALSE(range.CanGenerateCode( + condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + ASSERT_TRUE(range.CanGenerateCode( + increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(needs_finite_test); + EXPECT_TRUE(needs_taken_test); // Generates code. - EXPECT_TRUE(range.GenerateCode( - increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper)); + range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); // Verify lower is 0+0. ASSERT_TRUE(lower != nullptr); @@ -462,7 +561,7 @@ TEST_F(InductionVarRangeTest, CodeGeneration) { ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue()); - // Verify upper is (V-1)+0 + // Verify upper is (V-1)+0. ASSERT_TRUE(upper != nullptr); ASSERT_TRUE(upper->IsAdd()); ASSERT_TRUE(upper->InputAt(0)->IsSub()); @@ -471,6 +570,91 @@ TEST_F(InductionVarRangeTest, CodeGeneration) { EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue()); ASSERT_TRUE(upper->InputAt(1)->IsIntConstant()); EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); + + // Verify taken-test is 0<V. + range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + ASSERT_TRUE(taken != nullptr); + ASSERT_TRUE(taken->IsLessThan()); + ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); + EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue()); + EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); +} + +TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { + HInstruction* parameter = new (&allocator_) HParameterValue( + graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); + entry_block_->AddInstruction(parameter); + BuildLoop(1000, parameter, -1); + PerformInductionVarAnalysis(); + InductionVarRange range(iva_); + + Value v1, v2; + bool needs_finite_test = true; + bool needs_taken_test = true; + + // In context of header: lower unknown. + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(), v1); + ExpectEqual(Value(1000), v2); + + // In context of loop-body: known. + range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(parameter, 1, 1), v1); + ExpectEqual(Value(1000), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(parameter, 1, 0), v1); + ExpectEqual(Value(999), v2); + + HInstruction* lower = nullptr; + HInstruction* upper = nullptr; + HInstruction* taken = nullptr; + + // Can generate code in context of loop-body only. 
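+ // The symbolic bound leaves the header range unknown, so range code can be
+ // generated for the body context only.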
+ EXPECT_FALSE(range.CanGenerateCode( + condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + ASSERT_TRUE(range.CanGenerateCode( + increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(needs_finite_test); + EXPECT_TRUE(needs_taken_test); + + // Generates code. + range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); + + // Verify lower is 1000-(-(V-1000)-1). + ASSERT_TRUE(lower != nullptr); + ASSERT_TRUE(lower->IsSub()); + ASSERT_TRUE(lower->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue()); + lower = lower->InputAt(1); + ASSERT_TRUE(lower->IsSub()); + ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); + EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue()); + lower = lower->InputAt(0); + ASSERT_TRUE(lower->IsNeg()); + lower = lower->InputAt(0); + ASSERT_TRUE(lower->IsSub()); + EXPECT_TRUE(lower->InputAt(0)->IsParameterValue()); + ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); + EXPECT_EQ(1000, lower->InputAt(1)->AsIntConstant()->GetValue()); + + // Verify upper is 1000-0. + ASSERT_TRUE(upper != nullptr); + ASSERT_TRUE(upper->IsSub()); + ASSERT_TRUE(upper->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue()); + ASSERT_TRUE(upper->InputAt(1)->IsIntConstant()); + EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); + + // Verify taken-test is 1000>V. + range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + ASSERT_TRUE(taken != nullptr); + ASSERT_TRUE(taken->IsGreaterThan()); + ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue()); + EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); } } // namespace art diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index dbe75249be..b01324ec3b 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -89,10 +89,7 @@ static Primitive::Type GetType(uint64_t data, bool is_op_size) { } } -static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) { - if (instruction_set == kMips) { - return Intrinsics::kNone; - } +static Intrinsics GetIntrinsic(InlineMethod method) { switch (method.opcode) { // Floating-point conversions. 
case kIntrinsicDoubleCvt: @@ -431,7 +428,7 @@ void IntrinsicsRecognizer::Run() { DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file); DCHECK(inliner != nullptr); if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { - Intrinsics intrinsic = GetIntrinsic(method, graph_->GetInstructionSet()); + Intrinsics intrinsic = GetIntrinsic(method); if (intrinsic != Intrinsics::kNone) { if (!CheckInvokeType(intrinsic, invoke, dex_file)) { diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 5efcf4eadf..a94e3a8c23 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -138,6 +138,108 @@ bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) { #define __ assembler-> +// boolean java.lang.String.equals(Object anObject) +void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) { + MipsAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register str = locations->InAt(0).AsRegister<Register>(); + Register arg = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + + MipsLabel loop; + MipsLabel end; + MipsLabel return_true; + MipsLabel return_false; + + // Get offsets of count, value, and class fields within a string object. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // If the register containing the pointer to "this" and the register + // containing the pointer to "anObject" are the same register, then + // "this" and "anObject" are the same object, and we can + // short-circuit the logic to a true result. + if (str == arg) { + __ LoadConst32(out, 1); + return; + } + + // Check if input is null, return false if it is. + __ Beqz(arg, &return_false); + + // Reference equality check, return true if same reference. + __ Beq(str, arg, &return_true); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Lw(temp1, str, class_offset); + __ Lw(temp2, arg, class_offset); + __ Bne(temp1, temp2, &return_false); + + // Load lengths of this and argument strings. 
+ __ Lw(temp1, str, count_offset); + __ Lw(temp2, arg, count_offset); + // Check if lengths are equal, return false if they're not. + __ Bne(temp1, temp2, &return_false); + // Return true if both strings are empty. + __ Beqz(temp1, &return_true); + + // Don't overwrite input registers + __ Move(TMP, str); + __ Move(temp3, arg); + + // Assertions that must hold in order to compare strings 2 characters at a time. + DCHECK_ALIGNED(value_offset, 4); + static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); + + // Loop to compare strings 2 characters at a time starting at the beginning of the string. + // Ok to do this because strings are zero-padded. + __ Bind(&loop); + __ Lw(out, TMP, value_offset); + __ Lw(temp2, temp3, value_offset); + __ Bne(out, temp2, &return_false); + __ Addiu(TMP, TMP, 4); + __ Addiu(temp3, temp3, 4); + __ Addiu(temp1, temp1, -2); + __ Bgtz(temp1, &loop); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ LoadConst32(out, 1); + __ B(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ LoadConst32(out, 0); + __ Bind(&end); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -204,7 +306,6 @@ UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(StringCharAt) UNIMPLEMENTED_INTRINSIC(StringCompareTo) -UNIMPLEMENTED_INTRINSIC(StringEquals) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 05c7eb02d9..ff843ebb1e 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -101,11 +101,10 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0)); - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); } else { - UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; - UNREACHABLE(); + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0)); } + codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. 
Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 14c65c9aaf..a29f3ef1d1 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1605,7 +1605,7 @@ static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1))); } static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 47457dec7d..2bb769a430 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -42,12 +42,14 @@ class LICMTest : public testing::Test { loop_preheader_ = new (&allocator_) HBasicBlock(graph_); loop_header_ = new (&allocator_) HBasicBlock(graph_); loop_body_ = new (&allocator_) HBasicBlock(graph_); + return_ = new (&allocator_) HBasicBlock(graph_); exit_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(entry_); graph_->AddBlock(loop_preheader_); graph_->AddBlock(loop_header_); graph_->AddBlock(loop_body_); + graph_->AddBlock(return_); graph_->AddBlock(exit_); graph_->SetEntryBlock(entry_); @@ -57,8 +59,9 @@ class LICMTest : public testing::Test { entry_->AddSuccessor(loop_preheader_); loop_preheader_->AddSuccessor(loop_header_); loop_header_->AddSuccessor(loop_body_); - loop_header_->AddSuccessor(exit_); + loop_header_->AddSuccessor(return_); loop_body_->AddSuccessor(loop_header_); + return_->AddSuccessor(exit_); // Provide boiler-plate instructions. parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot); @@ -89,6 +92,7 @@ class LICMTest : public testing::Test { HBasicBlock* loop_preheader_; HBasicBlock* loop_header_; HBasicBlock* loop_body_; + HBasicBlock* return_; HBasicBlock* exit_; HInstruction* parameter_; // "this" diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index ebdf7a2f65..1ab206f69e 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -17,6 +17,7 @@ #include "locations.h" #include "nodes.h" +#include "code_generator.h" namespace art { @@ -47,18 +48,26 @@ Location Location::RegisterOrConstant(HInstruction* instruction) { : Location::RequiresRegister(); } -Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) { - if (instruction->IsIntConstant() || instruction->IsNullConstant()) { - return Location::ConstantLocation(instruction->AsConstant()); - } else if (instruction->IsLongConstant()) { - // Does the long constant fit in a 32 bit int? - int64_t value = instruction->AsLongConstant()->GetValue(); - return IsInt<32>(value) - ? 
Location::ConstantLocation(instruction->AsConstant()) - : Location::RequiresRegister(); - } else { - return Location::RequiresRegister(); +Location Location::RegisterOrInt32Constant(HInstruction* instruction) { + HConstant* constant = instruction->AsConstant(); + if (constant != nullptr) { + int64_t value = CodeGenerator::GetInt64ValueOf(constant); + if (IsInt<32>(value)) { + return Location::ConstantLocation(constant); + } } + return Location::RequiresRegister(); +} + +Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) { + HConstant* constant = instruction->AsConstant(); + if (constant != nullptr) { + int64_t value = CodeGenerator::GetInt64ValueOf(constant); + if (IsInt<32>(value)) { + return Location::ConstantLocation(constant); + } + } + return Location::RequiresFpuRegister(); } Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { @@ -67,6 +76,12 @@ Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { : Location::RegisterLocation(reg); } +Location Location::FpuRegisterOrConstant(HInstruction* instruction) { + return instruction->IsConstant() + ? Location::ConstantLocation(instruction->AsConstant()) + : Location::RequiresFpuRegister(); +} + std::ostream& operator<<(std::ostream& os, const Location& location) { os << location.DebugString(); if (location.IsRegister() || location.IsFpuRegister()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index d014379bca..1181007666 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -354,8 +354,10 @@ class Location : public ValueObject { } static Location RegisterOrConstant(HInstruction* instruction); - static Location RegisterOrInt32LongConstant(HInstruction* instruction); + static Location RegisterOrInt32Constant(HInstruction* instruction); static Location ByteRegisterOrConstant(int reg, HInstruction* instruction); + static Location FpuRegisterOrConstant(HInstruction* instruction); + static Location FpuRegisterOrInt32Constant(HInstruction* instruction); // The location of the first input to the instruction will be // used to replace this unallocated location. diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 68fb0acf7f..de3f2668b6 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -17,6 +17,7 @@ #include "nodes.h" #include "code_generator.h" +#include "common_dominator.h" #include "ssa_builder.h" #include "base/bit_vector-inl.h" #include "base/bit_utils.h" @@ -179,7 +180,10 @@ void HGraph::ComputeDominanceInformation() { if (successor->GetDominator() == nullptr) { successor->SetDominator(current); } else { - successor->SetDominator(FindCommonDominator(successor->GetDominator(), current)); + // The CommonDominator can work for multiple blocks as long as the + // domination information doesn't change. However, since we're changing + // that information here, we can use the finder only for pairs of blocks. + successor->SetDominator(CommonDominator::ForPair(successor->GetDominator(), current)); } // Once all the forward edges have been visited, we know the immediate @@ -194,24 +198,6 @@ void HGraph::ComputeDominanceInformation() { } } -HBasicBlock* HGraph::FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const { - ArenaBitVector visited(arena_, blocks_.size(), false); - // Walk the dominator tree of the first block and mark the visited blocks. 
- while (first != nullptr) { - visited.SetBit(first->GetBlockId()); - first = first->GetDominator(); - } - // Walk the dominator tree of the second block until a marked block is found. - while (second != nullptr) { - if (visited.IsBitSet(second->GetBlockId())) { - return second; - } - second = second->GetDominator(); - } - LOG(ERROR) << "Could not find common dominator"; - return nullptr; -} - void HGraph::TransformToSsa() { DCHECK(!reverse_post_order_.empty()); SsaBuilder ssa_builder(this); @@ -335,14 +321,24 @@ void HGraph::SimplifyCatchBlocks() { // instructions into `normal_block` and links the two blocks with a Goto. // Afterwards, incoming normal-flow edges are re-linked to `normal_block`, // leaving `catch_block` with the exceptional edges only. + // // Note that catch blocks with normal-flow predecessors cannot begin with - // a MOVE_EXCEPTION instruction, as guaranteed by the verifier. - DCHECK(!catch_block->GetFirstInstruction()->IsLoadException()); - HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction()); - for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) { - if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { - catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block); - --j; + // a move-exception instruction, as guaranteed by the verifier. However, + // trivially dead predecessors are ignored by the verifier and such code + // has not been removed at this stage. We therefore ignore the assumption + // and rely on GraphChecker to enforce it after initial DCE is run (b/25492628). + HBasicBlock* normal_block = catch_block->SplitCatchBlockAfterMoveException(); + if (normal_block == nullptr) { + // Catch block is either empty or only contains a move-exception. It must + // therefore be dead and will be removed during initial DCE. Do nothing. + DCHECK(!catch_block->EndsWithControlFlowInstruction()); + } else { + // Catch block was split. Re-link normal-flow edges to the new block. + for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) { + if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { + catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block); + --j; + } } } } @@ -373,19 +369,27 @@ void HGraph::ComputeTryBlockInformation() { } void HGraph::SimplifyCFG() { - // Simplify the CFG for future analysis, and code generation: +// Simplify the CFG for future analysis, and code generation: // (1): Split critical edges. - // (2): Simplify loops by having only one back edge, and one preheader. + // (2): Simplify loops by having only one preheader. // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators // can be invalidated. We remember the initial size to avoid iterating over the new blocks. for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) { HBasicBlock* block = blocks_[block_id]; if (block == nullptr) continue; - if (block->NumberOfNormalSuccessors() > 1) { - for (size_t j = 0; j < block->GetSuccessors().size(); ++j) { + if (block->GetSuccessors().size() > 1) { + // Only split normal-flow edges. We cannot split exceptional edges as they + // are synthesized (approximate real control flow), and we do not need to + // anyway. Moves that would be inserted there are performed by the runtime. 
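+ // Normal successors precede the exceptional ones in the successor list, so
+ // the first NumberOfNormalSuccessors() entries are exactly the candidates
+ // for splitting.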
+ for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) { HBasicBlock* successor = block->GetSuccessors()[j]; DCHECK(!successor->IsCatchBlock()); - if (successor->GetPredecessors().size() > 1) { + if (successor == exit_block_) { + // Throw->TryBoundary->Exit. Special case which we do not want to split + // because Goto->Exit is not allowed. + DCHECK(block->IsSingleTryBoundary()); + DCHECK(block->GetSinglePredecessor()->GetLastInstruction()->IsThrow()); + } else if (successor->GetPredecessors().size() > 1) { SplitCriticalEdge(block, successor); --j; } @@ -1163,7 +1167,7 @@ void HInstruction::MoveBefore(HInstruction* cursor) { } HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { - DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented"; + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK_EQ(cursor->GetBlock(), this); HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), @@ -1193,7 +1197,7 @@ HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { } HBasicBlock* HBasicBlock::CreateImmediateDominator() { - DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented"; + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented."; HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc()); @@ -1209,6 +1213,34 @@ HBasicBlock* HBasicBlock::CreateImmediateDominator() { return new_block; } +HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() { + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; + DCHECK(IsCatchBlock()) << "This method is intended for catch blocks only."; + + HInstruction* first_insn = GetFirstInstruction(); + HInstruction* split_before = nullptr; + + if (first_insn != nullptr && first_insn->IsLoadException()) { + // Catch block starts with a LoadException. Split the block after + // the StoreLocal and ClearException which must come after the load. + DCHECK(first_insn->GetNext()->IsStoreLocal()); + DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); + split_before = first_insn->GetNext()->GetNext()->GetNext(); + } else { + // Catch block does not load the exception. Split at the beginning + // to create an empty catch block. + split_before = first_insn; + } + + if (split_before == nullptr) { + // Catch block has no instructions after the split point (must be dead). + // Do not split it but rather signal error by returning nullptr. + return nullptr; + } else { + return SplitBefore(split_before); + } +} + HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { DCHECK(!cursor->IsControlFlow()); DCHECK_NE(instructions_.last_instruction_, cursor); @@ -1940,6 +1972,16 @@ bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const { return !opt.GetDoesNotNeedDexCache(); } +void HInvokeStaticOrDirect::RemoveInputAt(size_t index) { + RemoveAsUserOfInput(index); + inputs_.erase(inputs_.begin() + index); + // Update indexes in use nodes of inputs that have been pulled forward by the erase(). 
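+ // Each remaining use node still records its pre-removal index (i + 1), as
+ // asserted below; rewrite it to the new index i.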
+ for (size_t i = index, e = InputCount(); i < e; ++i) { + DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u); + InputRecordAt(i).GetUseNode()->SetIndex(i); + } +} + void HInstruction::RemoveEnvironmentUsers() { for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) { HUseListNode<HEnvironment*>* user_node = use_it.Current(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 0f2c1cffee..ab53e63300 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -350,8 +350,6 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { HCurrentMethod* GetCurrentMethod(); - HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; - const DexFile& GetDexFile() const { return dex_file_; } @@ -837,6 +835,15 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // blocks are consistent (for example ending with a control flow instruction). HBasicBlock* SplitAfter(HInstruction* cursor); + // Split catch block into two blocks after the original move-exception bytecode + // instruction, or at the beginning if not present. Returns the newly created, + // latter block, or nullptr if such block could not be created (must be dead + // in that case). Note that this method just updates raw block information, + // like predecessors, successors, dominators, and instruction list. It does not + // update the graph, reverse post order, loop information, nor make sure the + // blocks are consistent (for example ending with a control flow instruction). + HBasicBlock* SplitCatchBlockAfterMoveException(); + // Merge `other` at the end of `this`. Successors and dominated blocks of // `other` are changed to be successors and dominated blocks of `this`. Note // that this method does not update the graph, reverse post order, loop @@ -3399,11 +3406,12 @@ class HInvokeStaticOrDirect : public HInvoke { ClinitCheckRequirement clinit_check_requirement) : HInvoke(arena, number_of_arguments, - // There is one extra argument for the HCurrentMethod node, and - // potentially one other if the clinit check is explicit, and one other - // if the method is a string factory. - 1u + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) - + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u), + // There is potentially one extra argument for the HCurrentMethod node, and + // potentially one other if the clinit check is explicit, and potentially + // one other if the method is a string factory. + (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) + + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) + + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u), return_type, dex_pc, method_index, @@ -3411,12 +3419,25 @@ class HInvokeStaticOrDirect : public HInvoke { invoke_type_(invoke_type), clinit_check_requirement_(clinit_check_requirement), target_method_(target_method), - dispatch_info_(dispatch_info) {} + dispatch_info_(dispatch_info) { } void SetDispatchInfo(const DispatchInfo& dispatch_info) { + bool had_current_method_input = HasCurrentMethodInput(); + bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind); + + // Using the current method is the default and once we find a better + // method load kind, we should not go back to using the current method. 
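+    // For example, sharpening may change kDexCacheViaMethod (which needs the
+    // current method) into kDexCachePcRelative (which does not), but never
+    // the other way around.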
+ DCHECK(had_current_method_input || !needs_current_method_input); + + if (had_current_method_input && !needs_current_method_input) { + DCHECK_EQ(InputAt(GetCurrentMethodInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod()); + RemoveInputAt(GetCurrentMethodInputIndex()); + } dispatch_info_ = dispatch_info; } + void RemoveInputAt(size_t index); + bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { // We access the method via the dex cache so we can't do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. @@ -3438,6 +3459,17 @@ class HInvokeStaticOrDirect : public HInvoke { bool HasPcRelDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; } + bool HasCurrentMethodInput() const { + // This function can be called only after the invoke has been fully initialized by the builder. + if (NeedsCurrentMethodInput(GetMethodLoadKind())) { + DCHECK(InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + return true; + } else { + DCHECK(InputCount() == GetCurrentMethodInputIndex() || + !InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + return false; + } + } bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; } MethodReference GetTargetMethod() const { return target_method_; } @@ -3486,8 +3518,8 @@ class HInvokeStaticOrDirect : public HInvoke { bool IsStringFactoryFor(HFakeString* str) const { if (!IsStringInit()) return false; - // +1 for the current method. - if (InputCount() == (number_of_arguments_ + 1)) return false; + DCHECK(!HasCurrentMethodInput()); + if (InputCount() == (number_of_arguments_)) return false; return InputAt(InputCount() - 1)->AsFakeString() == str; } @@ -3513,6 +3545,11 @@ class HInvokeStaticOrDirect : public HInvoke { return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit); } + // Does this method load kind need the current method as an input? + static bool NeedsCurrentMethodInput(MethodLoadKind kind) { + return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod; + } + DECLARE_INSTRUCTION(InvokeStaticOrDirect); protected: diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 8cb2cfc816..7e3c5e602e 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -56,6 +56,7 @@ #include "inliner.h" #include "instruction_simplifier.h" #include "intrinsics.h" +#include "jit/jit_code_cache.h" #include "licm.h" #include "jni/quick/jni_compiler.h" #include "load_store_elimination.h" @@ -258,15 +259,6 @@ class OptimizingCompiler FINAL : public Compiler { const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const OVERRIDE; - CompiledMethod* TryCompile(const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - jobject class_loader, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const; - CompiledMethod* JniCompile(uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file) const OVERRIDE { @@ -291,23 +283,45 @@ class OptimizingCompiler FINAL : public Compiler { } } + bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method) + OVERRIDE + SHARED_REQUIRES(Locks::mutator_lock_); + private: // Whether we should run any optimization or register allocation. If false, will // just run the code generation after the graph was built. 
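  // It is set explicitly, either through a compiler filter or the debuggable
  // flag.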
const bool run_optimizations_; - // Optimize and compile `graph`. - CompiledMethod* CompileOptimized(HGraph* graph, - CodeGenerator* codegen, - CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const; - - // Just compile without doing optimizations. - CompiledMethod* CompileBaseline(CodeGenerator* codegen, - CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const; + // Create a 'CompiledMethod' for an optimized graph. + CompiledMethod* EmitOptimized(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* driver) const; + + // Create a 'CompiledMethod' for a non-optimized graph. + CompiledMethod* EmitBaseline(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* driver) const; + + // Try compiling a method and return the code generator used for + // compiling it. + // This method: + // 1) Builds the graph. Returns null if it failed to build it. + // 2) If `run_optimizations_` is set: + // 2.1) Transform the graph to SSA. Returns null if it failed. + // 2.2) Run optimizations on the graph, including register allocator. + // 3) Generate code with the `code_allocator` provided. + CodeGenerator* TryCompile(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file, + Handle<mirror::DexCache> dex_cache) const; std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -446,13 +460,32 @@ static void RunArchOptimizations(InstructionSet instruction_set, } } +NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects. 
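+// (The SsaLivenessAnalysis local below is one such object.)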
+static void AllocateRegisters(HGraph* graph, + CodeGenerator* codegen, + PassObserver* pass_observer) { + PrepareForRegisterAllocation(graph).Run(); + SsaLivenessAnalysis liveness(graph, codegen); + { + PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer); + liveness.Analyze(); + } + { + PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer); + RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); + } +} + static void RunOptimizations(HGraph* graph, CodeGenerator* codegen, CompilerDriver* driver, OptimizingCompilerStats* stats, const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer, - StackHandleScopeCollection* handles) { + PassObserver* pass_observer) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + ScopedThreadSuspension sts(soa.Self(), kNative); + ArenaAllocator* arena = graph->GetArena(); HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); @@ -469,7 +502,7 @@ static void RunOptimizations(HGraph* graph, HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction); ReferenceTypePropagation* type_propagation = - new (arena) ReferenceTypePropagation(graph, handles); + new (arena) ReferenceTypePropagation(graph, &handles); HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_after_types"); @@ -492,7 +525,7 @@ static void RunOptimizations(HGraph* graph, RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); - MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles); + MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles); // TODO: Update passes incompatible with try/catch so we have the same // pipeline for all methods. @@ -532,6 +565,7 @@ static void RunOptimizations(HGraph* graph, } RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer); + AllocateRegisters(graph, codegen, pass_observer); } // The stack map we generate must be 4-byte aligned on ARM. Since existing @@ -545,22 +579,6 @@ static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) { return ArrayRef<const uint8_t>(vector); } -NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects. 
-static void AllocateRegisters(HGraph* graph, - CodeGenerator* codegen, - PassObserver* pass_observer) { - PrepareForRegisterAllocation(graph).Run(); - SsaLivenessAnalysis liveness(graph, codegen); - { - PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer); - liveness.Analyze(); - } - { - PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer); - RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); - } -} - static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) { ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter()); codegen->EmitLinkerPatches(&linker_patches); @@ -574,74 +592,42 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) return linker_patches; } -CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); - soa.Self()->TransitionFromRunnableToSuspended(kNative); - RunOptimizations(graph, - codegen, - compiler_driver, - compilation_stats_.get(), - dex_compilation_unit, - pass_observer, - &handles); - - AllocateRegisters(graph, codegen, pass_observer); - - ArenaAllocator* arena = graph->GetArena(); - CodeVectorAllocator allocator(arena); - DefaultSrcMap src_mapping_table; - codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo() - ? &src_mapping_table - : nullptr); - codegen->CompileOptimized(&allocator); - +CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); - ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps)); - codegen->BuildStackMaps(&stack_map); + stack_map.resize(codegen->ComputeStackMapsSize()); + codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size())); MaybeRecordStat(MethodCompilationStat::kCompiledOptimized); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(allocator.GetMemory()), + ArrayRef<const uint8_t>(code_allocator->GetMemory()), // Follow Quick's behavior and set the frame size to zero if it is // considered "empty" (see the definition of // art::CodeGenerator::HasEmptyFrame). codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(src_mapping_table), + ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), ArrayRef<const uint8_t>(), // mapping_table. ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(), // native_gc_map. 
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const LinkerPatch>(linker_patches)); - pass_observer->DumpDisassembly(); - soa.Self()->TransitionFromSuspendedToRunnable(); return compiled_method; } -CompiledMethod* OptimizingCompiler::CompileBaseline( +CompiledMethod* OptimizingCompiler::EmitBaseline( + ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const { - ArenaAllocator* arena = codegen->GetGraph()->GetArena(); - CodeVectorAllocator allocator(arena); - DefaultSrcMap src_mapping_table; - codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo() - ? &src_mapping_table - : nullptr); - codegen->CompileBaseline(&allocator); - + CompilerDriver* compiler_driver) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps)); @@ -649,37 +635,38 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps)); codegen->BuildVMapTable(&vmap_table); ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); + codegen->BuildNativeGCMap(&gc_map, *compiler_driver); MaybeRecordStat(MethodCompilationStat::kCompiledBaseline); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(allocator.GetMemory()), + ArrayRef<const uint8_t>(code_allocator->GetMemory()), // Follow Quick's behavior and set the frame size to zero if it is // considered "empty" (see the definition of // art::CodeGenerator::HasEmptyFrame). codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(src_mapping_table), + ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), AlignVectorSize(gc_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const LinkerPatch>(linker_patches)); - pass_observer->DumpDisassembly(); return compiled_method; } -CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - jobject class_loader, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const { +CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file, + Handle<mirror::DexCache> dex_cache) const { std::string method_name = PrettyMethod(method_idx, dex_file); MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); @@ -721,13 +708,10 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite && compiler_driver->RequiresConstructorBarrier(Thread::Current(), dex_compilation_unit.GetDexFile(), dex_compilation_unit.GetClassDefIndex()); - ArenaAllocator arena(Runtime::Current()->GetArenaPool()); - HGraph* graph = new (&arena) HGraph( - &arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(), + HGraph* graph = new (arena) HGraph( + arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(), kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable()); - bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_; - std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, instruction_set, @@ -779,16 +763,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } } - bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set); - - // `run_optimizations_` is set explicitly (either through a compiler filter - // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back - // to Quick. 
- bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit(); - CompiledMethod* compiled_method = nullptr; - if (run_optimizations_ && can_allocate_registers) { - VLOG(compiler) << "Optimizing " << method_name; - + VLOG(compiler) << "Optimizing " << method_name; + if (run_optimizations_) { { PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); if (!graph->TryBuildingSsa()) { @@ -800,37 +776,26 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } } - compiled_method = CompileOptimized(graph, - codegen.get(), - compiler_driver, - dex_compilation_unit, - &pass_observer); - } else if (shouldOptimize && can_allocate_registers) { - LOG(FATAL) << "Could not allocate registers in optimizing compiler"; - UNREACHABLE(); - } else if (can_use_baseline) { - VLOG(compiler) << "Compile baseline " << method_name; - - if (!run_optimizations_) { - MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled); - } else if (!can_allocate_registers) { - MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); - } - - compiled_method = CompileBaseline(codegen.get(), - compiler_driver, - dex_compilation_unit, - &pass_observer); + RunOptimizations(graph, + codegen.get(), + compiler_driver, + compilation_stats_.get(), + dex_compilation_unit, + &pass_observer); + codegen->CompileOptimized(code_allocator); + } else { + codegen->CompileBaseline(code_allocator); } + pass_observer.DumpDisassembly(); if (kArenaAllocatorCountAllocations) { - if (arena.BytesAllocated() > 4 * MB) { - MemStats mem_stats(arena.GetMemStats()); + if (arena->BytesAllocated() > 4 * MB) { + MemStats mem_stats(arena->GetMemStats()); LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats); } } - return compiled_method; + return codegen.release(); } static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) { @@ -852,26 +817,37 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, Handle<mirror::DexCache> dex_cache) const { CompilerDriver* compiler_driver = GetCompilerDriver(); CompiledMethod* method = nullptr; - if (Runtime::Current()->IsAotCompiler()) { - const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); - DCHECK(!verified_method->HasRuntimeThrow()); - if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) - || CanHandleVerificationFailure(verified_method)) { - method = TryCompile(code_item, access_flags, invoke_type, class_def_idx, - method_idx, jclass_loader, dex_file, dex_cache); - } else { - if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); + DCHECK(Runtime::Current()->IsAotCompiler()); + const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); + DCHECK(!verified_method->HasRuntimeThrow()); + if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) + || CanHandleVerificationFailure(verified_method)) { + ArenaAllocator arena(Runtime::Current()->GetArenaPool()); + CodeVectorAllocator code_allocator(&arena); + std::unique_ptr<CodeGenerator> codegen( + TryCompile(&arena, + &code_allocator, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + jclass_loader, + dex_file, + dex_cache)); + if (codegen.get() != nullptr) { + if (run_optimizations_) { + method = EmitOptimized(&arena, &code_allocator, codegen.get(), 
compiler_driver); } else { - MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified); + method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver); } } } else { + if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { + MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); + } else { + MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified); + } } if (kIsDebugBuild && @@ -896,4 +872,70 @@ bool IsCompilingWithCoreImage() { return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art"); } +bool OptimizingCompiler::JitCompile(Thread* self, + jit::JitCodeCache* code_cache, + ArtMethod* method) { + StackHandleScope<2> hs(self); + Handle<mirror::ClassLoader> class_loader(hs.NewHandle( + method->GetDeclaringClass()->GetClassLoader())); + Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache())); + + jobject jclass_loader = class_loader.ToJObject(); + const DexFile* dex_file = method->GetDexFile(); + const uint16_t class_def_idx = method->GetClassDefIndex(); + const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); + const uint32_t method_idx = method->GetDexMethodIndex(); + const uint32_t access_flags = method->GetAccessFlags(); + const InvokeType invoke_type = method->GetInvokeType(); + + ArenaAllocator arena(Runtime::Current()->GetArenaPool()); + CodeVectorAllocator code_allocator(&arena); + std::unique_ptr<CodeGenerator> codegen; + { + // Go to native so that we don't block GC during compilation. + ScopedThreadSuspension sts(self, kNative); + + DCHECK(run_optimizations_); + codegen.reset( + TryCompile(&arena, + &code_allocator, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + jclass_loader, + *dex_file, + dex_cache)); + if (codegen.get() == nullptr) { + return false; + } + } + + size_t stack_map_size = codegen->ComputeStackMapsSize(); + uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size); + if (stack_map_data == nullptr) { + return false; + } + codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size)); + const void* code = code_cache->CommitCode( + self, + method, + nullptr, + stack_map_data, + nullptr, + codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + codegen->GetFpuSpillMask(), + code_allocator.GetMemory().data(), + code_allocator.GetSize()); + + if (code == nullptr) { + code_cache->ClearData(self, stack_map_data); + return false; + } + + return true; +} + } // namespace art
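The split of BuildStackMaps() into ComputeStackMapsSize() plus a fill over a caller-provided MemoryRegion is what lets the AOT and JIT paths above share one code generator while owning the buffer differently. A minimal sketch of the two call patterns, using only calls that appear in this diff (illustrative, not tree content):

// AOT path (EmitOptimized): the stack maps live in an arena-backed vector
// that is later passed to CompiledMethod::SwapAllocCompiledMethod().
ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
stack_map.resize(codegen->ComputeStackMapsSize());
codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));

// JIT path (JitCompile): the stack maps are written straight into memory
// reserved in the JIT code cache, avoiding a copy out of compiler memory.
size_t stack_map_size = codegen->ComputeStackMapsSize();
uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size);
if (stack_map_data == nullptr) {
  return false;  // No space in the code cache; give up on this method.
}
codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));

Because the data reservation happens before CommitCode(), JitCompile() releases it with ClearData() if committing the generated code fails.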