Diffstat (limited to 'compiler')
43 files changed, 1728 insertions, 723 deletions
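Much of this diff reworks the ARM and ARM VIXL code generators so that the GenerateTest/GenerateLongTest helpers return both a condition code and its opposite as a std::pair, rather than a single Condition, which lets callers emit IT blocks and fall-through branches without recomputing the inverse. The following is a minimal standalone sketch of that pattern only; the enum values and the Opposite/MakeTest names are simplified stand-ins, not the actual ART types or helpers.

```cpp
#include <cstdio>
#include <utility>

// Simplified stand-ins for the ARM condition codes used by the backend.
enum Condition { EQ, NE, LT, GE, LE, GT };

// Illustrative inverse mapping for the subset of codes above.
static Condition Opposite(Condition cond) {
  switch (cond) {
    case EQ: return NE;
    case NE: return EQ;
    case LT: return GE;
    case GE: return LT;
    case LE: return GT;
    case GT: return LE;
  }
  return NE;  // unreachable for the values above
}

// Mirrors the new helper contract: compute the condition to branch on and its
// opposite in one step, swapping them when the caller asks for the inverted
// test (e.g. when the only explicit branch target is the false target).
static std::pair<Condition, Condition> MakeTest(Condition cond, bool invert) {
  Condition opposite = Opposite(cond);
  if (invert) {
    std::swap(cond, opposite);
  }
  return std::make_pair(cond, opposite);
}

int main() {
  const std::pair<Condition, Condition> test = MakeTest(LT, /* invert */ false);
  std::printf("branch on %d, opposite %d\n", test.first, test.second);
  return 0;
}
```

A caller can then branch on the first member and use the second for the opposite IT slot or the final fall-through, which is the shape HandleCondition and GenerateCompareTestAndBranch take in the hunks below.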
diff --git a/compiler/Android.bp b/compiler/Android.bp index d57f301ff9..b444fffd56 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -391,6 +391,7 @@ art_cc_test { mips64: { srcs: [ "linker/mips64/relative_patcher_mips64_test.cc", + "utils/mips64/managed_register_mips64_test.cc", ], }, x86: { diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index 808e28c9ea..538fe93793 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -70,10 +70,6 @@ class DexCompiler { return *unit_.GetDexFile(); } - bool PerformOptimizations() const { - return dex_to_dex_compilation_level_ >= DexToDexCompilationLevel::kOptimize; - } - // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where // a barrier is required. void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); @@ -114,7 +110,7 @@ class DexCompiler { }; void DexCompiler::Compile() { - DCHECK_GE(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kRequired); + DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize); const DexFile::CodeItem* code_item = unit_.GetCodeItem(); const uint16_t* insns = code_item->insns_; const uint32_t insns_size = code_item->insns_size_in_code_units_; @@ -221,7 +217,7 @@ void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { } Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { - if (!kEnableCheckCastEllision || !PerformOptimizations()) { + if (!kEnableCheckCastEllision) { return inst; } if (!driver_.IsSafeCast(&unit_, dex_pc)) { @@ -254,7 +250,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, Instruction::Code new_opcode, bool is_put) { - if (!kEnableQuickening || !PerformOptimizations()) { + if (!kEnableQuickening) { return; } uint32_t field_idx = inst->VRegC_22c(); @@ -279,7 +275,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, Instruction::Code new_opcode, bool is_range) { - if (!kEnableQuickening || !PerformOptimizations()) { + if (!kEnableQuickening) { return; } uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c(); diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h index 00c596d60e..87ddb395ad 100644 --- a/compiler/dex/dex_to_dex_compiler.h +++ b/compiler/dex/dex_to_dex_compiler.h @@ -34,8 +34,7 @@ namespace optimizer { enum class DexToDexCompilationLevel { kDontDexToDexCompile, // Only meaning wrt image time interpretation. - kRequired, // Dex-to-dex compilation required for correctness. - kOptimize // Perform required transformation and peep-hole optimizations. + kOptimize // Perform peep-hole optimizations. }; std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs); diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 995098799c..e823f67d3c 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -532,16 +532,13 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( if (driver.GetCompilerOptions().GetDebuggable()) { // We are debuggable so definitions of classes might be changed. We don't want to do any // optimizations that could break that. 
- max_level = optimizer::DexToDexCompilationLevel::kRequired; + max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; } if (klass->IsVerified()) { // Class is verified so we can enable DEX-to-DEX compilation for performance. return max_level; - } else if (klass->ShouldVerifyAtRuntime()) { - // Class verification has soft-failed. Anyway, ensure at least correctness. - return optimizer::DexToDexCompilationLevel::kRequired; } else { - // Class verification has failed: do not run DEX-to-DEX compilation. + // Class verification has failed: do not run DEX-to-DEX optimizations. return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; } } @@ -611,7 +608,7 @@ static void CompileMethod(Thread* self, dex_file, (verified_method != nullptr) ? dex_to_dex_compilation_level - : optimizer::DexToDexCompilationLevel::kRequired); + : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile); } } else if ((access_flags & kAccNative) != 0) { // Are we extracting only and have support for generic JNI down calls? diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 34ad1c5c08..a0c0a2acf6 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -27,7 +27,6 @@ CompilerOptions::CompilerOptions() small_method_threshold_(kDefaultSmallMethodThreshold), tiny_method_threshold_(kDefaultTinyMethodThreshold), num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold), - inline_depth_limit_(kUnsetInlineDepthLimit), inline_max_code_units_(kUnsetInlineMaxCodeUnits), no_inline_from_(nullptr), boot_image_(false), @@ -62,7 +61,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter, size_t small_method_threshold, size_t tiny_method_threshold, size_t num_dex_methods_threshold, - size_t inline_depth_limit, size_t inline_max_code_units, const std::vector<const DexFile*>* no_inline_from, double top_k_profile_threshold, @@ -86,7 +84,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter, small_method_threshold_(small_method_threshold), tiny_method_threshold_(tiny_method_threshold), num_dex_methods_threshold_(num_dex_methods_threshold), - inline_depth_limit_(inline_depth_limit), inline_max_code_units_(inline_max_code_units), no_inline_from_(no_inline_from), boot_image_(false), @@ -130,10 +127,6 @@ void CompilerOptions::ParseNumDexMethods(const StringPiece& option, UsageFn Usag ParseUintOption(option, "--num-dex-methods", &num_dex_methods_threshold_, Usage); } -void CompilerOptions::ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage) { - ParseUintOption(option, "--inline-depth-limit", &inline_depth_limit_, Usage); -} - void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) { ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage); } @@ -183,8 +176,6 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa ParseTinyMethodMax(option, Usage); } else if (option.starts_with("--num-dex-methods=")) { ParseNumDexMethods(option, Usage); - } else if (option.starts_with("--inline-depth-limit=")) { - ParseInlineDepthLimit(option, Usage); } else if (option.starts_with("--inline-max-code-units=")) { ParseInlineMaxCodeUnits(option, Usage); } else if (option == "--generate-debug-info" || option == "-g") { diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 2e3e55f6c6..2376fbf5f5 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ 
-46,15 +46,9 @@ class CompilerOptions FINAL { static constexpr double kDefaultTopKProfileThreshold = 90.0; static const bool kDefaultGenerateDebugInfo = false; static const bool kDefaultGenerateMiniDebugInfo = false; - static const size_t kDefaultInlineDepthLimit = 3; static const size_t kDefaultInlineMaxCodeUnits = 32; - static constexpr size_t kUnsetInlineDepthLimit = -1; static constexpr size_t kUnsetInlineMaxCodeUnits = -1; - // Default inlining settings when the space filter is used. - static constexpr size_t kSpaceFilterInlineDepthLimit = 3; - static constexpr size_t kSpaceFilterInlineMaxCodeUnits = 10; - CompilerOptions(); ~CompilerOptions(); @@ -64,7 +58,6 @@ class CompilerOptions FINAL { size_t small_method_threshold, size_t tiny_method_threshold, size_t num_dex_methods_threshold, - size_t inline_depth_limit, size_t inline_max_code_units, const std::vector<const DexFile*>* no_inline_from, double top_k_profile_threshold, @@ -155,13 +148,6 @@ class CompilerOptions FINAL { return num_dex_methods_threshold_; } - size_t GetInlineDepthLimit() const { - return inline_depth_limit_; - } - void SetInlineDepthLimit(size_t limit) { - inline_depth_limit_ = limit; - } - size_t GetInlineMaxCodeUnits() const { return inline_max_code_units_; } @@ -275,7 +261,6 @@ class CompilerOptions FINAL { void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage); void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage); void ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage); - void ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage); void ParseNumDexMethods(const StringPiece& option, UsageFn Usage); void ParseTinyMethodMax(const StringPiece& option, UsageFn Usage); void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage); @@ -289,7 +274,6 @@ class CompilerOptions FINAL { size_t small_method_threshold_; size_t tiny_method_threshold_; size_t num_dex_methods_threshold_; - size_t inline_depth_limit_; size_t inline_max_code_units_; // Dex files from which we should not inline code. diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 7ee494a131..897d81993d 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -363,7 +363,6 @@ void ImageTest::Compile(ImageHeader::StorageMode storage_mode, } CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U); // Set inline filter values. 
- compiler_options_->SetInlineDepthLimit(CompilerOptions::kDefaultInlineDepthLimit); compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits); image_classes_.clear(); if (!extra_dex.empty()) { diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 3ae7974038..ad951bcc3f 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -97,7 +97,6 @@ JitCompiler::JitCompiler() { CompilerOptions::kDefaultSmallMethodThreshold, CompilerOptions::kDefaultTinyMethodThreshold, CompilerOptions::kDefaultNumDexMethodsThreshold, - CompilerOptions::kDefaultInlineDepthLimit, CompilerOptions::kDefaultInlineMaxCodeUnits, /* no_inline_from */ nullptr, CompilerOptions::kDefaultTopKProfileThreshold, @@ -177,10 +176,6 @@ JitCompiler::JitCompiler() { jit_logger_.reset(new JitLogger()); jit_logger_->OpenLog(); } - - size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit(); - DCHECK_LT(thread_count * inline_depth_limit, std::numeric_limits<uint16_t>::max()) - << "ProfilingInfo's inline counter can potentially overflow"; } JitCompiler::~JitCompiler() { diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 2ee4db923a..476906a768 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -528,7 +528,8 @@ class BCEVisitor : public HGraphVisitor { has_dom_based_dynamic_bce_(false), initial_block_size_(graph->GetBlocks().size()), side_effects_(side_effects), - induction_range_(induction_analysis) {} + induction_range_(induction_analysis), + next_(nullptr) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { DCHECK(!IsAddedBlock(block)); @@ -1618,8 +1619,8 @@ class BCEVisitor : public HGraphVisitor { void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { HInstruction* suspend = loop->GetSuspendCheck(); block->InsertInstructionBefore(condition, block->GetLastInstruction()); - HDeoptimize* deoptimize = - new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( + GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc()); block->InsertInstructionBefore(deoptimize, block->GetLastInstruction()); if (suspend->HasEnvironment()) { deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( @@ -1631,8 +1632,8 @@ class BCEVisitor : public HGraphVisitor { void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) { HBasicBlock* block = bounds_check->GetBlock(); block->InsertInstructionBefore(condition, bounds_check); - HDeoptimize* deoptimize = - new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( + GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc()); block->InsertInstructionBefore(deoptimize, bounds_check); deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); } diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index fe423012ca..048073e37a 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -36,7 +36,8 @@ class CHAGuardVisitor : HGraphVisitor { : HGraphVisitor(graph), block_has_cha_guard_(GetGraph()->GetBlocks().size(), 0, - graph->GetArena()->Adapter(kArenaAllocCHA)) { + 
graph->GetArena()->Adapter(kArenaAllocCHA)), + instruction_iterator_(nullptr) { number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards(); DCHECK_NE(number_of_guards_to_visit_, 0u); // Will recount number of guards during guard optimization. @@ -201,8 +202,8 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, HInstruction* suspend = loop_info->GetSuspendCheck(); // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. - HDeoptimize* deoptimize = - new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( + GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc()); pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( suspend->GetEnvironment(), loop_info->GetHeader()); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index caea250ab6..d7cc577580 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1134,7 +1134,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet @@ -1602,14 +1602,20 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) { } } -static Condition GenerateLongTestConstant(HCondition* condition, - bool invert, - CodeGeneratorARM* codegen) { +static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition, + bool invert, + CodeGeneratorARM* codegen) { DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); const LocationSummary* const locations = condition->GetLocations(); - IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - Condition ret = EQ; + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + + if (invert) { + std::swap(cond, opposite); + } + + std::pair<Condition, Condition> ret; const Location left = locations->InAt(0); const Location right = locations->InAt(1); @@ -1629,22 +1635,26 @@ static Condition GenerateLongTestConstant(HCondition* condition, __ CmpConstant(left_high, High32Bits(value)); __ it(EQ); __ cmp(left_low, ShifterOperand(Low32Bits(value)), EQ); - ret = ARMUnsignedCondition(cond); + ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; case kCondLE: case kCondGT: // Trivially true or false. if (value == std::numeric_limits<int64_t>::max()) { __ cmp(left_low, ShifterOperand(left_low)); - ret = cond == kCondLE ? EQ : NE; + ret = cond == kCondLE ? 
std::make_pair(EQ, NE) : std::make_pair(NE, EQ); break; } if (cond == kCondLE) { + DCHECK_EQ(opposite, kCondGT); cond = kCondLT; + opposite = kCondGE; } else { DCHECK_EQ(cond, kCondGT); + DCHECK_EQ(opposite, kCondLE); cond = kCondGE; + opposite = kCondLT; } value++; @@ -1653,7 +1663,7 @@ static Condition GenerateLongTestConstant(HCondition* condition, case kCondLT: __ CmpConstant(left_low, Low32Bits(value)); __ sbcs(IP, left_high, ShifterOperand(High32Bits(value))); - ret = ARMCondition(cond); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); break; default: LOG(FATAL) << "Unreachable"; @@ -1663,14 +1673,20 @@ static Condition GenerateLongTestConstant(HCondition* condition, return ret; } -static Condition GenerateLongTest(HCondition* condition, - bool invert, - CodeGeneratorARM* codegen) { +static std::pair<Condition, Condition> GenerateLongTest(HCondition* condition, + bool invert, + CodeGeneratorARM* codegen) { DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); const LocationSummary* const locations = condition->GetLocations(); - IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - Condition ret = EQ; + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + + if (invert) { + std::swap(cond, opposite); + } + + std::pair<Condition, Condition> ret; Location left = locations->InAt(0); Location right = locations->InAt(1); @@ -1689,15 +1705,19 @@ static Condition GenerateLongTest(HCondition* condition, __ cmp(left.AsRegisterPairLow<Register>(), ShifterOperand(right.AsRegisterPairLow<Register>()), EQ); - ret = ARMUnsignedCondition(cond); + ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; case kCondLE: case kCondGT: if (cond == kCondLE) { + DCHECK_EQ(opposite, kCondGT); cond = kCondGE; + opposite = kCondLT; } else { DCHECK_EQ(cond, kCondGT); + DCHECK_EQ(opposite, kCondLE); cond = kCondLT; + opposite = kCondGE; } std::swap(left, right); @@ -1709,7 +1729,7 @@ static Condition GenerateLongTest(HCondition* condition, __ sbcs(IP, left.AsRegisterPairHigh<Register>(), ShifterOperand(right.AsRegisterPairHigh<Register>())); - ret = ARMCondition(cond); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); break; default: LOG(FATAL) << "Unreachable"; @@ -1719,90 +1739,83 @@ static Condition GenerateLongTest(HCondition* condition, return ret; } -static Condition GenerateTest(HInstruction* instruction, - Location loc, - bool invert, - CodeGeneratorARM* codegen) { - DCHECK(!instruction->IsConstant()); +static std::pair<Condition, Condition> GenerateTest(HCondition* condition, + bool invert, + CodeGeneratorARM* codegen) { + const LocationSummary* const locations = condition->GetLocations(); + const Primitive::Type type = condition->GetLeft()->GetType(); + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + std::pair<Condition, Condition> ret; + const Location right = locations->InAt(1); - Condition ret = invert ? EQ : NE; + if (invert) { + std::swap(cond, opposite); + } - if (IsBooleanValueOrMaterializedCondition(instruction)) { - __ CmpConstant(loc.AsRegister<Register>(), 0); + if (type == Primitive::kPrimLong) { + ret = locations->InAt(1).IsConstant() + ? 
GenerateLongTestConstant(condition, invert, codegen) + : GenerateLongTest(condition, invert, codegen); + } else if (Primitive::IsFloatingPointType(type)) { + GenerateVcmp(condition, codegen); + __ vmstat(); + ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()), + ARMFPCondition(opposite, condition->IsGtBias())); } else { - HCondition* const condition = instruction->AsCondition(); - const LocationSummary* const locations = condition->GetLocations(); - const Primitive::Type type = condition->GetLeft()->GetType(); - const IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - const Location right = locations->InAt(1); - - if (type == Primitive::kPrimLong) { - ret = condition->GetLocations()->InAt(1).IsConstant() - ? GenerateLongTestConstant(condition, invert, codegen) - : GenerateLongTest(condition, invert, codegen); - } else if (Primitive::IsFloatingPointType(type)) { - GenerateVcmp(condition, codegen); - __ vmstat(); - ret = ARMFPCondition(cond, condition->IsGtBias()); - } else { - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - const Register left = locations->InAt(0).AsRegister<Register>(); + const Register left = locations->InAt(0).AsRegister<Register>(); - if (right.IsRegister()) { - __ cmp(left, ShifterOperand(right.AsRegister<Register>())); - } else { - DCHECK(right.IsConstant()); - __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); - } - - ret = ARMCondition(cond); + if (right.IsRegister()) { + __ cmp(left, ShifterOperand(right.AsRegister<Register>())); + } else { + DCHECK(right.IsConstant()); + __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); } + + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); } return ret; } -static bool CanGenerateTest(HInstruction* condition, ArmAssembler* assembler) { - if (!IsBooleanValueOrMaterializedCondition(condition)) { - const HCondition* const cond = condition->AsCondition(); - - if (cond->GetLeft()->GetType() == Primitive::kPrimLong) { - const LocationSummary* const locations = cond->GetLocations(); - const IfCondition c = cond->GetCondition(); - - if (locations->InAt(1).IsConstant()) { - const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); - ShifterOperand so; - - if (c < kCondLT || c > kCondGE) { - // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, - // we check that the least significant half of the first input to be compared - // is in a low register (the other half is read outside an IT block), and - // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. 
- if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || - !IsUint<8>(Low32Bits(value))) { - return false; - } - } else if (c == kCondLE || c == kCondGT) { - if (value < std::numeric_limits<int64_t>::max() && - !assembler->ShifterOperandCanHold(kNoRegister, - kNoRegister, - SBC, - High32Bits(value + 1), - kCcSet, - &so)) { - return false; - } - } else if (!assembler->ShifterOperandCanHold(kNoRegister, - kNoRegister, - SBC, - High32Bits(value), - kCcSet, - &so)) { +static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { + if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { + const LocationSummary* const locations = condition->GetLocations(); + const IfCondition c = condition->GetCondition(); + + if (locations->InAt(1).IsConstant()) { + const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); + ShifterOperand so; + + if (c < kCondLT || c > kCondGE) { + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the least significant half of the first input to be compared + // is in a low register (the other half is read outside an IT block), and + // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP + // encoding can be used. + if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || + !IsUint<8>(Low32Bits(value))) { + return false; + } + } else if (c == kCondLE || c == kCondGT) { + if (value < std::numeric_limits<int64_t>::max() && + !assembler->ShifterOperandCanHold(kNoRegister, + kNoRegister, + SBC, + High32Bits(value + 1), + kCcSet, + &so)) { return false; } + } else if (!assembler->ShifterOperandCanHold(kNoRegister, + kNoRegister, + SBC, + High32Bits(value), + kCcSet, + &so)) { + return false; } } } @@ -2415,13 +2428,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond, - Label* true_label, - Label* false_label ATTRIBUTE_UNUSED) { - __ vmstat(); // transfer FP status register to ARM APSR. - __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias())); -} - void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label) { @@ -2438,7 +2444,6 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, // Set the conditions for the test, remembering that == needs to be // decided using the low words. - // TODO: consider avoiding jumps with temporary and CMP low+SBC high switch (if_cond) { case kCondEQ: case kCondNE: @@ -2509,25 +2514,38 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition, Label* true_target_in, Label* false_target_in) { + if (CanGenerateTest(condition, codegen_->GetAssembler())) { + Label* non_fallthrough_target; + bool invert; + + if (true_target_in == nullptr) { + DCHECK(false_target_in != nullptr); + non_fallthrough_target = false_target_in; + invert = true; + } else { + non_fallthrough_target = true_target_in; + invert = false; + } + + const auto cond = GenerateTest(condition, invert, codegen_); + + __ b(non_fallthrough_target, cond.first); + + if (false_target_in != nullptr && false_target_in != non_fallthrough_target) { + __ b(false_target_in); + } + + return; + } + // Generated branching requires both targets to be explicit. 
If either of the // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. Label fallthrough_target; Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; - Primitive::Type type = condition->InputAt(0)->GetType(); - switch (type) { - case Primitive::kPrimLong: - GenerateLongComparesAndJumps(condition, true_target, false_target); - break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - GenerateVcmp(condition, codegen_); - GenerateFPJumps(condition, true_target, false_target); - break; - default: - LOG(FATAL) << "Unexpected compare type " << type; - } + DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); + GenerateLongComparesAndJumps(condition, true_target, false_target); if (false_target != &fallthrough_target) { __ b(false_target); @@ -2729,7 +2747,8 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) { } if (!Primitive::IsFloatingPointType(type) && - CanGenerateTest(condition, codegen_->GetAssembler())) { + (IsBooleanValueOrMaterializedCondition(condition) || + CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) { bool invert = false; if (out.Equals(second)) { @@ -2753,7 +2772,14 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) { codegen_->MoveLocation(out, src.Equals(first) ? second : first, type); } - const Condition cond = GenerateTest(condition, locations->InAt(2), invert, codegen_); + std::pair<Condition, Condition> cond; + + if (IsBooleanValueOrMaterializedCondition(condition)) { + __ CmpConstant(locations->InAt(2).AsRegister<Register>(), 0); + cond = invert ? std::make_pair(EQ, NE) : std::make_pair(NE, EQ); + } else { + cond = GenerateTest(condition->AsCondition(), invert, codegen_); + } if (out.IsRegister()) { ShifterOperand operand; @@ -2765,8 +2791,8 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) { operand = ShifterOperand(src.AsRegister<Register>()); } - __ it(cond); - __ mov(out.AsRegister<Register>(), operand, cond); + __ it(cond.first); + __ mov(out.AsRegister<Register>(), operand, cond.first); } else { DCHECK(out.IsRegisterPair()); @@ -2784,10 +2810,10 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) { operand_low = ShifterOperand(src.AsRegisterPairLow<Register>()); } - __ it(cond); - __ mov(out.AsRegisterPairLow<Register>(), operand_low, cond); - __ it(cond); - __ mov(out.AsRegisterPairHigh<Register>(), operand_high, cond); + __ it(cond.first); + __ mov(out.AsRegisterPairLow<Register>(), operand_low, cond.first); + __ it(cond.first); + __ mov(out.AsRegisterPairHigh<Register>(), operand_high, cond.first); } return; @@ -2840,7 +2866,7 @@ void LocationsBuilderARM::HandleCondition(HCondition* cond) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } break; @@ -2867,51 +2893,44 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { return; } - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - Register out = locations->Out().AsRegister<Register>(); - Label true_label, false_label; + const Register out = cond->GetLocations()->Out().AsRegister<Register>(); - switch 
(cond->InputAt(0)->GetType()) { - default: { - // Integer case. - if (right.IsRegister()) { - __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>())); - } else { - DCHECK(right.IsConstant()); - __ CmpConstant(left.AsRegister<Register>(), - CodeGenerator::GetInt32ValueOf(right.GetConstant())); - } - __ it(ARMCondition(cond->GetCondition()), kItElse); - __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1), - ARMCondition(cond->GetCondition())); - __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0), - ARMCondition(cond->GetOppositeCondition())); - return; - } - case Primitive::kPrimLong: - GenerateLongComparesAndJumps(cond, &true_label, &false_label); - break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - GenerateVcmp(cond, codegen_); - GenerateFPJumps(cond, &true_label, &false_label); - break; + if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) { + const auto condition = GenerateTest(cond, false, codegen_); + + __ it(condition.first); + __ mov(out, ShifterOperand(1), condition.first); + __ it(condition.second); + __ mov(out, ShifterOperand(0), condition.second); + return; } // Convert the jumps into the result. Label done_label; - Label* final_label = codegen_->GetFinalLabel(cond, &done_label); + Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); - // False case: result = 0. - __ Bind(&false_label); - __ LoadImmediate(out, 0); - __ b(final_label); + if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { + Label true_label, false_label; - // True case: result = 1. - __ Bind(&true_label); - __ LoadImmediate(out, 1); + GenerateLongComparesAndJumps(cond, &true_label, &false_label); + + // False case: result = 0. + __ Bind(&false_label); + __ LoadImmediate(out, 0); + __ b(final_label); + + // True case: result = 1. + __ Bind(&true_label); + __ LoadImmediate(out, 1); + } else { + DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + + const auto condition = GenerateTest(cond, false, codegen_); + + __ mov(out, ShifterOperand(0), AL, kCcKeep); + __ b(final_label, condition.second); + __ LoadImmediate(out, 1); + } if (done_label.IsLinked()) { __ Bind(&done_label); @@ -7039,14 +7058,16 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - Label done, zero; - Label* final_label = codegen_->GetFinalLabel(instruction, &done); + Label done; + Label* const final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &zero); + DCHECK_NE(out, obj); + __ LoadImmediate(out, 0); + __ CompareAndBranchIfZero(obj, final_label); } switch (type_check_kind) { @@ -7058,11 +7079,23 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { class_offset, maybe_temp_loc, kCompilerReadBarrierOption); - __ cmp(out, ShifterOperand(cls)); // Classes must be equal for the instanceof to succeed. - __ b(&zero, NE); - __ LoadImmediate(out, 1); - __ b(final_label); + __ cmp(out, ShifterOperand(cls)); + // We speculatively set the result to false without changing the condition + // flags, which allows us to avoid some branching later. 
+ __ mov(out, ShifterOperand(0), AL, kCcKeep); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (ArmAssembler::IsLowRegister(out)) { + __ it(EQ); + __ mov(out, ShifterOperand(1), EQ); + } else { + __ b(final_label, NE); + __ LoadImmediate(out, 1); + } + break; } @@ -7084,14 +7117,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { super_offset, maybe_temp_loc, kCompilerReadBarrierOption); - // If `out` is null, we use it for the result, and jump to `done`. + // If `out` is null, we use it for the result, and jump to the final label. __ CompareAndBranchIfZero(out, final_label); __ cmp(out, ShifterOperand(cls)); __ b(&loop, NE); __ LoadImmediate(out, 1); - if (zero.IsLinked()) { - __ b(final_label); - } break; } @@ -7114,14 +7144,32 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { super_offset, maybe_temp_loc, kCompilerReadBarrierOption); - __ CompareAndBranchIfNonZero(out, &loop); - // If `out` is null, we use it for the result, and jump to `done`. - __ b(final_label); - __ Bind(&success); - __ LoadImmediate(out, 1); - if (zero.IsLinked()) { + // This is essentially a null check, but it sets the condition flags to the + // proper value for the code that follows the loop, i.e. not `EQ`. + __ cmp(out, ShifterOperand(1)); + __ b(&loop, HS); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (ArmAssembler::IsLowRegister(out)) { + // If `out` is null, we use it for the result, and the condition flags + // have already been set to `NE`, so the IT block that comes afterwards + // (and which handles the successful case) turns into a NOP (instead of + // overwriting `out`). + __ Bind(&success); + // There is only one branch to the `success` label (which is bound to this + // IT block), and it has the same condition, `EQ`, so in that case the MOV + // is executed. + __ it(EQ); + __ mov(out, ShifterOperand(1), EQ); + } else { + // If `out` is null, we use it for the result, and jump to the final label. __ b(final_label); + __ Bind(&success); + __ LoadImmediate(out, 1); } + break; } @@ -7144,14 +7192,28 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { component_offset, maybe_temp_loc, kCompilerReadBarrierOption); - // If `out` is null, we use it for the result, and jump to `done`. + // If `out` is null, we use it for the result, and jump to the final label. __ CompareAndBranchIfZero(out, final_label); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(out, &zero); - __ Bind(&exact_check); - __ LoadImmediate(out, 1); - __ b(final_label); + __ cmp(out, ShifterOperand(0)); + // We speculatively set the result to false without changing the condition + // flags, which allows us to avoid some branching later. + __ mov(out, ShifterOperand(0), AL, kCcKeep); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. 
+ if (ArmAssembler::IsLowRegister(out)) { + __ Bind(&exact_check); + __ it(EQ); + __ mov(out, ShifterOperand(1), EQ); + } else { + __ b(final_label, NE); + __ Bind(&exact_check); + __ LoadImmediate(out, 1); + } + break; } @@ -7171,9 +7233,6 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel(), NE); __ LoadImmediate(out, 1); - if (zero.IsLinked()) { - __ b(final_label); - } break; } @@ -7202,18 +7261,10 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel()); - if (zero.IsLinked()) { - __ b(final_label); - } break; } } - if (zero.IsLinked()) { - __ Bind(&zero); - __ LoadImmediate(out, 0); - } - if (done.IsLinked()) { __ Bind(&done); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 59a7f7c048..86f2f21df7 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -299,7 +299,6 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); - void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 28cc942dfb..d463830ff6 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1150,7 +1150,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet @@ -3281,7 +3281,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); LocationSummary* locations = instruction->GetLocations(); Register out = OutputRegister(instruction); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 2d2d8109a3..cce412b314 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1175,7 +1175,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << 
instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet @@ -1687,14 +1687,21 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege } } -static vixl32::Condition GenerateLongTestConstant(HCondition* condition, - bool invert, - CodeGeneratorARMVIXL* codegen) { +static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( + HCondition* condition, + bool invert, + CodeGeneratorARMVIXL* codegen) { DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); const LocationSummary* const locations = condition->GetLocations(); - IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - vixl32::Condition ret = eq; + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + + if (invert) { + std::swap(cond, opposite); + } + + std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); const Location left = locations->InAt(0); const Location right = locations->InAt(1); @@ -1713,13 +1720,14 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition, case kCondAE: { __ Cmp(left_high, High32Bits(value)); + // We use the scope because of the IT block that follows. ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2 * vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); __ it(eq); __ cmp(eq, left_low, Low32Bits(value)); - ret = ARMUnsignedCondition(cond); + ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; } case kCondLE: @@ -1727,15 +1735,19 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition, // Trivially true or false. if (value == std::numeric_limits<int64_t>::max()) { __ Cmp(left_low, left_low); - ret = cond == kCondLE ? eq : ne; + ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq); break; } if (cond == kCondLE) { + DCHECK_EQ(opposite, kCondGT); cond = kCondLT; + opposite = kCondGE; } else { DCHECK_EQ(cond, kCondGT); + DCHECK_EQ(opposite, kCondLE); cond = kCondGE; + opposite = kCondLT; } value++; @@ -1746,7 +1758,7 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition, __ Cmp(left_low, Low32Bits(value)); __ Sbcs(temps.Acquire(), left_high, High32Bits(value)); - ret = ARMCondition(cond); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); break; } default: @@ -1757,14 +1769,21 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition, return ret; } -static vixl32::Condition GenerateLongTest(HCondition* condition, - bool invert, - CodeGeneratorARMVIXL* codegen) { +static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest( + HCondition* condition, + bool invert, + CodeGeneratorARMVIXL* codegen) { DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); const LocationSummary* const locations = condition->GetLocations(); - IfCondition cond = invert ? 
condition->GetOppositeCondition() : condition->GetCondition(); - vixl32::Condition ret = eq; + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + + if (invert) { + std::swap(cond, opposite); + } + + std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); Location left = locations->InAt(0); Location right = locations->InAt(1); @@ -1779,22 +1798,27 @@ static vixl32::Condition GenerateLongTest(HCondition* condition, case kCondAE: { __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); + // We use the scope because of the IT block that follows. ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2 * vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); __ it(eq); __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right)); - ret = ARMUnsignedCondition(cond); + ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; } case kCondLE: case kCondGT: if (cond == kCondLE) { + DCHECK_EQ(opposite, kCondGT); cond = kCondGE; + opposite = kCondLT; } else { DCHECK_EQ(cond, kCondGT); + DCHECK_EQ(opposite, kCondLE); cond = kCondLT; + opposite = kCondGE; } std::swap(left, right); @@ -1805,7 +1829,7 @@ static vixl32::Condition GenerateLongTest(HCondition* condition, __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right)); - ret = ARMCondition(cond); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); break; } default: @@ -1816,69 +1840,62 @@ static vixl32::Condition GenerateLongTest(HCondition* condition, return ret; } -static vixl32::Condition GenerateTest(HInstruction* instruction, - Location loc, - bool invert, - CodeGeneratorARMVIXL* codegen) { - DCHECK(!instruction->IsConstant()); +static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition, + bool invert, + CodeGeneratorARMVIXL* codegen) { + const Primitive::Type type = condition->GetLeft()->GetType(); + IfCondition cond = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); - vixl32::Condition ret = invert ? eq : ne; + if (invert) { + std::swap(cond, opposite); + } - if (IsBooleanValueOrMaterializedCondition(instruction)) { - __ Cmp(RegisterFrom(loc), 0); + if (type == Primitive::kPrimLong) { + ret = condition->GetLocations()->InAt(1).IsConstant() + ? GenerateLongTestConstant(condition, invert, codegen) + : GenerateLongTest(condition, invert, codegen); + } else if (Primitive::IsFloatingPointType(type)) { + GenerateVcmp(condition, codegen); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()), + ARMFPCondition(opposite, condition->IsGtBias())); } else { - HCondition* const condition = instruction->AsCondition(); - const Primitive::Type type = condition->GetLeft()->GetType(); - const IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition(); - - if (type == Primitive::kPrimLong) { - ret = condition->GetLocations()->InAt(1).IsConstant() - ? 
GenerateLongTestConstant(condition, invert, codegen) - : GenerateLongTest(condition, invert, codegen); - } else if (Primitive::IsFloatingPointType(type)) { - GenerateVcmp(condition, codegen); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - ret = ARMFPCondition(cond, condition->IsGtBias()); - } else { - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1)); - ret = ARMCondition(cond); - } + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1)); + ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); } return ret; } -static bool CanGenerateTest(HInstruction* condition, ArmVIXLAssembler* assembler) { - if (!IsBooleanValueOrMaterializedCondition(condition)) { - const HCondition* const cond = condition->AsCondition(); - - if (cond->GetLeft()->GetType() == Primitive::kPrimLong) { - const LocationSummary* const locations = cond->GetLocations(); - const IfCondition c = cond->GetCondition(); +static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) { + if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { + const LocationSummary* const locations = condition->GetLocations(); + const IfCondition c = condition->GetCondition(); - if (locations->InAt(1).IsConstant()) { - const int64_t value = Int64ConstantFrom(locations->InAt(1)); + if (locations->InAt(1).IsConstant()) { + const int64_t value = Int64ConstantFrom(locations->InAt(1)); - if (c < kCondLT || c > kCondGE) { - // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, - // we check that the least significant half of the first input to be compared - // is in a low register (the other half is read outside an IT block), and - // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. - if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) { - return false; - } - // TODO(VIXL): The rest of the checks are there to keep the backend in sync with - // the previous one, but are not strictly necessary. - } else if (c == kCondLE || c == kCondGT) { - if (value < std::numeric_limits<int64_t>::max() && - !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) { - return false; - } - } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) { + if (c < kCondLT || c > kCondGE) { + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the least significant half of the first input to be compared + // is in a low register (the other half is read outside an IT block), and + // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP + // encoding can be used. + if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) { + return false; + } + // TODO(VIXL): The rest of the checks are there to keep the backend in sync with + // the previous one, but are not strictly necessary. 
+ } else if (c == kCondLE || c == kCondGT) { + if (value < std::numeric_limits<int64_t>::max() && + !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) { return false; } + } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) { + return false; } } } @@ -2445,14 +2462,6 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARMVIXL::GenerateFPJumps(HCondition* cond, - vixl32::Label* true_label, - vixl32::Label* false_label ATTRIBUTE_UNUSED) { - // To branch on the result of the FP compare we transfer FPSCR to APSR (encoded as PC in VMRS). - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(ARMFPCondition(cond->GetCondition(), cond->IsGtBias()), true_label); -} - void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond, vixl32::Label* true_label, vixl32::Label* false_label) { @@ -2469,7 +2478,6 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c // Set the conditions for the test, remembering that == needs to be // decided using the low words. - // TODO: consider avoiding jumps with temporary and CMP low+SBC high switch (if_cond) { case kCondEQ: case kCondNE: @@ -2540,31 +2548,44 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, vixl32::Label* true_target_in, vixl32::Label* false_target_in) { + if (CanGenerateTest(condition, codegen_->GetAssembler())) { + vixl32::Label* non_fallthrough_target; + bool invert; + + if (true_target_in == nullptr) { + DCHECK(false_target_in != nullptr); + non_fallthrough_target = false_target_in; + invert = true; + } else { + non_fallthrough_target = true_target_in; + invert = false; + } + + const auto cond = GenerateTest(condition, invert, codegen_); + + __ B(cond.first, non_fallthrough_target); + + if (false_target_in != nullptr && false_target_in != non_fallthrough_target) { + __ B(false_target_in); + } + + return; + } + // Generated branching requires both targets to be explicit. If either of the // targets is nullptr (fallthrough) use and bind `fallthrough` instead. vixl32::Label fallthrough; vixl32::Label* true_target = (true_target_in == nullptr) ? &fallthrough : true_target_in; vixl32::Label* false_target = (false_target_in == nullptr) ? 
&fallthrough : false_target_in; - Primitive::Type type = condition->InputAt(0)->GetType(); - switch (type) { - case Primitive::kPrimLong: - GenerateLongComparesAndJumps(condition, true_target, false_target); - break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - GenerateVcmp(condition, codegen_); - GenerateFPJumps(condition, true_target, false_target); - break; - default: - LOG(FATAL) << "Unexpected compare type " << type; - } + DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); + GenerateLongComparesAndJumps(condition, true_target, false_target); if (false_target != &fallthrough) { __ B(false_target); } - if (true_target_in == nullptr || false_target_in == nullptr) { + if (fallthrough.IsReferenced()) { __ Bind(&fallthrough); } } @@ -2759,7 +2780,8 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { } if (!Primitive::IsFloatingPointType(type) && - CanGenerateTest(condition, codegen_->GetAssembler())) { + (IsBooleanValueOrMaterializedCondition(condition) || + CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) { bool invert = false; if (out.Equals(second)) { @@ -2783,15 +2805,24 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { codegen_->MoveLocation(out, src.Equals(first) ? second : first, type); } - const vixl32::Condition cond = GenerateTest(condition, locations->InAt(2), invert, codegen_); + std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne); + + if (IsBooleanValueOrMaterializedCondition(condition)) { + __ Cmp(InputRegisterAt(select, 2), 0); + cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq); + } else { + cond = GenerateTest(condition->AsCondition(), invert, codegen_); + } + const size_t instr_count = out.IsRegisterPair() ? 4 : 2; + // We use the scope because of the IT block that follows. ExactAssemblyScope guard(GetVIXLAssembler(), instr_count * vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); if (out.IsRegister()) { - __ it(cond); - __ mov(cond, RegisterFrom(out), OperandFrom(src, type)); + __ it(cond.first); + __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type)); } else { DCHECK(out.IsRegisterPair()); @@ -2809,10 +2840,10 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { operand_low = LowRegisterFrom(src); } - __ it(cond); - __ mov(cond, LowRegisterFrom(out), operand_low); - __ it(cond); - __ mov(cond, HighRegisterFrom(out), operand_high); + __ it(cond.first); + __ mov(cond.first, LowRegisterFrom(out), operand_low); + __ it(cond.first); + __ mov(cond.first, HighRegisterFrom(out), operand_high); } return; @@ -2865,7 +2896,7 @@ void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } break; @@ -2892,50 +2923,48 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { return; } - Location right = cond->GetLocations()->InAt(1); - vixl32::Register out = OutputRegister(cond); - vixl32::Label true_label, false_label; + const vixl32::Register out = OutputRegister(cond); - switch (cond->InputAt(0)->GetType()) { - default: { - // Integer case. 
- if (right.IsRegister()) { - __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1)); - } else { - DCHECK(right.IsConstant()); - __ Cmp(InputRegisterAt(cond, 0), - CodeGenerator::GetInt32ValueOf(right.GetConstant())); - } - ExactAssemblyScope aas(GetVIXLAssembler(), - 3 * vixl32::kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ ite(ARMCondition(cond->GetCondition())); - __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1); - __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0); - return; - } - case Primitive::kPrimLong: - GenerateLongComparesAndJumps(cond, &true_label, &false_label); - break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - GenerateVcmp(cond, codegen_); - GenerateFPJumps(cond, &true_label, &false_label); - break; + if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) { + const auto condition = GenerateTest(cond, false, codegen_); + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(GetVIXLAssembler(), + 4 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(condition.first); + __ mov(condition.first, out, 1); + __ it(condition.second); + __ mov(condition.second, out, 0); + return; } // Convert the jumps into the result. vixl32::Label done_label; - vixl32::Label* final_label = codegen_->GetFinalLabel(cond, &done_label); + vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); - // False case: result = 0. - __ Bind(&false_label); - __ Mov(out, 0); - __ B(final_label); + if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { + vixl32::Label true_label, false_label; - // True case: result = 1. - __ Bind(&true_label); - __ Mov(out, 1); + GenerateLongComparesAndJumps(cond, &true_label, &false_label); + + // False case: result = 0. + __ Bind(&false_label); + __ Mov(out, 0); + __ B(final_label); + + // True case: result = 1. + __ Bind(&true_label); + __ Mov(out, 1); + } else { + DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + + const auto condition = GenerateTest(cond, false, codegen_); + + __ Mov(LeaveFlags, out, 0); + __ B(condition.second, final_label, /* far_target */ false); + __ Mov(out, 1); + } if (done_label.IsReferenced()) { __ Bind(&done_label); @@ -7079,14 +7108,16 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - vixl32::Label done, zero; - vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); + vixl32::Label done; + vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARMVIXL* slow_path = nullptr; // Return 0 if `obj` is null. // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &zero, /* far_target */ false); + DCHECK(!out.Is(obj)); + __ Mov(out, 0); + __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false); } switch (type_check_kind) { @@ -7098,11 +7129,28 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) class_offset, maybe_temp_loc, kCompilerReadBarrierOption); - __ Cmp(out, cls); // Classes must be equal for the instanceof to succeed. 
- __ B(ne, &zero, /* far_target */ false); - __ Mov(out, 1); - __ B(final_label); + __ Cmp(out, cls); + // We speculatively set the result to false without changing the condition + // flags, which allows us to avoid some branching later. + __ Mov(LeaveFlags, out, 0); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (out.IsLow()) { + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(eq); + __ mov(eq, out, 1); + } else { + __ B(ne, final_label, /* far_target */ false); + __ Mov(out, 1); + } + break; } @@ -7124,14 +7172,11 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) super_offset, maybe_temp_loc, kCompilerReadBarrierOption); - // If `out` is null, we use it for the result, and jump to `done`. + // If `out` is null, we use it for the result, and jump to the final label. __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); __ Cmp(out, cls); __ B(ne, &loop, /* far_target */ false); __ Mov(out, 1); - if (zero.IsReferenced()) { - __ B(final_label); - } break; } @@ -7154,14 +7199,38 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) super_offset, maybe_temp_loc, kCompilerReadBarrierOption); - __ CompareAndBranchIfNonZero(out, &loop); - // If `out` is null, we use it for the result, and jump to `done`. - __ B(final_label); - __ Bind(&success); - __ Mov(out, 1); - if (zero.IsReferenced()) { + // This is essentially a null check, but it sets the condition flags to the + // proper value for the code that follows the loop, i.e. not `eq`. + __ Cmp(out, 1); + __ B(hs, &loop, /* far_target */ false); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (out.IsLow()) { + // If `out` is null, we use it for the result, and the condition flags + // have already been set to `ne`, so the IT block that comes afterwards + // (and which handles the successful case) turns into a NOP (instead of + // overwriting `out`). + __ Bind(&success); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + // There is only one branch to the `success` label (which is bound to this + // IT block), and it has the same condition, `eq`, so in that case the MOV + // is executed. + __ it(eq); + __ mov(eq, out, 1); + } else { + // If `out` is null, we use it for the result, and jump to the final label. __ B(final_label); + __ Bind(&success); + __ Mov(out, 1); } + break; } @@ -7184,14 +7253,34 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) component_offset, maybe_temp_loc, kCompilerReadBarrierOption); - // If `out` is null, we use it for the result, and jump to `done`. + // If `out` is null, we use it for the result, and jump to the final label. 
__ CompareAndBranchIfZero(out, final_label, /* far_target */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false); - __ Bind(&exact_check); - __ Mov(out, 1); - __ B(final_label); + __ Cmp(out, 0); + // We speculatively set the result to false without changing the condition + // flags, which allows us to avoid some branching later. + __ Mov(LeaveFlags, out, 0); + + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. + if (out.IsLow()) { + __ Bind(&exact_check); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(eq); + __ mov(eq, out, 1); + } else { + __ B(ne, final_label, /* far_target */ false); + __ Bind(&exact_check); + __ Mov(out, 1); + } + break; } @@ -7211,9 +7300,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); - if (zero.IsReferenced()) { - __ B(final_label); - } break; } @@ -7242,18 +7328,10 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); - if (zero.IsReferenced()) { - __ B(final_label); - } break; } } - if (zero.IsReferenced()) { - __ Bind(&zero); - __ Mov(out, 0); - } - if (done.IsReferenced()) { __ Bind(&done); } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 781027ab30..1e9669dc38 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -401,9 +401,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, vixl::aarch32::Label* true_target, vixl::aarch32::Label* false_target); - void GenerateFPJumps(HCondition* cond, - vixl::aarch32::Label* true_label, - vixl::aarch32::Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, vixl::aarch32::Label* true_label, vixl::aarch32::Label* false_label); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 5246dbc5cb..c82533bc7d 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -558,26 +558,21 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() { return; } - // Make sure the frame size isn't unreasonably large. Per the various APIs - // it looks like it should always be less than 2GB in size, which allows - // us using 32-bit signed offsets from the stack pointer. - if (GetFrameSize() > 0x7FFFFFFF) - LOG(FATAL) << "Stack frame larger than 2GB"; + // Make sure the frame size isn't unreasonably large. + if (GetFrameSize() > GetStackOverflowReservedBytes(kMips64)) { + LOG(FATAL) << "Stack frame larger than " << GetStackOverflowReservedBytes(kMips64) << " bytes"; + } // Spill callee-saved registers. - // Note that their cumulative size is small and they can be indexed using - // 16-bit offsets. - - // TODO: increment/decrement SP in one step instead of two or remove this comment. 
- uint32_t ofs = FrameEntrySpillSize(); + uint32_t ofs = GetFrameSize(); __ IncreaseFrameSize(ofs); for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { GpuRegister reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { ofs -= kMips64DoublewordSize; - __ Sd(reg, SP, ofs); + __ StoreToOffset(kStoreDoubleword, reg, SP, ofs); __ cfi().RelOffset(DWARFReg(reg), ofs); } } @@ -586,23 +581,16 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() { FpuRegister reg = kFpuCalleeSaves[i]; if (allocated_registers_.ContainsFloatingPointRegister(reg)) { ofs -= kMips64DoublewordSize; - __ Sdc1(reg, SP, ofs); + __ StoreFpuToOffset(kStoreDoubleword, reg, SP, ofs); __ cfi().RelOffset(DWARFReg(reg), ofs); } } - // Allocate the rest of the frame and store the current method pointer - // at its end. - - __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize()); - // Save the current method if we need it. Note that we do not // do this in HCurrentMethod, as the instruction might have been removed // in the SSA graph. if (RequiresCurrentMethod()) { - static_assert(IsInt<16>(kCurrentMethodStackOffset), - "kCurrentMethodStackOffset must fit into int16_t"); - __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset); + __ StoreToOffset(kStoreDoubleword, kMethodRegisterArgument, SP, kCurrentMethodStackOffset); } if (GetGraph()->HasShouldDeoptimizeFlag()) { @@ -615,42 +603,32 @@ void CodeGeneratorMIPS64::GenerateFrameExit() { __ cfi().RememberState(); if (!HasEmptyFrame()) { - // Deallocate the rest of the frame. - - __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize()); - // Restore callee-saved registers. - // Note that their cumulative size is small and they can be indexed using - // 16-bit offsets. - - // TODO: increment/decrement SP in one step instead of two or remove this comment. - uint32_t ofs = 0; - - for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { - FpuRegister reg = kFpuCalleeSaves[i]; - if (allocated_registers_.ContainsFloatingPointRegister(reg)) { - __ Ldc1(reg, SP, ofs); - ofs += kMips64DoublewordSize; + // For better instruction scheduling restore RA before other registers. 
+ uint32_t ofs = GetFrameSize(); + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { + GpuRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + ofs -= kMips64DoublewordSize; + __ LoadFromOffset(kLoadDoubleword, reg, SP, ofs); __ cfi().Restore(DWARFReg(reg)); } } - for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { - GpuRegister reg = kCoreCalleeSaves[i]; - if (allocated_registers_.ContainsCoreRegister(reg)) { - __ Ld(reg, SP, ofs); - ofs += kMips64DoublewordSize; + for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { + FpuRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + ofs -= kMips64DoublewordSize; + __ LoadFpuFromOffset(kLoadDoubleword, reg, SP, ofs); __ cfi().Restore(DWARFReg(reg)); } } - DCHECK_EQ(ofs, FrameEntrySpillSize()); - __ DecreaseFrameSize(ofs); + __ DecreaseFrameSize(GetFrameSize()); } - __ Jr(RA); - __ Nop(); + __ Jic(RA, 0); __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4db4796985..80776e8b78 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -723,7 +723,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2ffc398287..49f099f6a9 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -744,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -3660,7 +3660,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); bool is_div = instruction->IsDiv(); LocationSummary* locations = instruction->GetLocations(); diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index cd954043f5..31cd204c9f 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -74,7 +74,6 @@ class CodegenTargetConfig { } private: - CodegenTargetConfig() {} InstructionSet isa_; CreateCodegenFn create_codegen_; }; diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index e184745520..01304ac35b 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -66,6 +66,11 @@ inline 
vixl::aarch32::SRegister LowSRegisterFrom(Location location) { return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>()); } +inline vixl::aarch32::SRegister HighSRegisterFrom(Location location) { + DCHECK(location.IsFpuRegisterPair()) << location; + return vixl::aarch32::SRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::SRegister>()); +} + inline vixl::aarch32::Register RegisterFrom(Location location) { DCHECK(location.IsRegister()) << location; return vixl::aarch32::Register(location.reg()); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 0dfae11465..cc3c143b15 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -505,6 +505,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit"); } + void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE { + StartAttributeStream("kind") << deoptimize->GetKind(); + } + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetOpKind(); diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index 82ee93d5c2..9516ccb385 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -29,7 +29,21 @@ namespace art { */ class InductionVarAnalysisTest : public CommonCompilerTest { public: - InductionVarAnalysisTest() : pool_(), allocator_(&pool_) { + InductionVarAnalysisTest() + : pool_(), + allocator_(&pool_), + iva_(nullptr), + entry_(nullptr), + return_(nullptr), + exit_(nullptr), + parameter_(nullptr), + constant0_(nullptr), + constant1_(nullptr), + constant2_(nullptr), + constant7_(nullptr), + constant100_(nullptr), + constantm1_(nullptr), + float_constant0_(nullptr) { graph_ = CreateGraph(&allocator_); } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 62f5114e59..19f668dc1d 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -46,32 +46,100 @@ namespace art { -static constexpr size_t kMaximumNumberOfHInstructions = 32; +// Instruction limit to control memory. +static constexpr size_t kMaximumNumberOfTotalInstructions = 1024; + +// Maximum number of instructions for considering a method small, +// which we will always try to inline if the other non-instruction limits +// are not reached. +static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3; // Limit the number of dex registers that we accumulate while inlining // to avoid creating large amount of nested environments. static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64; -// Avoid inlining within a huge method due to memory pressure. -static constexpr size_t kMaximumCodeUnitSize = 4096; +// Limit recursive call inlining, which do not benefit from too +// much inlining compared to code locality. +static constexpr size_t kMaximumNumberOfRecursiveCalls = 4; // Controls the use of inline caches in AOT mode. 
static constexpr bool kUseAOTInlineCaches = false; -void HInliner::Run() { - const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions(); - if ((compiler_options.GetInlineDepthLimit() == 0) - || (compiler_options.GetInlineMaxCodeUnits() == 0)) { - return; +// We check for line numbers to make sure the DepthString implementation +// aligns the output nicely. +#define LOG_INTERNAL(msg) \ + static_assert(__LINE__ > 10, "Unhandled line number"); \ + static_assert(__LINE__ < 10000, "Unhandled line number"); \ + VLOG(compiler) << DepthString(__LINE__) << msg + +#define LOG_TRY() LOG_INTERNAL("Try inlinining call: ") +#define LOG_NOTE() LOG_INTERNAL("Note: ") +#define LOG_SUCCESS() LOG_INTERNAL("Success: ") +#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ") +#define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ") + +std::string HInliner::DepthString(int line) const { + std::string value; + // Indent according to the inlining depth. + size_t count = depth_; + // Line numbers get printed in the log, so add a space if the log's line is less + // than 1000, and two if less than 100. 10 cannot be reached as it's the copyright. + if (!kIsTargetBuild) { + if (line < 100) { + value += " "; + } + if (line < 1000) { + value += " "; + } + // Safeguard if this file reaches more than 10000 lines. + DCHECK_LT(line, 10000); } - if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) { - return; + for (size_t i = 0; i < count; ++i) { + value += " "; + } + return value; +} + +static size_t CountNumberOfInstructions(HGraph* graph) { + size_t number_of_instructions = 0; + for (HBasicBlock* block : graph->GetReversePostOrderSkipEntryBlock()) { + for (HInstructionIterator instr_it(block->GetInstructions()); + !instr_it.Done(); + instr_it.Advance()) { + ++number_of_instructions; + } + } + return number_of_instructions; +} + +void HInliner::UpdateInliningBudget() { + if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) { + // Always try to inline small methods. + inlining_budget_ = kMaximumNumberOfInstructionsForSmallMethod; + } else { + inlining_budget_ = std::max( + kMaximumNumberOfInstructionsForSmallMethod, + kMaximumNumberOfTotalInstructions - total_number_of_instructions_); } +} + +void HInliner::Run() { if (graph_->IsDebuggable()) { // For simplicity, we currently never inline when the graph is debuggable. This avoids // doing some logic in the runtime to discover if a method could have been inlined. return; } + + // Initialize the number of instructions for the method being compiled. Recursive calls + // to HInliner::Run have already updated the instruction count. + if (outermost_graph_ == graph_) { + total_number_of_instructions_ = CountNumberOfInstructions(graph_); + } + + UpdateInliningBudget(); + DCHECK_NE(total_number_of_instructions_, 0u); + DCHECK_NE(inlining_budget_, 0u); + // Keep a copy of all blocks when starting the visit. 
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder(); DCHECK(!blocks.empty()); @@ -292,7 +360,18 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { return nullptr; } PointerSize pointer_size = caller_compilation_unit_.GetClassLinker()->GetImagePointerSize(); - return resolved_method->GetSingleImplementation(pointer_size); + ArtMethod* single_impl = resolved_method->GetSingleImplementation(pointer_size); + if (single_impl == nullptr) { + return nullptr; + } + if (single_impl->IsProxyMethod()) { + // Proxy method is a generic invoker that's not worth + // devirtualizing/inlining. It also causes issues when the proxy + // method is in another dex file if we try to rewrite invoke-interface to + // invoke-virtual because a proxy method doesn't have a real dex file. + return nullptr; + } + return single_impl; } bool HInliner::TryInline(HInvoke* invoke_instruction) { @@ -305,17 +384,18 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { ScopedObjectAccess soa(Thread::Current()); uint32_t method_index = invoke_instruction->GetDexMethodIndex(); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); - VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index); + LOG_TRY() << caller_dex_file.PrettyMethod(method_index); - // We can query the dex cache directly. The verifier has populated it already. ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod(); - ArtMethod* actual_method = nullptr; if (resolved_method == nullptr) { DCHECK(invoke_instruction->IsInvokeStaticOrDirect()); DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()); - VLOG(compiler) << "Not inlining a String.<init> method"; + LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method"; return false; - } else if (invoke_instruction->IsInvokeStaticOrDirect()) { + } + ArtMethod* actual_method = nullptr; + + if (invoke_instruction->IsInvokeStaticOrDirect()) { actual_method = resolved_method; } else { // Check if we can statically find the method. 
@@ -328,6 +408,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { if (method != nullptr) { cha_devirtualize = true; actual_method = method; + LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod(); } } @@ -390,16 +471,23 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache); switch (inline_cache_type) { - case kInlineCacheNoData: - break; + case kInlineCacheNoData: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " could not be statically determined"; + return false; + } - case kInlineCacheUninitialized: - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) - << " is not hit and not inlined"; + case kInlineCacheUninitialized: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is not hit and not inlined"; return false; + } - case kInlineCacheMonomorphic: + case kInlineCacheMonomorphic: { MaybeRecordStat(kMonomorphicCall); if (outermost_graph_->IsCompilingOsr()) { // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the @@ -408,23 +496,29 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, } else { return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache); } + } - case kInlineCachePolymorphic: + case kInlineCachePolymorphic: { MaybeRecordStat(kPolymorphicCall); return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache); + } - case kInlineCacheMegamorphic: - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) - << " is megamorphic and not inlined"; + case kInlineCacheMegamorphic: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is megamorphic and not inlined"; MaybeRecordStat(kMegamorphicCall); return false; + } - case kInlineCacheMissingTypes: - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) - << " is missing types and not inlined"; + case kInlineCacheMissingTypes: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is missing types and not inlined"; return false; + } } UNREACHABLE(); } @@ -587,9 +681,10 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, dex::TypeIndex class_index = FindClassIndexIn( GetMonomorphicType(classes), caller_compilation_unit_); if (!class_index.IsValid()) { - VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) - << " from inline cache is not inlined because its class is not" - << " accessible to the caller"; + LOG_FAIL(kNotInlinedDexCache) + << "Call to " << ArtMethod::PrettyMethod(resolved_method) + << " from inline cache is not inlined because its class is not" + << " accessible to the caller"; return false; } @@ -603,6 +698,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual( resolved_method, pointer_size); } + LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod(); DCHECK(resolved_method != nullptr); 
HInstruction* receiver = invoke_instruction->InputAt(0); HInstruction* cursor = invoke_instruction->GetPrevious(); @@ -646,7 +742,8 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc); HInstruction* compare = new (graph_->GetArena()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); - HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc); + HInstruction* deopt = new (graph_->GetArena()) HDeoptimize( + graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(deopt_flag, cursor); @@ -710,9 +807,16 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, bb_cursor->InsertInstructionAfter(compare, load_class); if (with_deoptimization) { HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( - compare, invoke_instruction->GetDexPc()); + graph_->GetArena(), + compare, + receiver, + HDeoptimize::Kind::kInline, + invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + DCHECK_EQ(invoke_instruction->InputAt(0), receiver); + receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize); + deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo()); } return compare; } @@ -752,6 +856,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_); HInstruction* return_replacement = nullptr; + LOG_NOTE() << "Try inline polymorphic call to " << method->PrettyMethod(); if (!class_index.IsValid() || !TryBuildAndInline(invoke_instruction, method, @@ -761,8 +866,8 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, } else { one_target_inlined = true; - VLOG(compiler) << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method) - << " has inlined " << ArtMethod::PrettyMethod(method); + LOG_SUCCESS() << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method) + << " has inlined " << ArtMethod::PrettyMethod(method); // If we have inlined all targets before, and this receiver is the last seen, // we deoptimize instead of keeping the original invoke instruction. @@ -796,9 +901,10 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, } if (!one_target_inlined) { - VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) - << " from inline cache is not inlined because none" - << " of its targets could be inlined"; + LOG_FAIL_NO_STAT() + << "Call to " << ArtMethod::PrettyMethod(resolved_method) + << " from inline cache is not inlined because none" + << " of its targets could be inlined"; return false; } @@ -932,9 +1038,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( actual_method = new_method; } else if (actual_method != new_method) { // Different methods, bailout. 
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) - << " from inline cache is not inlined because it resolves" - << " to different methods"; return false; } } @@ -988,13 +1091,19 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction); } else { HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( - compare, invoke_instruction->GetDexPc()); + graph_->GetArena(), + compare, + receiver, + HDeoptimize::Kind::kInline, + invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); if (return_replacement != nullptr) { invoke_instruction->ReplaceWith(return_replacement); } + receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize); invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); + deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo()); } // Run type propagation to get the guard typed. @@ -1007,6 +1116,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( MaybeRecordStat(kInlinedPolymorphicCall); + LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod(); return true; } @@ -1021,11 +1131,23 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) { if (invoke_instruction->IsInvokeInterface()) { + DCHECK(!method->IsProxyMethod()); // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always // better than an invoke-interface because: // 1) In the best case, the interface call has one more indirection (to fetch the IMT). // 2) We will not go to the conflict trampoline with an invoke-virtual. // TODO: Consider sharpening once it is not dependent on the compiler driver. + + if (method->IsDefault() && !method->IsCopied()) { + // Changing to invoke-virtual cannot be done on an original default method + // since it's not in any vtable. Devirtualization by exact type/inline-cache + // always uses a method in the iftable which is never an original default + // method. + // On the other hand, inlining an original default method by CHA is fine. 
+ DCHECK(cha_devirtualize); + return false; + } + const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); uint32_t dex_method_index = FindMethodIndexIn( method, caller_dex_file, invoke_instruction->GetDexMethodIndex()); @@ -1076,13 +1198,34 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, return true; } +size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const { + const HInliner* current = this; + size_t count = 0; + do { + if (current->graph_->GetArtMethod() == method) { + ++count; + } + current = current->parent_; + } while (current != nullptr); + return count; +} + bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, HInstruction** return_replacement) { if (method->IsProxyMethod()) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because of unimplemented inline support for proxy methods."; + LOG_FAIL(kNotInlinedProxy) + << "Method " << method->PrettyMethod() + << " is not inlined because of unimplemented inline support for proxy methods."; + return false; + } + + if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) { + LOG_FAIL(kNotInlinedRecursiveBudget) + << "Method " + << method->PrettyMethod() + << " is not inlined because it has reached its recursive call budget."; return false; } @@ -1091,15 +1234,16 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (!compiler_driver_->MayInline(method->GetDexFile(), outer_compilation_unit_.GetDexFile())) { if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) { - VLOG(compiler) << "Successfully replaced pattern of invoke " - << method->PrettyMethod(); + LOG_SUCCESS() << "Successfully replaced pattern of invoke " + << method->PrettyMethod(); MaybeRecordStat(kReplacedInvokeWithSimplePattern); return true; } - VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in " - << outer_compilation_unit_.GetDexFile()->GetLocation() << " (" - << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from " - << method->GetDexFile()->GetLocation(); + LOG_FAIL(kNotInlinedWont) + << "Won't inline " << method->PrettyMethod() << " in " + << outer_compilation_unit_.GetDexFile()->GetLocation() << " (" + << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from " + << method->GetDexFile()->GetLocation(); return false; } @@ -1108,30 +1252,32 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, const DexFile::CodeItem* code_item = method->GetCodeItem(); if (code_item == nullptr) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because it is native"; + LOG_FAIL_NO_STAT() + << "Method " << method->PrettyMethod() << " is not inlined because it is native"; return false; } size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (code_item->insns_size_in_code_units_ > inline_max_code_units) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is too big to inline: " - << code_item->insns_size_in_code_units_ - << " > " - << inline_max_code_units; + LOG_FAIL(kNotInlinedCodeItem) + << "Method " << method->PrettyMethod() + << " is not inlined because its code item is too big: " + << code_item->insns_size_in_code_units_ + << " > " + << inline_max_code_units; return false; } if (code_item->tries_size_ != 0) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because of try block"; + LOG_FAIL(kNotInlinedTryCatch) + << "Method " << 
method->PrettyMethod() << " is not inlined because of try block"; return false; } if (!method->IsCompilable()) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " has soft failures un-handled by the compiler, so it cannot be inlined"; + LOG_FAIL(kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " has soft failures un-handled by the compiler, so it cannot be inlined"; } if (!method->GetDeclaringClass()->IsVerified()) { @@ -1139,8 +1285,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (Runtime::Current()->UseJitCompilation() || !compiler_driver_->IsMethodVerifiedWithoutFailures( method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " couldn't be verified, so it cannot be inlined"; + LOG_FAIL(kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " couldn't be verified, so it cannot be inlined"; return false; } } @@ -1149,9 +1296,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { // Case of a static method that cannot be inlined because it implicitly // requires an initialization check of its declaring class. - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because it is static and requires a clinit" - << " check that cannot be emitted due to Dex cache limitations"; + LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod() + << " is not inlined because it is static and requires a clinit" + << " check that cannot be emitted due to Dex cache limitations"; return false; } @@ -1160,7 +1307,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - VLOG(compiler) << "Successfully inlined " << method->PrettyMethod(); + LOG_SUCCESS() << method->PrettyMethod(); MaybeRecordStat(kInlinedInvoke); return true; } @@ -1448,15 +1595,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, handles_); if (builder.BuildGraph() != kAnalysisSuccess) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be built, so cannot be inlined"; + LOG_FAIL(kNotInlinedCannotBuild) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be built, so cannot be inlined"; return false; } if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " cannot be inlined because of the register allocator"; + LOG_FAIL(kNotInlinedRegisterAllocator) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " cannot be inlined because of the register allocator"; return false; } @@ -1503,15 +1652,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, /* is_first_run */ false).Run(); } - size_t number_of_instructions_budget = kMaximumNumberOfHInstructions; - size_t number_of_inlined_instructions = - RunOptimizations(callee_graph, code_item, dex_compilation_unit); - number_of_instructions_budget += number_of_inlined_instructions; + RunOptimizations(callee_graph, code_item, dex_compilation_unit); HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it has an infinite loop"; + LOG_FAIL(kNotInlinedInfiniteLoop) + << "Method " << 
callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it has an infinite loop"; return false; } @@ -1520,15 +1667,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (predecessor->GetLastInstruction()->IsThrow()) { if (invoke_instruction->GetBlock()->IsTryBlock()) { // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto. - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because one branch always throws and" - << " caller is in a try/catch block"; + LOG_FAIL(kNotInlinedTryCatch) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because one branch always throws and" + << " caller is in a try/catch block"; return false; } else if (graph_->GetExitBlock() == nullptr) { // TODO(ngeoffray): Support adding HExit in the caller graph. - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because one branch always throws and" - << " caller does not have an exit block"; + LOG_FAIL(kNotInlinedInfiniteLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because one branch always throws and" + << " caller does not have an exit block"; return false; } else if (graph_->HasIrreducibleLoops()) { // TODO(ngeoffray): Support re-computing loop information to graphs with @@ -1544,32 +1693,31 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } if (!has_one_return) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it always throws"; + LOG_FAIL(kNotInlinedAlwaysThrows) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it always throws"; return false; } size_t number_of_instructions = 0; - - bool can_inline_environment = - total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters; - // Skip the entry block, it does not contain instructions that prevent inlining. for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) { if (block->IsLoopHeader()) { if (block->GetLoopInformation()->IsIrreducible()) { // Don't inline methods with irreducible loops, they could prevent some // optimizations to run. - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it contains an irreducible loop"; + LOG_FAIL(kNotInlinedIrreducibleLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it contains an irreducible loop"; return false; } if (!block->GetLoopInformation()->HasExitEdge()) { // Don't inline methods with loops without exit, since they cause the // loop information to be computed incorrectly when updating after // inlining. 
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it contains a loop with no exit"; + LOG_FAIL(kNotInlinedLoopWithoutExit) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it contains a loop with no exit"; return false; } } @@ -1577,34 +1725,39 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, for (HInstructionIterator instr_it(block->GetInstructions()); !instr_it.Done(); instr_it.Advance()) { - if (number_of_instructions++ == number_of_instructions_budget) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " is not inlined because its caller has reached" - << " its instruction budget limit."; + if (++number_of_instructions >= inlining_budget_) { + LOG_FAIL(kNotInlinedInstructionBudget) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " is not inlined because the outer method has reached" + << " its instruction budget limit."; return false; } HInstruction* current = instr_it.Current(); - if (!can_inline_environment && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " is not inlined because its caller has reached" - << " its environment budget limit."; + if (current->NeedsEnvironment() && + (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) { + LOG_FAIL(kNotInlinedEnvironmentBudget) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " is not inlined because its caller has reached" + << " its environment budget limit."; return false; } if (current->NeedsEnvironment() && !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(), resolved_method)) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because " << current->DebugName() - << " needs an environment, is in a different dex file" - << ", and cannot be encoded in the stack maps."; + LOG_FAIL(kNotInlinedStackMaps) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because " << current->DebugName() + << " needs an environment, is in a different dex file" + << ", and cannot be encoded in the stack maps."; return false; } if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because " << current->DebugName() - << " it is in a different dex file and requires access to the dex cache"; + LOG_FAIL(kNotInlinedDexCache) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because " << current->DebugName() + << " it is in a different dex file and requires access to the dex cache"; return false; } @@ -1613,21 +1766,24 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, current->IsUnresolvedStaticFieldSet() || current->IsUnresolvedInstanceFieldSet()) { // Entrypoint for unresolved fields does not handle inlined frames. 
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it is using an unresolved" - << " entrypoint"; + LOG_FAIL(kNotInlinedUnresolvedEntrypoint) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it is using an unresolved" + << " entrypoint"; return false; } } } - number_of_inlined_instructions_ += number_of_instructions; - DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId()) << "No instructions can be added to the outer graph while inner graph is being built"; + // Inline the callee graph inside the caller graph. const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId(); graph_->SetCurrentInstructionId(callee_instruction_counter); *return_replacement = callee_graph->InlineInto(graph_, invoke_instruction); + // Update our budget for other inlining attempts in `caller_graph`. + total_number_of_instructions_ += number_of_instructions; + UpdateInliningBudget(); DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId()) << "No instructions can be added to the inner graph during inlining into the outer graph"; @@ -1640,9 +1796,9 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return true; } -size_t HInliner::RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit) { +void HInliner::RunOptimizations(HGraph* callee_graph, + const DexFile::CodeItem* code_item, + const DexCompilationUnit& dex_compilation_unit) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); @@ -1664,23 +1820,37 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, optimization->Run(); } - size_t number_of_inlined_instructions = 0u; - if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) { - HInliner inliner(callee_graph, - outermost_graph_, - codegen_, - outer_compilation_unit_, - dex_compilation_unit, - compiler_driver_, - handles_, - inline_stats_, - total_number_of_dex_registers_ + code_item->registers_size_, - depth_ + 1); - inliner.Run(); - number_of_inlined_instructions += inliner.number_of_inlined_instructions_; + // Bail early for pathological cases on the environment (for example recursive calls, + // or too large environment). + if (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters) { + LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod() + << " will not be inlined because the outer method has reached" + << " its environment budget limit."; + return; + } + + // Bail early if we know we already are over the limit. + size_t number_of_instructions = CountNumberOfInstructions(callee_graph); + if (number_of_instructions > inlining_budget_) { + LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod() + << " will not be inlined because the outer method has reached" + << " its instruction budget limit. 
" << number_of_instructions; + return; } - return number_of_inlined_instructions; + HInliner inliner(callee_graph, + outermost_graph_, + codegen_, + outer_compilation_unit_, + dex_compilation_unit, + compiler_driver_, + handles_, + inline_stats_, + total_number_of_dex_registers_ + code_item->registers_size_, + total_number_of_instructions_ + number_of_instructions, + this, + depth_ + 1); + inliner.Run(); } static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti, diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index a032042c78..9e4685cbf4 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -42,7 +42,9 @@ class HInliner : public HOptimization { VariableSizedHandleScope* handles, OptimizingCompilerStats* stats, size_t total_number_of_dex_registers, - size_t depth) + size_t total_number_of_instructions, + HInliner* parent, + size_t depth = 0) : HOptimization(outer_graph, kInlinerPassName, stats), outermost_graph_(outermost_graph), outer_compilation_unit_(outer_compilation_unit), @@ -50,8 +52,10 @@ class HInliner : public HOptimization { codegen_(codegen), compiler_driver_(compiler_driver), total_number_of_dex_registers_(total_number_of_dex_registers), + total_number_of_instructions_(total_number_of_instructions), + parent_(parent), depth_(depth), - number_of_inlined_instructions_(0), + inlining_budget_(0), handles_(handles), inline_stats_(nullptr) {} @@ -95,10 +99,10 @@ class HInliner : public HOptimization { HInstruction** return_replacement); // Run simple optimizations on `callee_graph`. - // Returns the number of inlined instructions. - size_t RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit); + void RunOptimizations(HGraph* callee_graph, + const DexFile::CodeItem* code_item, + const DexCompilationUnit& dex_compilation_unit) + REQUIRES_SHARED(Locks::mutator_lock_); // Try to recognize known simple patterns and replace invoke call with appropriate instructions. bool TryPatternSubstitution(HInvoke* invoke_instruction, @@ -259,14 +263,30 @@ class HInliner : public HOptimization { HInstruction* return_replacement, HInstruction* invoke_instruction); + // Update the inlining budget based on `total_number_of_instructions_`. + void UpdateInliningBudget(); + + // Count the number of calls of `method` being inlined recursively. + size_t CountRecursiveCallsOf(ArtMethod* method) const; + + // Pretty-print for spaces during logging. + std::string DepthString(int line) const; + HGraph* const outermost_graph_; const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; CodeGenerator* const codegen_; CompilerDriver* const compiler_driver_; const size_t total_number_of_dex_registers_; + size_t total_number_of_instructions_; + + // The 'parent' inliner, that means the inlinigng optimization that requested + // `graph_` to be inlined. + const HInliner* const parent_; const size_t depth_; - size_t number_of_inlined_instructions_; + + // The budget left for inlining, in number of instructions. + size_t inlining_budget_; VariableSizedHandleScope* const handles_; // Used to record stats about optimizations on the inlined graph. 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 17421fc364..60790e5b84 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -2132,6 +2132,9 @@ void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) { if (cond->IsConstant()) { if (cond->AsIntConstant()->IsFalse()) { // Never deopt: instruction can be removed. + if (deoptimize->GuardsAnInput()) { + deoptimize->ReplaceWith(deoptimize->GuardedInput()); + } deoptimize->GetBlock()->RemoveInstruction(deoptimize); } else { // Always deopt. diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index b25bad7170..0d933eaf82 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -39,6 +39,7 @@ using helpers::Int32ConstantFrom; using helpers::LocationFrom; using helpers::LowRegisterFrom; using helpers::LowSRegisterFrom; +using helpers::HighSRegisterFrom; using helpers::OutputDRegister; using helpers::OutputSRegister; using helpers::OutputRegister; @@ -794,6 +795,58 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) { __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } +void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { + if (features_.HasARMv8AInstructions()) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); + + ArmVIXLAssembler* assembler = GetAssembler(); + vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0); + vixl32::Register out_reg = OutputRegister(invoke); + vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0)); + vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0)); + vixl32::Label done; + vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done); + + // Round to nearest integer, ties away from zero. + __ Vcvta(S32, F32, temp1, in_reg); + __ Vmov(out_reg, temp1); + + // For positive, zero or NaN inputs, rounding is done. + __ Cmp(out_reg, 0); + __ B(ge, final_label, /* far_target */ false); + + // Handle input < 0 cases. + // If input is negative but not a tie, previous result (round to nearest) is valid. + // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1. + __ Vrinta(F32, F32, temp1, in_reg); + __ Vmov(temp2, 0.5); + __ Vsub(F32, temp1, in_reg, temp1); + __ Vcmp(F32, temp1, temp2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + { + // Use ExactAsemblyScope here because we are using IT. + ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(eq); + __ add(eq, out_reg, out_reg, 1); + } + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); } @@ -3100,7 +3153,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { } UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? 
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 5bcfa4c98b..8d15f78cce 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -28,7 +28,18 @@ namespace art { */ class LICMTest : public CommonCompilerTest { public: - LICMTest() : pool_(), allocator_(&pool_) { + LICMTest() + : pool_(), + allocator_(&pool_), + entry_(nullptr), + loop_preheader_(nullptr), + loop_header_(nullptr), + loop_body_(nullptr), + return_(nullptr), + exit_(nullptr), + parameter_(nullptr), + int_constant_(nullptr), + float_constant_(nullptr) { graph_ = CreateGraph(&allocator_); } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ec706e6694..caada8bccb 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1088,6 +1088,19 @@ void HInstruction::ReplaceWith(HInstruction* other) { DCHECK(env_uses_.empty()); } +void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) { + const HUseList<HInstruction*>& uses = GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (dominator->StrictlyDominates(user)) { + user->ReplaceInput(replacement, index); + } + } +} + void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { HUserRecord<HInstruction*> input_use = InputRecordAt(index); if (input_use.GetInstruction() == replacement) { @@ -1323,6 +1336,18 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) { } } +std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) { + switch (rhs) { + case HDeoptimize::Kind::kBCE: + return os << "bce"; + case HDeoptimize::Kind::kInline: + return os << "inline"; + default: + LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs); + UNREACHABLE(); + } +} + bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { return this == instruction->GetPreviousDisregardingMoves(); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 6881d8f6ae..5f5a28c520 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -341,6 +341,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_current_method_(nullptr), + art_method_(nullptr), inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()), osr_(osr), cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) { @@ -2080,6 +2081,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void SetLocations(LocationSummary* locations) { locations_ = locations; } void ReplaceWith(HInstruction* instruction); + void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); void ReplaceInput(HInstruction* replacement, size_t index); // This is almost the same as doing `ReplaceWith()`. 
But in this helper, the @@ -2943,28 +2945,97 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { }; // Deoptimize to interpreter, upon checking a condition. -class HDeoptimize FINAL : public HTemplateInstruction<1> { +class HDeoptimize FINAL : public HVariableInputSizeInstruction { public: + enum class Kind { + kBCE, + kInline, + kLast = kInline + }; + + // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move + // across. + HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc) + : HVariableInputSizeInstruction( + SideEffects::All(), + dex_pc, + arena, + /* number_of_inputs */ 1, + kArenaAllocMisc) { + SetPackedFlag<kFieldCanBeMoved>(false); + SetPackedField<DeoptimizeKindField>(kind); + SetRawInputAt(0, cond); + } + + // Use this constructor when the `HDeoptimize` guards an instruction, and any user + // that relies on the deoptimization to pass should have its input be the `HDeoptimize` + // instead of `guard`. // We set CanTriggerGC to prevent any intermediate address to be live // at the point of the `HDeoptimize`. - HDeoptimize(HInstruction* cond, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { + HDeoptimize(ArenaAllocator* arena, + HInstruction* cond, + HInstruction* guard, + Kind kind, + uint32_t dex_pc) + : HVariableInputSizeInstruction( + SideEffects::CanTriggerGC(), + dex_pc, + arena, + /* number_of_inputs */ 2, + kArenaAllocMisc) { + SetPackedFlag<kFieldCanBeMoved>(true); + SetPackedField<DeoptimizeKindField>(kind); SetRawInputAt(0, cond); + SetRawInputAt(1, guard); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; + bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); } + + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind()); } + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); } + + Primitive::Type GetType() const OVERRIDE { + return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid; + } + + bool GuardsAnInput() const { + return InputCount() == 2; + } + + HInstruction* GuardedInput() const { + DCHECK(GuardsAnInput()); + return InputAt(1); + } + + void RemoveGuard() { + RemoveInputAt(1); + } + DECLARE_INSTRUCTION(Deoptimize); private: + static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits; + static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1; + static constexpr size_t kFieldDeoptimizeKindSize = + MinimumBitsToStore(static_cast<size_t>(Kind::kLast)); + static constexpr size_t kNumberOfDeoptimizePackedBits = + kFieldDeoptimizeKind + kFieldDeoptimizeKindSize; + static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits, + "Too many packed fields."); + using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>; + DISALLOW_COPY_AND_ASSIGN(HDeoptimize); }; +std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs); + // Represents a should_deoptimize flag. Currently used for CHA-based devirtualization. // The compiled code checks this flag value in a guard before devirtualized call and // if it's true, starts to do deoptimization. 
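Note on the reworked HDeoptimize above: the one-input constructor keeps the old "barrier" behaviour (SideEffects::All(), not movable), while the two-input constructor lets the deoptimization guard a value, carry that value's type, and participate in code motion keyed on its Kind. The sketch below condenses how the inliner hunks earlier in this diff wire up the guarding form; the helper name is illustrative, and the usual ART optimizing-compiler headers are assumed.

    #include "nodes.h"  // assumed ART header providing HGraph, HDeoptimize, HInvoke, etc.

    // Illustrative condensation of the AddTypeGuard / same-target polymorphic paths above.
    static void GuardReceiverWithDeopt(HGraph* graph,
                                       HBasicBlock* bb_cursor,
                                       HInstruction* compare,
                                       HInstruction* receiver,
                                       HInvoke* invoke_instruction) {
      HDeoptimize* deoptimize = new (graph->GetArena()) HDeoptimize(
          graph->GetArena(),
          compare,                          // input 0: condition that triggers the deopt
          receiver,                         // input 1: the value being guarded
          HDeoptimize::Kind::kInline,
          invoke_instruction->GetDexPc());
      bb_cursor->InsertInstructionAfter(deoptimize, compare);
      deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
      // Users dominated by the guard now read the receiver through the HDeoptimize,
      // so GetType() and GuardedInput() let it stand in for the guarded value.
      receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
      deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
    }

This is also why the instruction_simplifier hunk above redirects uses before deleting: when the condition folds to false, a guarding HDeoptimize must first be replaced by GuardedInput() so its users fall back to the original value.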
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index d84fe6ccff..60af2b4201 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -174,53 +174,45 @@ static constexpr uint8_t expected_cfi_kMips[] = { // 0x00000034: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64[] = { - 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, - 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, - 0xE8, 0xFF, 0xBD, 0x67, 0x18, 0x00, 0xBD, 0x67, - 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF, - 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67, - 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, + 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF, + 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7, + 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF, + 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67, + 0x00, 0x00, 0x1F, 0xD8, }; - static constexpr uint8_t expected_cfi_kMips64[] = { - 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, - 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x0A, 0x44, - 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF, - 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, + 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, + 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: daddiu r29, r29, -40 -// 0x00000004: .cfi_def_cfa_offset: 40 -// 0x00000004: sd r31, +32(r29) +// 0x00000000: daddiu r29, r29, -64 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: sd r31, +56(r29) // 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd r17, +24(r29) +// 0x00000008: sd r17, +48(r29) // 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd r16, +16(r29) +// 0x0000000c: sd r16, +40(r29) // 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +8(r29) +// 0x00000010: sdc1 f25, +32(r29) // 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +0(r29) +// 0x00000014: sdc1 f24, +24(r29) // 0x00000018: .cfi_offset: r56 at cfa-40 -// 0x00000018: daddiu r29, r29, -24 -// 0x0000001c: .cfi_def_cfa_offset: 64 -// 0x0000001c: .cfi_remember_state -// 0x0000001c: daddiu r29, r29, 24 -// 0x00000020: .cfi_def_cfa_offset: 40 -// 0x00000020: ldc1 f24, +0(r29) -// 0x00000024: .cfi_restore: r56 -// 0x00000024: ldc1 f25, +8(r29) +// 0x00000018: .cfi_remember_state +// 0x00000018: ld r31, +56(r29) +// 0x0000001c: .cfi_restore: r31 +// 0x0000001c: ld r17, +48(r29) +// 0x00000020: .cfi_restore: r17 +// 0x00000020: ld r16, +40(r29) +// 0x00000024: .cfi_restore: r16 +// 0x00000024: ldc1 f25, +32(r29) // 0x00000028: .cfi_restore: r57 -// 0x00000028: ld r16, +16(r29) -// 0x0000002c: .cfi_restore: r16 -// 0x0000002c: ld r17, +24(r29) -// 0x00000030: .cfi_restore: r17 -// 0x00000030: ld r31, +32(r29) -// 0x00000034: .cfi_restore: r31 -// 0x00000034: daddiu r29, r29, 40 -// 0x00000038: .cfi_def_cfa_offset: 0 -// 0x00000038: jr r31 -// 0x0000003c: nop -// 0x00000040: .cfi_restore_state -// 0x00000040: .cfi_def_cfa_offset: 64 +// 0x00000028: ldc1 f24, +24(r29) +// 0x0000002c: .cfi_restore: r56 +// 0x0000002c: daddiu r29, r29, 64 +// 0x00000030: .cfi_def_cfa_offset: 0 +// 0x00000030: jic 
r31, 0 +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kThumb2_adjust[] = { #ifdef ART_USE_OLD_ARM_BACKEND @@ -403,58 +395,52 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x00020060: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { - 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, - 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, - 0xE8, 0xFF, 0xBD, 0x67, 0x02, 0x00, 0xA6, 0x60, - 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, + 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF, + 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7, + 0x02, 0x00, 0xA6, 0x60, 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, }; static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { - 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, - 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, - 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF, + 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67, + 0x00, 0x00, 0x1F, 0xD8, }; static constexpr uint8_t expected_cfi_kMips64_adjust[] = { - 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, - 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x10, 0x00, - 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, - 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, + 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x04, 0x10, 0x00, 0x02, 0x00, 0x0A, + 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, + 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: daddiu r29, r29, -40 -// 0x00000004: .cfi_def_cfa_offset: 40 -// 0x00000004: sd r31, +32(r29) +// 0x00000000: daddiu r29, r29, -64 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: sd r31, +56(r29) // 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd r17, +24(r29) +// 0x00000008: sd r17, +48(r29) // 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd r16, +16(r29) +// 0x0000000c: sd r16, +40(r29) // 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +8(r29) +// 0x00000010: sdc1 f25, +32(r29) // 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +0(r29) +// 0x00000014: sdc1 f24, +24(r29) // 0x00000018: .cfi_offset: r56 at cfa-40 -// 0x00000018: daddiu r29, r29, -24 -// 0x0000001c: .cfi_def_cfa_offset: 64 -// 0x0000001c: bnec r5, r6, 0x0000002c ; +12 -// 0x00000020: auipc r1, 2 -// 0x00000024: jic r1, 12 ; b 0x00020030 ; +131080 -// 0x00000028: nop +// 0x00000018: bnec r5, r6, 0x00000024 ; +12 +// 0x0000001c: auipc r1, 2 +// 0x00000020: jic r1, 12 ; bc 0x00020028 ; +131080 +// 0x00000024: nop // ... 
-// 0x00020028: nop -// 0x0002002c: .cfi_remember_state -// 0x0002002c: daddiu r29, r29, 24 -// 0x00020030: .cfi_def_cfa_offset: 40 -// 0x00020030: ldc1 f24, +0(r29) -// 0x00020034: .cfi_restore: r56 -// 0x00020034: ldc1 f25, +8(r29) +// 0x00020024: nop +// 0x00020028: .cfi_remember_state +// 0x00020028: ld r31, +56(r29) +// 0x0002002c: .cfi_restore: r31 +// 0x0002002c: ld r17, +48(r29) +// 0x00020030: .cfi_restore: r17 +// 0x00020030: ld r16, +40(r29) +// 0x00020034: .cfi_restore: r16 +// 0x00020034: ldc1 f25, +32(r29) // 0x00020038: .cfi_restore: r57 -// 0x00020038: ld r16, +16(r29) -// 0x0002003c: .cfi_restore: r16 -// 0x0002003c: ld r17, +24(r29) -// 0x00020040: .cfi_restore: r17 -// 0x00020040: ld r31, +32(r29) -// 0x00020044: .cfi_restore: r31 -// 0x00020044: daddiu r29, r29, 40 -// 0x00020047: .cfi_def_cfa_offset: 0 -// 0x00020048: jr r31 -// 0x0002004c: nop -// 0x00020050: .cfi_restore_state -// 0x00020050: .cfi_def_cfa_offset: 64 +// 0x00020038: ldc1 f24, +24(r29) +// 0x0002003c: .cfi_restore: r56 +// 0x0002003c: daddiu r29, r29, 64 +// 0x00020040: .cfi_def_cfa_offset: 0 +// 0x00020040: jic r31, 0 +// 0x00020044: .cfi_restore_state +// 0x00020044: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 23ccd9e953..3c6d2d64a9 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -499,7 +499,8 @@ static HOptimization* BuildOptimization( handles, stats, number_of_dex_registers, - /* depth */ 0); + /* total_number_of_instructions */ 0, + /* parent */ nullptr); } else if (opt_name == HSharpening::kSharpeningPassName) { return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles); } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) { @@ -607,8 +608,7 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph, VariableSizedHandleScope* handles) const { OptimizingCompilerStats* stats = compilation_stats_.get(); const CompilerOptions& compiler_options = driver->GetCompilerOptions(); - bool should_inline = (compiler_options.GetInlineDepthLimit() > 0) - && (compiler_options.GetInlineMaxCodeUnits() > 0); + bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0); if (!should_inline) { return; } @@ -623,7 +623,8 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph, handles, stats, number_of_dex_registers, - /* depth */ 0); + /* total_number_of_instructions */ 0, + /* parent */ nullptr); HOptimization* optimizations[] = { inliner }; RunOptimizations(optimizations, arraysize(optimizations), pass_observer); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index ae9a8119a7..a211c5472a 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -69,6 +69,23 @@ enum MethodCompilationStat { kExplicitNullCheckGenerated, kSimplifyIf, kInstructionSunk, + kNotInlinedUnresolvedEntrypoint, + kNotInlinedDexCache, + kNotInlinedStackMaps, + kNotInlinedEnvironmentBudget, + kNotInlinedInstructionBudget, + kNotInlinedLoopWithoutExit, + kNotInlinedIrreducibleLoop, + kNotInlinedAlwaysThrows, + kNotInlinedInfiniteLoop, + kNotInlinedTryCatch, + kNotInlinedRegisterAllocator, + kNotInlinedCannotBuild, + kNotInlinedNotVerified, + kNotInlinedCodeItem, + kNotInlinedWont, + kNotInlinedRecursiveBudget, + kNotInlinedProxy, kLastStat }; @@ -168,6 +185,23 @@ class OptimizingCompilerStats { case kExplicitNullCheckGenerated: name 
= "ExplicitNullCheckGenerated"; break; case kSimplifyIf: name = "SimplifyIf"; break; case kInstructionSunk: name = "InstructionSunk"; break; + case kNotInlinedUnresolvedEntrypoint: name = "NotInlinedUnresolvedEntrypoint"; break; + case kNotInlinedDexCache: name = "NotInlinedDexCache"; break; + case kNotInlinedStackMaps: name = "NotInlinedStackMaps"; break; + case kNotInlinedEnvironmentBudget: name = "NotInlinedEnvironmentBudget"; break; + case kNotInlinedInstructionBudget: name = "NotInlinedInstructionBudget"; break; + case kNotInlinedLoopWithoutExit: name = "NotInlinedLoopWithoutExit"; break; + case kNotInlinedIrreducibleLoop: name = "NotInlinedIrreducibleLoop"; break; + case kNotInlinedAlwaysThrows: name = "NotInlinedAlwaysThrows"; break; + case kNotInlinedInfiniteLoop: name = "NotInlinedInfiniteLoop"; break; + case kNotInlinedTryCatch: name = "NotInlinedTryCatch"; break; + case kNotInlinedRegisterAllocator: name = "NotInlinedRegisterAllocator"; break; + case kNotInlinedCannotBuild: name = "NotInlinedCannotBuild"; break; + case kNotInlinedNotVerified: name = "NotInlinedNotVerified"; break; + case kNotInlinedCodeItem: name = "NotInlinedCodeItem"; break; + case kNotInlinedWont: name = "NotInlinedWont"; break; + case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break; + case kNotInlinedProxy: name = "NotInlinedProxy"; break; case kLastStat: LOG(FATAL) << "invalid stat " diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index efbaf6c221..66bfea9860 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -40,6 +40,14 @@ void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) { check->ReplaceWith(check->InputAt(0)); } +void PrepareForRegisterAllocation::VisitDeoptimize(HDeoptimize* deoptimize) { + if (deoptimize->GuardsAnInput()) { + // Replace the uses with the actual guarded instruction. 
+ deoptimize->ReplaceWith(deoptimize->GuardedInput()); + deoptimize->RemoveGuard(); + } +} + void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { check->ReplaceWith(check->InputAt(0)); if (check->IsStringCharAt()) { diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index c128227654..7ffbe44ef6 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -44,6 +44,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const; bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 6e332ca59b..d5637b9b75 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -310,8 +310,8 @@ static void BoundTypeForClassCheck(HInstruction* check) { BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti); } else { DCHECK(check->IsDeoptimize()); - if (compare->IsEqual()) { - BoundTypeIn(receiver, check->GetBlock(), check, class_rti); + if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) { + check->SetReferenceTypeInfo(class_rti); } } } diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index 84a4bab1a9..0b49ce1a4c 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -29,7 +29,7 @@ namespace art { */ class ReferenceTypePropagationTest : public CommonCompilerTest { public: - ReferenceTypePropagationTest() : pool_(), allocator_(&pool_) { + ReferenceTypePropagationTest() : pool_(), allocator_(&pool_), propagation_(nullptr) { graph_ = CreateGraph(&allocator_); } diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index ab0dad4300..9236a0e4fa 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -315,7 +315,10 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor { // This class and its sub-classes will never be used to drive a visit of an // `HGraph` but only to visit `HInstructions` one at a time, so we do not need // to pass a valid graph to `HGraphDelegateVisitor()`. - SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {} + SchedulingLatencyVisitor() + : HGraphDelegateVisitor(nullptr), + last_visited_latency_(0), + last_visited_internal_latency_(0) {} void VisitInstruction(HInstruction* instruction) OVERRIDE { LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". 
" @@ -413,6 +416,7 @@ class HScheduler { selector_(selector), only_optimize_loop_blocks_(true), scheduling_graph_(this, arena), + cursor_(nullptr), candidates_(arena_->Adapter(kArenaAllocScheduler)) {} virtual ~HScheduler() {} diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index 1916c73ca4..a1016d1d47 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -189,13 +189,14 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { // Use HAboveOrEqual+HDeoptimize as the bounds check. HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length); block->AddInstruction(ae); - HInstruction* deoptimize = new(&allocator_) HDeoptimize(ae, /* dex_pc */ 0u); + HInstruction* deoptimize = + new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u); block->AddInstruction(deoptimize); HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_, - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, - deoptimize); + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + deoptimize); deoptimize_env->CopyFrom(args); deoptimize->SetRawEnvironment(deoptimize_env); HInstruction* array_set = diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h index d71c2fe997..ad3a099eb6 100644 --- a/compiler/utils/atomic_method_ref_map-inl.h +++ b/compiler/utils/atomic_method_ref_map-inl.h @@ -42,7 +42,7 @@ template <typename T> inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const { const ElementArray* const array = GetArray(ref.dex_file); if (array == nullptr) { - return kInsertResultInvalidDexFile; + return false; } *out = (*array)[ref.dex_method_index].LoadRelaxed(); return true; diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc index dea396e4a7..42d061ec15 100644 --- a/compiler/utils/mips64/managed_register_mips64.cc +++ b/compiler/utils/mips64/managed_register_mips64.cc @@ -26,6 +26,11 @@ bool Mips64ManagedRegister::Overlaps(const Mips64ManagedRegister& other) const { CHECK(IsValidManagedRegister()); CHECK(other.IsValidManagedRegister()); if (Equals(other)) return true; + if (IsFpuRegister() && other.IsVectorRegister()) { + return (AsFpuRegister() == other.AsOverlappingFpuRegister()); + } else if (IsVectorRegister() && other.IsFpuRegister()) { + return (AsVectorRegister() == other.AsOverlappingVectorRegister()); + } return false; } @@ -36,6 +41,8 @@ void Mips64ManagedRegister::Print(std::ostream& os) const { os << "GPU: " << static_cast<int>(AsGpuRegister()); } else if (IsFpuRegister()) { os << "FpuRegister: " << static_cast<int>(AsFpuRegister()); + } else if (IsVectorRegister()) { + os << "VectorRegister: " << static_cast<int>(AsVectorRegister()); } else { os << "??: " << RegId(); } diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h index c9f95569cf..3980199b1e 100644 --- a/compiler/utils/mips64/managed_register_mips64.h +++ b/compiler/utils/mips64/managed_register_mips64.h @@ -30,11 +30,27 @@ const int kNumberOfGpuAllocIds = kNumberOfGpuRegisters; const int kNumberOfFpuRegIds = kNumberOfFpuRegisters; const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters; -const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds; -const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds; - -// An instance of class 
'ManagedRegister' represents a single GPU register (enum -// Register) or a double precision FP register (enum FpuRegister) +const int kNumberOfVecRegIds = kNumberOfVectorRegisters; +const int kNumberOfVecAllocIds = kNumberOfVectorRegisters; + +const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds + kNumberOfVecRegIds; +const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds + kNumberOfVecAllocIds; + +// Register ids map: +// [0..R[ core registers (enum GpuRegister) +// [R..F[ floating-point registers (enum FpuRegister) +// [F..W[ MSA vector registers (enum VectorRegister) +// where +// R = kNumberOfGpuRegIds +// F = R + kNumberOfFpuRegIds +// W = F + kNumberOfVecRegIds + +// An instance of class 'ManagedRegister' represents a single Mips64 register. +// A register can be one of the following: +// * core register (enum GpuRegister) +// * floating-point register (enum FpuRegister) +// * MSA vector register (enum VectorRegister) +// // 'ManagedRegister::NoRegister()' provides an invalid register. // There is a one-to-one mapping between ManagedRegister and register id. class Mips64ManagedRegister : public ManagedRegister { @@ -49,6 +65,21 @@ class Mips64ManagedRegister : public ManagedRegister { return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds); } + constexpr VectorRegister AsVectorRegister() const { + CHECK(IsVectorRegister()); + return static_cast<VectorRegister>(id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegisters)); + } + + constexpr FpuRegister AsOverlappingFpuRegister() const { + CHECK(IsValidManagedRegister()); + return static_cast<FpuRegister>(AsVectorRegister()); + } + + constexpr VectorRegister AsOverlappingVectorRegister() const { + CHECK(IsValidManagedRegister()); + return static_cast<VectorRegister>(AsFpuRegister()); + } + constexpr bool IsGpuRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfGpuRegIds); @@ -60,6 +91,12 @@ class Mips64ManagedRegister : public ManagedRegister { return (0 <= test) && (test < kNumberOfFpuRegIds); } + constexpr bool IsVectorRegister() const { + CHECK(IsValidManagedRegister()); + const int test = id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegIds); + return (0 <= test) && (test < kNumberOfVecRegIds); + } + void Print(std::ostream& os) const; // Returns true if the two managed-registers ('this' and 'other') overlap. @@ -77,6 +114,11 @@ class Mips64ManagedRegister : public ManagedRegister { return FromRegId(r + kNumberOfGpuRegIds); } + static constexpr Mips64ManagedRegister FromVectorRegister(VectorRegister r) { + CHECK_NE(r, kNoVectorRegister); + return FromRegId(r + kNumberOfGpuRegIds + kNumberOfFpuRegIds); + } + private: constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc new file mode 100644 index 0000000000..8b72d7e61d --- /dev/null +++ b/compiler/utils/mips64/managed_register_mips64_test.cc @@ -0,0 +1,480 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "managed_register_mips64.h" +#include "globals.h" +#include "gtest/gtest.h" + +namespace art { +namespace mips64 { + +TEST(Mips64ManagedRegister, NoRegister) { + Mips64ManagedRegister reg = ManagedRegister::NoRegister().AsMips64(); + EXPECT_TRUE(reg.IsNoRegister()); + EXPECT_FALSE(reg.Overlaps(reg)); +} + +TEST(Mips64ManagedRegister, GpuRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(ZERO, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(AT); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(AT, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(V0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(V0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(A0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(A0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(A7); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(A7, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(T0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(T0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(T3); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(T3, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(S0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(S0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(GP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(GP, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(SP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(SP, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(RA); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(RA, reg.AsGpuRegister()); +} + +TEST(Mips64ManagedRegister, FpuRegister) { + Mips64ManagedRegister reg = 
Mips64ManagedRegister::FromFpuRegister(F0); + Mips64ManagedRegister vreg = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F0, reg.AsFpuRegister()); + EXPECT_EQ(W0, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + + reg = Mips64ManagedRegister::FromFpuRegister(F1); + vreg = Mips64ManagedRegister::FromVectorRegister(W1); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F1, reg.AsFpuRegister()); + EXPECT_EQ(W1, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + + reg = Mips64ManagedRegister::FromFpuRegister(F20); + vreg = Mips64ManagedRegister::FromVectorRegister(W20); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F20, reg.AsFpuRegister()); + EXPECT_EQ(W20, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F20))); + + reg = Mips64ManagedRegister::FromFpuRegister(F31); + vreg = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F31, reg.AsFpuRegister()); + EXPECT_EQ(W31, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); +} + +TEST(Mips64ManagedRegister, VectorRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromVectorRegister(W0); + Mips64ManagedRegister freg = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W0, reg.AsVectorRegister()); + EXPECT_EQ(F0, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + reg = Mips64ManagedRegister::FromVectorRegister(W2); + freg = Mips64ManagedRegister::FromFpuRegister(F2); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W2, reg.AsVectorRegister()); + EXPECT_EQ(F2, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W2))); + + reg = Mips64ManagedRegister::FromVectorRegister(W13); + freg = Mips64ManagedRegister::FromFpuRegister(F13); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W13, reg.AsVectorRegister()); + EXPECT_EQ(F13, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W13))); + + reg = Mips64ManagedRegister::FromVectorRegister(W29); + freg = Mips64ManagedRegister::FromFpuRegister(F29); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + 
EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W29, reg.AsVectorRegister()); + EXPECT_EQ(F29, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W29))); +} + +TEST(Mips64ManagedRegister, Equals) { + ManagedRegister no_reg = ManagedRegister::NoRegister(); + EXPECT_TRUE(no_reg.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_ZERO = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_TRUE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_A1 = Mips64ManagedRegister::FromGpuRegister(A1); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_TRUE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_S2 = Mips64ManagedRegister::FromGpuRegister(S2); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_TRUE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F0 = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_TRUE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F31 = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + 
EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_TRUE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_W0 = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); + + Mips64ManagedRegister reg_W31 = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_TRUE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +TEST(Mips64ManagedRegister, Overlaps) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0); + Mips64ManagedRegister reg_o = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F0, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W0, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F4); + reg_o = Mips64ManagedRegister::FromVectorRegister(W4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F4, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W4, reg.AsOverlappingVectorRegister()); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F16); + reg_o = Mips64ManagedRegister::FromVectorRegister(W16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F16, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W16, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F31); + reg_o = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F31, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W31, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W0); + reg_o = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W0, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F0, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W4); + reg_o = Mips64ManagedRegister::FromFpuRegister(F4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W4, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F4, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W16); + reg_o = Mips64ManagedRegister::FromFpuRegister(F16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W16, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F16, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W31); + reg_o = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W31, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F31, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(A0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(S0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(RA); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +} // namespace mips64 +} // namespace art diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 4bfc84990d..fa7e98586c 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -18,21 +18,21 @@ #include "verifier/verifier_deps.h" #include "class_linker.h" -#include "compiler/common_compiler_test.h" -#include "compiler/dex/verification_results.h" -#include "compiler/dex/verified_method.h" -#include "compiler/driver/compiler_options.h" -#include "compiler/driver/compiler_driver.h" -#include "compiler/utils/atomic_method_ref_map-inl.h" +#include "common_compiler_test.h" #include "compiler_callbacks.h" +#include "dex/verification_results.h" +#include "dex/verified_method.h" #include "dex_file.h" #include "dex_file_types.h" +#include "driver/compiler_options.h" +#include "driver/compiler_driver.h" #include "handle_scope-inl.h" #include "verifier/method_verifier-inl.h" #include "mirror/class_loader.h" #include "runtime.h" #include "thread.h" #include "scoped_thread_state_change-inl.h" +#include "utils/atomic_method_ref_map-inl.h" namespace art { namespace verifier { |
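Taken together, the managed_register_mips64 changes and the new test encode two rules: register ids are laid out as core ids, then FPU ids, then MSA vector ids, and a floating-point register Fn overlaps exactly the vector register Wn of the same index (besides itself), since an MSA vector register extends the corresponding FPU register. A small standalone sketch of that id arithmetic and overlap rule, using plain ints and an assumed count of 32 registers per class rather than ART's enums (C++14 for the constexpr bodies):

    #include <cassert>

    // Illustrative register-id layout: [0..R) core, [R..F) FPU, [F..W) MSA vectors,
    // where R = kNumGpu, F = R + kNumFpu, W = F + kNumVec (counts assumed to be 32).
    constexpr int kNumGpu = 32;
    constexpr int kNumFpu = 32;
    constexpr int kNumVec = 32;

    constexpr int FromGpu(int r) { return r; }
    constexpr int FromFpu(int f) { return kNumGpu + f; }
    constexpr int FromVec(int w) { return kNumGpu + kNumFpu + w; }

    constexpr bool IsFpu(int id) { return id >= kNumGpu && id < kNumGpu + kNumFpu; }
    constexpr bool IsVec(int id) {
      return id >= kNumGpu + kNumFpu && id < kNumGpu + kNumFpu + kNumVec;
    }

    // Mirrors the Overlaps() rule above: equal ids overlap, and Fn overlaps Wn
    // because the vector register shares its low bits with the FPU register
    // of the same index.
    constexpr bool Overlaps(int a, int b) {
      if (a == b) return true;
      if (IsFpu(a) && IsVec(b)) return (a - kNumGpu) == (b - kNumGpu - kNumFpu);
      if (IsVec(a) && IsFpu(b)) return Overlaps(b, a);
      return false;
    }

    int main() {
      static_assert(Overlaps(FromFpu(4), FromVec(4)), "F4 aliases W4");
      static_assert(!Overlaps(FromFpu(4), FromVec(5)), "F4 does not alias W5");
      static_assert(!Overlaps(FromGpu(4), FromVec(4)), "core regs never alias MSA regs");
      assert(Overlaps(FromVec(31), FromFpu(31)));
      return 0;
    }

The sketch only captures the id layout that the FromVectorRegister() and Overlaps() additions rely on; the real class additionally models invalid registers and routes construction through FromRegId().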