Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.bp | 1
-rw-r--r--  compiler/dex/dex_to_dex_compiler.cc | 12
-rw-r--r--  compiler/dex/dex_to_dex_compiler.h | 3
-rw-r--r--  compiler/driver/compiler_driver.cc | 9
-rw-r--r--  compiler/driver/compiler_options.cc | 9
-rw-r--r--  compiler/driver/compiler_options.h | 16
-rw-r--r--  compiler/image_test.cc | 1
-rw-r--r--  compiler/jit/jit_compiler.cc | 5
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 11
-rw-r--r--  compiler/optimizing/cha_guard_optimization.cc | 7
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 427
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 1
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 4
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 416
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h | 3
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 66
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 4
-rw-r--r--  compiler/optimizing/codegen_test_utils.h | 1
-rw-r--r--  compiler/optimizing/common_arm.h | 5
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 4
-rw-r--r--  compiler/optimizing/induction_var_analysis_test.cc | 16
-rw-r--r--  compiler/optimizing/inliner.cc | 440
-rw-r--r--  compiler/optimizing/inliner.h | 34
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 3
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc | 54
-rw-r--r--  compiler/optimizing/licm_test.cc | 13
-rw-r--r--  compiler/optimizing/nodes.cc | 25
-rw-r--r--  compiler/optimizing/nodes.h | 83
-rw-r--r--  compiler/optimizing/optimizing_cfi_test_expected.inc | 148
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 9
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 34
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 8
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h | 1
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 4
-rw-r--r--  compiler/optimizing/reference_type_propagation_test.cc | 2
-rw-r--r--  compiler/optimizing/scheduler.h | 6
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis_test.cc | 11
-rw-r--r--  compiler/utils/atomic_method_ref_map-inl.h | 2
-rw-r--r--  compiler/utils/mips64/managed_register_mips64.cc | 7
-rw-r--r--  compiler/utils/mips64/managed_register_mips64.h | 52
-rw-r--r--  compiler/utils/mips64/managed_register_mips64_test.cc | 480
-rw-r--r--  compiler/verifier_deps_test.cc | 12
43 files changed, 1728 insertions, 723 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index d57f301ff9..b444fffd56 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -391,6 +391,7 @@ art_cc_test {
mips64: {
srcs: [
"linker/mips64/relative_patcher_mips64_test.cc",
+ "utils/mips64/managed_register_mips64_test.cc",
],
},
x86: {
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 808e28c9ea..538fe93793 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -70,10 +70,6 @@ class DexCompiler {
return *unit_.GetDexFile();
}
- bool PerformOptimizations() const {
- return dex_to_dex_compilation_level_ >= DexToDexCompilationLevel::kOptimize;
- }
-
// Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where
// a barrier is required.
void CompileReturnVoid(Instruction* inst, uint32_t dex_pc);
@@ -114,7 +110,7 @@ class DexCompiler {
};
void DexCompiler::Compile() {
- DCHECK_GE(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kRequired);
+ DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize);
const DexFile::CodeItem* code_item = unit_.GetCodeItem();
const uint16_t* insns = code_item->insns_;
const uint32_t insns_size = code_item->insns_size_in_code_units_;
@@ -221,7 +217,7 @@ void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) {
}
Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) {
- if (!kEnableCheckCastEllision || !PerformOptimizations()) {
+ if (!kEnableCheckCastEllision) {
return inst;
}
if (!driver_.IsSafeCast(&unit_, dex_pc)) {
@@ -254,7 +250,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst,
uint32_t dex_pc,
Instruction::Code new_opcode,
bool is_put) {
- if (!kEnableQuickening || !PerformOptimizations()) {
+ if (!kEnableQuickening) {
return;
}
uint32_t field_idx = inst->VRegC_22c();
@@ -279,7 +275,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst,
void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
Instruction::Code new_opcode, bool is_range) {
- if (!kEnableQuickening || !PerformOptimizations()) {
+ if (!kEnableQuickening) {
return;
}
uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h
index 00c596d60e..87ddb395ad 100644
--- a/compiler/dex/dex_to_dex_compiler.h
+++ b/compiler/dex/dex_to_dex_compiler.h
@@ -34,8 +34,7 @@ namespace optimizer {
enum class DexToDexCompilationLevel {
kDontDexToDexCompile, // Only meaning wrt image time interpretation.
- kRequired, // Dex-to-dex compilation required for correctness.
- kOptimize // Perform required transformation and peep-hole optimizations.
+ kOptimize // Perform peep-hole optimizations.
};
std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 995098799c..e823f67d3c 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -532,16 +532,13 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
if (driver.GetCompilerOptions().GetDebuggable()) {
// We are debuggable so definitions of classes might be changed. We don't want to do any
// optimizations that could break that.
- max_level = optimizer::DexToDexCompilationLevel::kRequired;
+ max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
}
if (klass->IsVerified()) {
// Class is verified so we can enable DEX-to-DEX compilation for performance.
return max_level;
- } else if (klass->ShouldVerifyAtRuntime()) {
- // Class verification has soft-failed. Anyway, ensure at least correctness.
- return optimizer::DexToDexCompilationLevel::kRequired;
} else {
- // Class verification has failed: do not run DEX-to-DEX compilation.
+ // Class verification has failed: do not run DEX-to-DEX optimizations.
return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
}
}
@@ -611,7 +608,7 @@ static void CompileMethod(Thread* self,
dex_file,
(verified_method != nullptr)
? dex_to_dex_compilation_level
- : optimizer::DexToDexCompilationLevel::kRequired);
+ : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile);
}
} else if ((access_flags & kAccNative) != 0) {
// Are we extracting only and have support for generic JNI down calls?
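
With the kRequired level gone, the selection in GetDexToDexCompilationLevel above collapses to a two-way decision: a method is dex-to-dex optimized only when its class verified successfully and the compilation is not debuggable; soft verification failures no longer get a separate "required" treatment. The standalone sketch below only models that decision, assuming plain booleans for the debuggable and verified states; GetLevel and its parameters are invented for the illustration and are not ART code.

    // Simplified model of the two-way decision left after kRequired was removed.
    // GetLevel and its bool parameters are stand-ins invented for this sketch.
    #include <iostream>

    enum class DexToDexCompilationLevel {
      kDontDexToDexCompile,  // No dex-to-dex processing at all.
      kOptimize              // Peep-hole optimizations (quickening, check-cast elision).
    };

    DexToDexCompilationLevel GetLevel(bool debuggable, bool class_verified) {
      // Debuggable builds and unverified classes (including soft failures) simply
      // skip dex-to-dex compilation; there is no "required" middle level anymore.
      if (debuggable || !class_verified) {
        return DexToDexCompilationLevel::kDontDexToDexCompile;
      }
      return DexToDexCompilationLevel::kOptimize;
    }

    int main() {
      bool optimized = GetLevel(/* debuggable */ false, /* class_verified */ true) ==
                       DexToDexCompilationLevel::kOptimize;
      std::cout << optimized << '\n';  // prints 1
      return 0;
    }
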
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 34ad1c5c08..a0c0a2acf6 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -27,7 +27,6 @@ CompilerOptions::CompilerOptions()
small_method_threshold_(kDefaultSmallMethodThreshold),
tiny_method_threshold_(kDefaultTinyMethodThreshold),
num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
- inline_depth_limit_(kUnsetInlineDepthLimit),
inline_max_code_units_(kUnsetInlineMaxCodeUnits),
no_inline_from_(nullptr),
boot_image_(false),
@@ -62,7 +61,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter,
size_t small_method_threshold,
size_t tiny_method_threshold,
size_t num_dex_methods_threshold,
- size_t inline_depth_limit,
size_t inline_max_code_units,
const std::vector<const DexFile*>* no_inline_from,
double top_k_profile_threshold,
@@ -86,7 +84,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter,
small_method_threshold_(small_method_threshold),
tiny_method_threshold_(tiny_method_threshold),
num_dex_methods_threshold_(num_dex_methods_threshold),
- inline_depth_limit_(inline_depth_limit),
inline_max_code_units_(inline_max_code_units),
no_inline_from_(no_inline_from),
boot_image_(false),
@@ -130,10 +127,6 @@ void CompilerOptions::ParseNumDexMethods(const StringPiece& option, UsageFn Usag
ParseUintOption(option, "--num-dex-methods", &num_dex_methods_threshold_, Usage);
}
-void CompilerOptions::ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage) {
- ParseUintOption(option, "--inline-depth-limit", &inline_depth_limit_, Usage);
-}
-
void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) {
ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage);
}
@@ -183,8 +176,6 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
ParseTinyMethodMax(option, Usage);
} else if (option.starts_with("--num-dex-methods=")) {
ParseNumDexMethods(option, Usage);
- } else if (option.starts_with("--inline-depth-limit=")) {
- ParseInlineDepthLimit(option, Usage);
} else if (option.starts_with("--inline-max-code-units=")) {
ParseInlineMaxCodeUnits(option, Usage);
} else if (option == "--generate-debug-info" || option == "-g") {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 2e3e55f6c6..2376fbf5f5 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -46,15 +46,9 @@ class CompilerOptions FINAL {
static constexpr double kDefaultTopKProfileThreshold = 90.0;
static const bool kDefaultGenerateDebugInfo = false;
static const bool kDefaultGenerateMiniDebugInfo = false;
- static const size_t kDefaultInlineDepthLimit = 3;
static const size_t kDefaultInlineMaxCodeUnits = 32;
- static constexpr size_t kUnsetInlineDepthLimit = -1;
static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
- // Default inlining settings when the space filter is used.
- static constexpr size_t kSpaceFilterInlineDepthLimit = 3;
- static constexpr size_t kSpaceFilterInlineMaxCodeUnits = 10;
-
CompilerOptions();
~CompilerOptions();
@@ -64,7 +58,6 @@ class CompilerOptions FINAL {
size_t small_method_threshold,
size_t tiny_method_threshold,
size_t num_dex_methods_threshold,
- size_t inline_depth_limit,
size_t inline_max_code_units,
const std::vector<const DexFile*>* no_inline_from,
double top_k_profile_threshold,
@@ -155,13 +148,6 @@ class CompilerOptions FINAL {
return num_dex_methods_threshold_;
}
- size_t GetInlineDepthLimit() const {
- return inline_depth_limit_;
- }
- void SetInlineDepthLimit(size_t limit) {
- inline_depth_limit_ = limit;
- }
-
size_t GetInlineMaxCodeUnits() const {
return inline_max_code_units_;
}
@@ -275,7 +261,6 @@ class CompilerOptions FINAL {
void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
void ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage);
- void ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage);
void ParseNumDexMethods(const StringPiece& option, UsageFn Usage);
void ParseTinyMethodMax(const StringPiece& option, UsageFn Usage);
void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage);
@@ -289,7 +274,6 @@ class CompilerOptions FINAL {
size_t small_method_threshold_;
size_t tiny_method_threshold_;
size_t num_dex_methods_threshold_;
- size_t inline_depth_limit_;
size_t inline_max_code_units_;
// Dex files from which we should not inline code.
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7ee494a131..897d81993d 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -363,7 +363,6 @@ void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
}
CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
// Set inline filter values.
- compiler_options_->SetInlineDepthLimit(CompilerOptions::kDefaultInlineDepthLimit);
compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
image_classes_.clear();
if (!extra_dex.empty()) {
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3ae7974038..ad951bcc3f 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -97,7 +97,6 @@ JitCompiler::JitCompiler() {
CompilerOptions::kDefaultSmallMethodThreshold,
CompilerOptions::kDefaultTinyMethodThreshold,
CompilerOptions::kDefaultNumDexMethodsThreshold,
- CompilerOptions::kDefaultInlineDepthLimit,
CompilerOptions::kDefaultInlineMaxCodeUnits,
/* no_inline_from */ nullptr,
CompilerOptions::kDefaultTopKProfileThreshold,
@@ -177,10 +176,6 @@ JitCompiler::JitCompiler() {
jit_logger_.reset(new JitLogger());
jit_logger_->OpenLog();
}
-
- size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit();
- DCHECK_LT(thread_count * inline_depth_limit, std::numeric_limits<uint16_t>::max())
- << "ProfilingInfo's inline counter can potentially overflow";
}
JitCompiler::~JitCompiler() {
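
After this change --inline-max-code-units is the only inlining knob still parsed; --inline-depth-limit disappears from the option table and the CompilerOptions constructor, and the JIT no longer needs the removed DCHECK guarding ProfilingInfo's 16-bit inline counter against a thread_count * depth overflow. The snippet below is only a minimal, self-contained sketch of the "--name=value" parsing style ParseUintOption represents; ParseSizeOption is an invented stand-in, not the ART helper.

    // Minimal sketch of parsing "--inline-max-code-units=32" into a size_t.
    // ParseSizeOption is invented for this example.
    #include <cstdlib>
    #include <iostream>
    #include <string>

    bool ParseSizeOption(const std::string& option, const std::string& prefix,
                         size_t* out_value) {
      // Expect "<prefix>=<unsigned integer>".
      if (option.compare(0, prefix.size(), prefix) != 0 ||
          option.size() <= prefix.size() + 1 || option[prefix.size()] != '=') {
        return false;
      }
      *out_value = static_cast<size_t>(
          std::strtoull(option.c_str() + prefix.size() + 1, nullptr, 10));
      return true;
    }

    int main() {
      size_t inline_max_code_units = 0;
      if (ParseSizeOption("--inline-max-code-units=32",
                          "--inline-max-code-units", &inline_max_code_units)) {
        std::cout << inline_max_code_units << '\n';  // prints 32
      }
      return 0;
    }
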
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 2ee4db923a..476906a768 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -528,7 +528,8 @@ class BCEVisitor : public HGraphVisitor {
has_dom_based_dynamic_bce_(false),
initial_block_size_(graph->GetBlocks().size()),
side_effects_(side_effects),
- induction_range_(induction_analysis) {}
+ induction_range_(induction_analysis),
+ next_(nullptr) {}
void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
DCHECK(!IsAddedBlock(block));
@@ -1618,8 +1619,8 @@ class BCEVisitor : public HGraphVisitor {
void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
HInstruction* suspend = loop->GetSuspendCheck();
block->InsertInstructionBefore(condition, block->GetLastInstruction());
- HDeoptimize* deoptimize =
- new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+ GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc());
block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
if (suspend->HasEnvironment()) {
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
@@ -1631,8 +1632,8 @@ class BCEVisitor : public HGraphVisitor {
void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) {
HBasicBlock* block = bounds_check->GetBlock();
block->InsertInstructionBefore(condition, bounds_check);
- HDeoptimize* deoptimize =
- new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc());
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+ GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc());
block->InsertInstructionBefore(deoptimize, bounds_check);
deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
}
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index fe423012ca..048073e37a 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -36,7 +36,8 @@ class CHAGuardVisitor : HGraphVisitor {
: HGraphVisitor(graph),
block_has_cha_guard_(GetGraph()->GetBlocks().size(),
0,
- graph->GetArena()->Adapter(kArenaAllocCHA)) {
+ graph->GetArena()->Adapter(kArenaAllocCHA)),
+ instruction_iterator_(nullptr) {
number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards();
DCHECK_NE(number_of_guards_to_visit_, 0u);
// Will recount number of guards during guard optimization.
@@ -201,8 +202,8 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag,
HInstruction* suspend = loop_info->GetSuspendCheck();
// Need a new deoptimize instruction that copies the environment
// of the suspend instruction for the loop.
- HDeoptimize* deoptimize =
- new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc());
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+ GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc());
pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
suspend->GetEnvironment(), loop_info->GetHeader());
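
Both passes above now construct HDeoptimize with an arena allocator and an explicit Kind tag (kBCE for bounds-check elimination, kInline for the CHA guard) ahead of the dex pc, instead of just a condition and a dex pc. The toy below only mirrors the shape of that call site, an arena-placement-new of a node that records a kind tag; Arena and Deoptimize here are invented, heavily simplified stand-ins rather than ART's classes, and no claim is made about why the real constructor takes the extra allocator.

    // Toy model of "new (arena) HDeoptimize(arena, condition, kind, dex_pc)".
    // Arena and Deoptimize are invented stand-ins for this sketch.
    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    class Arena {
     public:
      ~Arena() { for (uint8_t* block : blocks_) delete[] block; }
      void* Alloc(size_t size) {
        blocks_.push_back(new uint8_t[size]);
        return blocks_.back();
      }
     private:
      std::vector<uint8_t*> blocks_;
    };

    struct Deoptimize {
      enum class Kind { kBCE, kInline };

      Deoptimize(Arena* arena, int condition_id, Kind kind, uint32_t dex_pc)
          : arena_(arena), condition_id_(condition_id), kind_(kind), dex_pc_(dex_pc) {}

      // Arena-placement allocation, mirroring "new (GetGraph()->GetArena()) ...".
      static void* operator new(size_t size, Arena* arena) { return arena->Alloc(size); }
      static void operator delete(void*, Arena*) {}  // matching placement delete

      Arena* arena_;
      int condition_id_;
      Kind kind_;
      uint32_t dex_pc_;
    };

    int main() {
      Arena arena;
      Deoptimize* deopt = new (&arena) Deoptimize(&arena, /* condition id */ 7,
                                                  Deoptimize::Kind::kBCE,
                                                  /* dex_pc */ 42);
      std::cout << static_cast<int>(deopt->kind_) << '\n';  // prints 0 (kBCE)
      return 0;
    }
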
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index caea250ab6..d7cc577580 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1134,7 +1134,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
@@ -1602,14 +1602,20 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) {
}
}
-static Condition GenerateLongTestConstant(HCondition* condition,
- bool invert,
- CodeGeneratorARM* codegen) {
+static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition,
+ bool invert,
+ CodeGeneratorARM* codegen) {
DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
const LocationSummary* const locations = condition->GetLocations();
- IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
- Condition ret = EQ;
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ std::pair<Condition, Condition> ret;
const Location left = locations->InAt(0);
const Location right = locations->InAt(1);
@@ -1629,22 +1635,26 @@ static Condition GenerateLongTestConstant(HCondition* condition,
__ CmpConstant(left_high, High32Bits(value));
__ it(EQ);
__ cmp(left_low, ShifterOperand(Low32Bits(value)), EQ);
- ret = ARMUnsignedCondition(cond);
+ ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
break;
case kCondLE:
case kCondGT:
// Trivially true or false.
if (value == std::numeric_limits<int64_t>::max()) {
__ cmp(left_low, ShifterOperand(left_low));
- ret = cond == kCondLE ? EQ : NE;
+ ret = cond == kCondLE ? std::make_pair(EQ, NE) : std::make_pair(NE, EQ);
break;
}
if (cond == kCondLE) {
+ DCHECK_EQ(opposite, kCondGT);
cond = kCondLT;
+ opposite = kCondGE;
} else {
DCHECK_EQ(cond, kCondGT);
+ DCHECK_EQ(opposite, kCondLE);
cond = kCondGE;
+ opposite = kCondLT;
}
value++;
@@ -1653,7 +1663,7 @@ static Condition GenerateLongTestConstant(HCondition* condition,
case kCondLT:
__ CmpConstant(left_low, Low32Bits(value));
__ sbcs(IP, left_high, ShifterOperand(High32Bits(value)));
- ret = ARMCondition(cond);
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
break;
default:
LOG(FATAL) << "Unreachable";
@@ -1663,14 +1673,20 @@ static Condition GenerateLongTestConstant(HCondition* condition,
return ret;
}
-static Condition GenerateLongTest(HCondition* condition,
- bool invert,
- CodeGeneratorARM* codegen) {
+static std::pair<Condition, Condition> GenerateLongTest(HCondition* condition,
+ bool invert,
+ CodeGeneratorARM* codegen) {
DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
const LocationSummary* const locations = condition->GetLocations();
- IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
- Condition ret = EQ;
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ std::pair<Condition, Condition> ret;
Location left = locations->InAt(0);
Location right = locations->InAt(1);
@@ -1689,15 +1705,19 @@ static Condition GenerateLongTest(HCondition* condition,
__ cmp(left.AsRegisterPairLow<Register>(),
ShifterOperand(right.AsRegisterPairLow<Register>()),
EQ);
- ret = ARMUnsignedCondition(cond);
+ ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
break;
case kCondLE:
case kCondGT:
if (cond == kCondLE) {
+ DCHECK_EQ(opposite, kCondGT);
cond = kCondGE;
+ opposite = kCondLT;
} else {
DCHECK_EQ(cond, kCondGT);
+ DCHECK_EQ(opposite, kCondLE);
cond = kCondLT;
+ opposite = kCondGE;
}
std::swap(left, right);
@@ -1709,7 +1729,7 @@ static Condition GenerateLongTest(HCondition* condition,
__ sbcs(IP,
left.AsRegisterPairHigh<Register>(),
ShifterOperand(right.AsRegisterPairHigh<Register>()));
- ret = ARMCondition(cond);
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
break;
default:
LOG(FATAL) << "Unreachable";
@@ -1719,90 +1739,83 @@ static Condition GenerateLongTest(HCondition* condition,
return ret;
}
-static Condition GenerateTest(HInstruction* instruction,
- Location loc,
- bool invert,
- CodeGeneratorARM* codegen) {
- DCHECK(!instruction->IsConstant());
+static std::pair<Condition, Condition> GenerateTest(HCondition* condition,
+ bool invert,
+ CodeGeneratorARM* codegen) {
+ const LocationSummary* const locations = condition->GetLocations();
+ const Primitive::Type type = condition->GetLeft()->GetType();
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+ std::pair<Condition, Condition> ret;
+ const Location right = locations->InAt(1);
- Condition ret = invert ? EQ : NE;
+ if (invert) {
+ std::swap(cond, opposite);
+ }
- if (IsBooleanValueOrMaterializedCondition(instruction)) {
- __ CmpConstant(loc.AsRegister<Register>(), 0);
+ if (type == Primitive::kPrimLong) {
+ ret = locations->InAt(1).IsConstant()
+ ? GenerateLongTestConstant(condition, invert, codegen)
+ : GenerateLongTest(condition, invert, codegen);
+ } else if (Primitive::IsFloatingPointType(type)) {
+ GenerateVcmp(condition, codegen);
+ __ vmstat();
+ ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
+ ARMFPCondition(opposite, condition->IsGtBias()));
} else {
- HCondition* const condition = instruction->AsCondition();
- const LocationSummary* const locations = condition->GetLocations();
- const Primitive::Type type = condition->GetLeft()->GetType();
- const IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
- const Location right = locations->InAt(1);
-
- if (type == Primitive::kPrimLong) {
- ret = condition->GetLocations()->InAt(1).IsConstant()
- ? GenerateLongTestConstant(condition, invert, codegen)
- : GenerateLongTest(condition, invert, codegen);
- } else if (Primitive::IsFloatingPointType(type)) {
- GenerateVcmp(condition, codegen);
- __ vmstat();
- ret = ARMFPCondition(cond, condition->IsGtBias());
- } else {
- DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
- const Register left = locations->InAt(0).AsRegister<Register>();
+ const Register left = locations->InAt(0).AsRegister<Register>();
- if (right.IsRegister()) {
- __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
- } else {
- DCHECK(right.IsConstant());
- __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
- }
-
- ret = ARMCondition(cond);
+ if (right.IsRegister()) {
+ __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+ } else {
+ DCHECK(right.IsConstant());
+ __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
}
+
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
}
return ret;
}
-static bool CanGenerateTest(HInstruction* condition, ArmAssembler* assembler) {
- if (!IsBooleanValueOrMaterializedCondition(condition)) {
- const HCondition* const cond = condition->AsCondition();
-
- if (cond->GetLeft()->GetType() == Primitive::kPrimLong) {
- const LocationSummary* const locations = cond->GetLocations();
- const IfCondition c = cond->GetCondition();
-
- if (locations->InAt(1).IsConstant()) {
- const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
- ShifterOperand so;
-
- if (c < kCondLT || c > kCondGE) {
- // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
- // we check that the least significant half of the first input to be compared
- // is in a low register (the other half is read outside an IT block), and
- // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
- // encoding can be used.
- if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
- !IsUint<8>(Low32Bits(value))) {
- return false;
- }
- } else if (c == kCondLE || c == kCondGT) {
- if (value < std::numeric_limits<int64_t>::max() &&
- !assembler->ShifterOperandCanHold(kNoRegister,
- kNoRegister,
- SBC,
- High32Bits(value + 1),
- kCcSet,
- &so)) {
- return false;
- }
- } else if (!assembler->ShifterOperandCanHold(kNoRegister,
- kNoRegister,
- SBC,
- High32Bits(value),
- kCcSet,
- &so)) {
+static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
+ if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
+ const LocationSummary* const locations = condition->GetLocations();
+ const IfCondition c = condition->GetCondition();
+
+ if (locations->InAt(1).IsConstant()) {
+ const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
+ ShifterOperand so;
+
+ if (c < kCondLT || c > kCondGE) {
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the least significant half of the first input to be compared
+ // is in a low register (the other half is read outside an IT block), and
+ // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
+ // encoding can be used.
+ if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
+ !IsUint<8>(Low32Bits(value))) {
+ return false;
+ }
+ } else if (c == kCondLE || c == kCondGT) {
+ if (value < std::numeric_limits<int64_t>::max() &&
+ !assembler->ShifterOperandCanHold(kNoRegister,
+ kNoRegister,
+ SBC,
+ High32Bits(value + 1),
+ kCcSet,
+ &so)) {
return false;
}
+ } else if (!assembler->ShifterOperandCanHold(kNoRegister,
+ kNoRegister,
+ SBC,
+ High32Bits(value),
+ kCcSet,
+ &so)) {
+ return false;
}
}
}
@@ -2415,13 +2428,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
-void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
- Label* true_label,
- Label* false_label ATTRIBUTE_UNUSED) {
- __ vmstat(); // transfer FP status register to ARM APSR.
- __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
-}
-
void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
Label* true_label,
Label* false_label) {
@@ -2438,7 +2444,6 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
// Set the conditions for the test, remembering that == needs to be
// decided using the low words.
- // TODO: consider avoiding jumps with temporary and CMP low+SBC high
switch (if_cond) {
case kCondEQ:
case kCondNE:
@@ -2509,25 +2514,38 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target_in,
Label* false_target_in) {
+ if (CanGenerateTest(condition, codegen_->GetAssembler())) {
+ Label* non_fallthrough_target;
+ bool invert;
+
+ if (true_target_in == nullptr) {
+ DCHECK(false_target_in != nullptr);
+ non_fallthrough_target = false_target_in;
+ invert = true;
+ } else {
+ non_fallthrough_target = true_target_in;
+ invert = false;
+ }
+
+ const auto cond = GenerateTest(condition, invert, codegen_);
+
+ __ b(non_fallthrough_target, cond.first);
+
+ if (false_target_in != nullptr && false_target_in != non_fallthrough_target) {
+ __ b(false_target_in);
+ }
+
+ return;
+ }
+
// Generated branching requires both targets to be explicit. If either of the
// targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
Label fallthrough_target;
Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
- Primitive::Type type = condition->InputAt(0)->GetType();
- switch (type) {
- case Primitive::kPrimLong:
- GenerateLongComparesAndJumps(condition, true_target, false_target);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- GenerateVcmp(condition, codegen_);
- GenerateFPJumps(condition, true_target, false_target);
- break;
- default:
- LOG(FATAL) << "Unexpected compare type " << type;
- }
+ DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+ GenerateLongComparesAndJumps(condition, true_target, false_target);
if (false_target != &fallthrough_target) {
__ b(false_target);
@@ -2729,7 +2747,8 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
}
if (!Primitive::IsFloatingPointType(type) &&
- CanGenerateTest(condition, codegen_->GetAssembler())) {
+ (IsBooleanValueOrMaterializedCondition(condition) ||
+ CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) {
bool invert = false;
if (out.Equals(second)) {
@@ -2753,7 +2772,14 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
}
- const Condition cond = GenerateTest(condition, locations->InAt(2), invert, codegen_);
+ std::pair<Condition, Condition> cond;
+
+ if (IsBooleanValueOrMaterializedCondition(condition)) {
+ __ CmpConstant(locations->InAt(2).AsRegister<Register>(), 0);
+ cond = invert ? std::make_pair(EQ, NE) : std::make_pair(NE, EQ);
+ } else {
+ cond = GenerateTest(condition->AsCondition(), invert, codegen_);
+ }
if (out.IsRegister()) {
ShifterOperand operand;
@@ -2765,8 +2791,8 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
operand = ShifterOperand(src.AsRegister<Register>());
}
- __ it(cond);
- __ mov(out.AsRegister<Register>(), operand, cond);
+ __ it(cond.first);
+ __ mov(out.AsRegister<Register>(), operand, cond.first);
} else {
DCHECK(out.IsRegisterPair());
@@ -2784,10 +2810,10 @@ void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
operand_low = ShifterOperand(src.AsRegisterPairLow<Register>());
}
- __ it(cond);
- __ mov(out.AsRegisterPairLow<Register>(), operand_low, cond);
- __ it(cond);
- __ mov(out.AsRegisterPairHigh<Register>(), operand_high, cond);
+ __ it(cond.first);
+ __ mov(out.AsRegisterPairLow<Register>(), operand_low, cond.first);
+ __ it(cond.first);
+ __ mov(out.AsRegisterPairHigh<Register>(), operand_high, cond.first);
}
return;
@@ -2840,7 +2866,7 @@ void LocationsBuilderARM::HandleCondition(HCondition* cond) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
if (!cond->IsEmittedAtUseSite()) {
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
break;
@@ -2867,51 +2893,44 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
return;
}
- LocationSummary* locations = cond->GetLocations();
- Location left = locations->InAt(0);
- Location right = locations->InAt(1);
- Register out = locations->Out().AsRegister<Register>();
- Label true_label, false_label;
+ const Register out = cond->GetLocations()->Out().AsRegister<Register>();
- switch (cond->InputAt(0)->GetType()) {
- default: {
- // Integer case.
- if (right.IsRegister()) {
- __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>()));
- } else {
- DCHECK(right.IsConstant());
- __ CmpConstant(left.AsRegister<Register>(),
- CodeGenerator::GetInt32ValueOf(right.GetConstant()));
- }
- __ it(ARMCondition(cond->GetCondition()), kItElse);
- __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1),
- ARMCondition(cond->GetCondition()));
- __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0),
- ARMCondition(cond->GetOppositeCondition()));
- return;
- }
- case Primitive::kPrimLong:
- GenerateLongComparesAndJumps(cond, &true_label, &false_label);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- GenerateVcmp(cond, codegen_);
- GenerateFPJumps(cond, &true_label, &false_label);
- break;
+ if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) {
+ const auto condition = GenerateTest(cond, false, codegen_);
+
+ __ it(condition.first);
+ __ mov(out, ShifterOperand(1), condition.first);
+ __ it(condition.second);
+ __ mov(out, ShifterOperand(0), condition.second);
+ return;
}
// Convert the jumps into the result.
Label done_label;
- Label* final_label = codegen_->GetFinalLabel(cond, &done_label);
+ Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
- // False case: result = 0.
- __ Bind(&false_label);
- __ LoadImmediate(out, 0);
- __ b(final_label);
+ if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
+ Label true_label, false_label;
- // True case: result = 1.
- __ Bind(&true_label);
- __ LoadImmediate(out, 1);
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ LoadImmediate(out, 0);
+ __ b(final_label);
+
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ LoadImmediate(out, 1);
+ } else {
+ DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+
+ const auto condition = GenerateTest(cond, false, codegen_);
+
+ __ mov(out, ShifterOperand(0), AL, kCcKeep);
+ __ b(final_label, condition.second);
+ __ LoadImmediate(out, 1);
+ }
if (done_label.IsLinked()) {
__ Bind(&done_label);
@@ -7039,14 +7058,16 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- Label done, zero;
- Label* final_label = codegen_->GetFinalLabel(instruction, &done);
+ Label done;
+ Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARM* slow_path = nullptr;
// Return 0 if `obj` is null.
// avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &zero);
+ DCHECK_NE(out, obj);
+ __ LoadImmediate(out, 0);
+ __ CompareAndBranchIfZero(obj, final_label);
}
switch (type_check_kind) {
@@ -7058,11 +7079,23 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
class_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- __ cmp(out, ShifterOperand(cls));
// Classes must be equal for the instanceof to succeed.
- __ b(&zero, NE);
- __ LoadImmediate(out, 1);
- __ b(final_label);
+ __ cmp(out, ShifterOperand(cls));
+ // We speculatively set the result to false without changing the condition
+ // flags, which allows us to avoid some branching later.
+ __ mov(out, ShifterOperand(0), AL, kCcKeep);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (ArmAssembler::IsLowRegister(out)) {
+ __ it(EQ);
+ __ mov(out, ShifterOperand(1), EQ);
+ } else {
+ __ b(final_label, NE);
+ __ LoadImmediate(out, 1);
+ }
+
break;
}
@@ -7084,14 +7117,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
super_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- // If `out` is null, we use it for the result, and jump to `done`.
+ // If `out` is null, we use it for the result, and jump to the final label.
__ CompareAndBranchIfZero(out, final_label);
__ cmp(out, ShifterOperand(cls));
__ b(&loop, NE);
__ LoadImmediate(out, 1);
- if (zero.IsLinked()) {
- __ b(final_label);
- }
break;
}
@@ -7114,14 +7144,32 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
super_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- __ CompareAndBranchIfNonZero(out, &loop);
- // If `out` is null, we use it for the result, and jump to `done`.
- __ b(final_label);
- __ Bind(&success);
- __ LoadImmediate(out, 1);
- if (zero.IsLinked()) {
+ // This is essentially a null check, but it sets the condition flags to the
+ // proper value for the code that follows the loop, i.e. not `EQ`.
+ __ cmp(out, ShifterOperand(1));
+ __ b(&loop, HS);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (ArmAssembler::IsLowRegister(out)) {
+ // If `out` is null, we use it for the result, and the condition flags
+ // have already been set to `NE`, so the IT block that comes afterwards
+ // (and which handles the successful case) turns into a NOP (instead of
+ // overwriting `out`).
+ __ Bind(&success);
+ // There is only one branch to the `success` label (which is bound to this
+ // IT block), and it has the same condition, `EQ`, so in that case the MOV
+ // is executed.
+ __ it(EQ);
+ __ mov(out, ShifterOperand(1), EQ);
+ } else {
+ // If `out` is null, we use it for the result, and jump to the final label.
__ b(final_label);
+ __ Bind(&success);
+ __ LoadImmediate(out, 1);
}
+
break;
}
@@ -7144,14 +7192,28 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
component_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- // If `out` is null, we use it for the result, and jump to `done`.
+ // If `out` is null, we use it for the result, and jump to the final label.
__ CompareAndBranchIfZero(out, final_label);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(out, &zero);
- __ Bind(&exact_check);
- __ LoadImmediate(out, 1);
- __ b(final_label);
+ __ cmp(out, ShifterOperand(0));
+ // We speculatively set the result to false without changing the condition
+ // flags, which allows us to avoid some branching later.
+ __ mov(out, ShifterOperand(0), AL, kCcKeep);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (ArmAssembler::IsLowRegister(out)) {
+ __ Bind(&exact_check);
+ __ it(EQ);
+ __ mov(out, ShifterOperand(1), EQ);
+ } else {
+ __ b(final_label, NE);
+ __ Bind(&exact_check);
+ __ LoadImmediate(out, 1);
+ }
+
break;
}
@@ -7171,9 +7233,6 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel(), NE);
__ LoadImmediate(out, 1);
- if (zero.IsLinked()) {
- __ b(final_label);
- }
break;
}
@@ -7202,18 +7261,10 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
/* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel());
- if (zero.IsLinked()) {
- __ b(final_label);
- }
break;
}
}
- if (zero.IsLinked()) {
- __ Bind(&zero);
- __ LoadImmediate(out, 0);
- }
-
if (done.IsLinked()) {
__ Bind(&done);
}
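
The recurring pattern in this file, mirrored in the VIXL backend further down, is that GenerateTest now returns both the condition to act on and its opposite, with the pair swapped when the caller asks for an inverted test; cond.first then materializes the true value (or takes the branch) while cond.second zeroes the result or skips it. A minimal sketch of just that pairing, with a cut-down IfCondition enum and an Opposite() helper invented for the example:

    // Toy sketch of the (condition, opposite) pair returned by GenerateTest.
    // IfCondition and Opposite() are reduced stand-ins, not the ART definitions.
    #include <iostream>
    #include <utility>

    enum class IfCondition { kCondEQ, kCondNE, kCondLT, kCondGE, kCondLE, kCondGT };

    IfCondition Opposite(IfCondition cond) {
      switch (cond) {
        case IfCondition::kCondEQ: return IfCondition::kCondNE;
        case IfCondition::kCondNE: return IfCondition::kCondEQ;
        case IfCondition::kCondLT: return IfCondition::kCondGE;
        case IfCondition::kCondGE: return IfCondition::kCondLT;
        case IfCondition::kCondLE: return IfCondition::kCondGT;
        case IfCondition::kCondGT: return IfCondition::kCondLE;
      }
      return cond;  // unreachable
    }

    // Like GenerateTest: compute the pair once, swap it when the test is inverted.
    std::pair<IfCondition, IfCondition> MakeTest(IfCondition cond, bool invert) {
      IfCondition opposite = Opposite(cond);
      if (invert) {
        std::swap(cond, opposite);
      }
      return std::make_pair(cond, opposite);
    }

    int main() {
      auto test = MakeTest(IfCondition::kCondLE, /* invert */ true);
      // Prints the enumerator indices of kCondGT and kCondLE, i.e. "5 4".
      std::cout << static_cast<int>(test.first) << ' '
                << static_cast<int>(test.second) << '\n';
      return 0;
    }

The payoff shows in HandleCondition: with the full pair available, a low output register can be set by a two-entry IT sequence (mov 1 under cond.first, mov 0 under cond.second) instead of binding separate true/false labels and branching between them.
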
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 59a7f7c048..86f2f21df7 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -299,7 +299,6 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
Label* false_target);
- void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 28cc942dfb..d463830ff6 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1150,7 +1150,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
@@ -3281,7 +3281,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati
void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
Primitive::Type type = instruction->GetResultType();
- DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
LocationSummary* locations = instruction->GetLocations();
Register out = OutputRegister(instruction);
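
The second hunk here fixes a vacuous assertion: Primitive::Type is an unscoped enum, so "type == Primitive::kPrimInt || Primitive::kPrimLong" parses as "(type == kPrimInt) || kPrimLong", and a non-zero enumerator converts to true, meaning the DCHECK could never fail. The first hunk regroups parentheses in the same spirit so the Intrinsified() check only qualifies the invoke-virtual case. A self-contained illustration of the pitfall, with the enum cut down to a stand-in (the enumerator values are assumptions for the example):

    // Why "type == kPrimInt || kPrimLong" was always true.
    // The enum below is a reduced stand-in for ART's Primitive::Type.
    #include <iostream>

    enum Type { kPrimNot = 0, kPrimInt = 1, kPrimLong = 2 };  // unscoped: converts to bool

    int main() {
      Type type = kPrimNot;  // clearly neither int nor long
      bool buggy = (type == kPrimInt || kPrimLong);         // kPrimLong != 0, so always true
      bool fixed = (type == kPrimInt || type == kPrimLong);
      std::cout << buggy << ' ' << fixed << '\n';  // prints "1 0"
      return 0;
    }
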
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 2d2d8109a3..cce412b314 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -1175,7 +1175,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
@@ -1687,14 +1687,21 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege
}
}
-static vixl32::Condition GenerateLongTestConstant(HCondition* condition,
- bool invert,
- CodeGeneratorARMVIXL* codegen) {
+static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
+ HCondition* condition,
+ bool invert,
+ CodeGeneratorARMVIXL* codegen) {
DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
const LocationSummary* const locations = condition->GetLocations();
- IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
- vixl32::Condition ret = eq;
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
const Location left = locations->InAt(0);
const Location right = locations->InAt(1);
@@ -1713,13 +1720,14 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition,
case kCondAE: {
__ Cmp(left_high, High32Bits(value));
+ // We use the scope because of the IT block that follows.
ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
2 * vixl32::k16BitT32InstructionSizeInBytes,
CodeBufferCheckScope::kExactSize);
__ it(eq);
__ cmp(eq, left_low, Low32Bits(value));
- ret = ARMUnsignedCondition(cond);
+ ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
break;
}
case kCondLE:
@@ -1727,15 +1735,19 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition,
// Trivially true or false.
if (value == std::numeric_limits<int64_t>::max()) {
__ Cmp(left_low, left_low);
- ret = cond == kCondLE ? eq : ne;
+ ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
break;
}
if (cond == kCondLE) {
+ DCHECK_EQ(opposite, kCondGT);
cond = kCondLT;
+ opposite = kCondGE;
} else {
DCHECK_EQ(cond, kCondGT);
+ DCHECK_EQ(opposite, kCondLE);
cond = kCondGE;
+ opposite = kCondLT;
}
value++;
@@ -1746,7 +1758,7 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition,
__ Cmp(left_low, Low32Bits(value));
__ Sbcs(temps.Acquire(), left_high, High32Bits(value));
- ret = ARMCondition(cond);
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
break;
}
default:
@@ -1757,14 +1769,21 @@ static vixl32::Condition GenerateLongTestConstant(HCondition* condition,
return ret;
}
-static vixl32::Condition GenerateLongTest(HCondition* condition,
- bool invert,
- CodeGeneratorARMVIXL* codegen) {
+static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
+ HCondition* condition,
+ bool invert,
+ CodeGeneratorARMVIXL* codegen) {
DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
const LocationSummary* const locations = condition->GetLocations();
- IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
- vixl32::Condition ret = eq;
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
Location left = locations->InAt(0);
Location right = locations->InAt(1);
@@ -1779,22 +1798,27 @@ static vixl32::Condition GenerateLongTest(HCondition* condition,
case kCondAE: {
__ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
+ // We use the scope because of the IT block that follows.
ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
2 * vixl32::k16BitT32InstructionSizeInBytes,
CodeBufferCheckScope::kExactSize);
__ it(eq);
__ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
- ret = ARMUnsignedCondition(cond);
+ ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
break;
}
case kCondLE:
case kCondGT:
if (cond == kCondLE) {
+ DCHECK_EQ(opposite, kCondGT);
cond = kCondGE;
+ opposite = kCondLT;
} else {
DCHECK_EQ(cond, kCondGT);
+ DCHECK_EQ(opposite, kCondLE);
cond = kCondLT;
+ opposite = kCondGE;
}
std::swap(left, right);
@@ -1805,7 +1829,7 @@ static vixl32::Condition GenerateLongTest(HCondition* condition,
__ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
__ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
- ret = ARMCondition(cond);
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
break;
}
default:
@@ -1816,69 +1840,62 @@ static vixl32::Condition GenerateLongTest(HCondition* condition,
return ret;
}
-static vixl32::Condition GenerateTest(HInstruction* instruction,
- Location loc,
- bool invert,
- CodeGeneratorARMVIXL* codegen) {
- DCHECK(!instruction->IsConstant());
+static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
+ bool invert,
+ CodeGeneratorARMVIXL* codegen) {
+ const Primitive::Type type = condition->GetLeft()->GetType();
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+ std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
- vixl32::Condition ret = invert ? eq : ne;
+ if (invert) {
+ std::swap(cond, opposite);
+ }
- if (IsBooleanValueOrMaterializedCondition(instruction)) {
- __ Cmp(RegisterFrom(loc), 0);
+ if (type == Primitive::kPrimLong) {
+ ret = condition->GetLocations()->InAt(1).IsConstant()
+ ? GenerateLongTestConstant(condition, invert, codegen)
+ : GenerateLongTest(condition, invert, codegen);
+ } else if (Primitive::IsFloatingPointType(type)) {
+ GenerateVcmp(condition, codegen);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
+ ARMFPCondition(opposite, condition->IsGtBias()));
} else {
- HCondition* const condition = instruction->AsCondition();
- const Primitive::Type type = condition->GetLeft()->GetType();
- const IfCondition cond = invert ? condition->GetOppositeCondition() : condition->GetCondition();
-
- if (type == Primitive::kPrimLong) {
- ret = condition->GetLocations()->InAt(1).IsConstant()
- ? GenerateLongTestConstant(condition, invert, codegen)
- : GenerateLongTest(condition, invert, codegen);
- } else if (Primitive::IsFloatingPointType(type)) {
- GenerateVcmp(condition, codegen);
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- ret = ARMFPCondition(cond, condition->IsGtBias());
- } else {
- DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
- __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
- ret = ARMCondition(cond);
- }
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+ __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
}
return ret;
}
-static bool CanGenerateTest(HInstruction* condition, ArmVIXLAssembler* assembler) {
- if (!IsBooleanValueOrMaterializedCondition(condition)) {
- const HCondition* const cond = condition->AsCondition();
-
- if (cond->GetLeft()->GetType() == Primitive::kPrimLong) {
- const LocationSummary* const locations = cond->GetLocations();
- const IfCondition c = cond->GetCondition();
+static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) {
+ if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
+ const LocationSummary* const locations = condition->GetLocations();
+ const IfCondition c = condition->GetCondition();
- if (locations->InAt(1).IsConstant()) {
- const int64_t value = Int64ConstantFrom(locations->InAt(1));
+ if (locations->InAt(1).IsConstant()) {
+ const int64_t value = Int64ConstantFrom(locations->InAt(1));
- if (c < kCondLT || c > kCondGE) {
- // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
- // we check that the least significant half of the first input to be compared
- // is in a low register (the other half is read outside an IT block), and
- // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
- // encoding can be used.
- if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) {
- return false;
- }
- // TODO(VIXL): The rest of the checks are there to keep the backend in sync with
- // the previous one, but are not strictly necessary.
- } else if (c == kCondLE || c == kCondGT) {
- if (value < std::numeric_limits<int64_t>::max() &&
- !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) {
- return false;
- }
- } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) {
+ if (c < kCondLT || c > kCondGE) {
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the least significant half of the first input to be compared
+ // is in a low register (the other half is read outside an IT block), and
+ // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
+ // encoding can be used.
+ if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) {
+ return false;
+ }
+ // TODO(VIXL): The rest of the checks are there to keep the backend in sync with
+ // the previous one, but are not strictly necessary.
+ } else if (c == kCondLE || c == kCondGT) {
+ if (value < std::numeric_limits<int64_t>::max() &&
+ !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) {
return false;
}
+ } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) {
+ return false;
}
}
}
@@ -2445,14 +2462,6 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
-void InstructionCodeGeneratorARMVIXL::GenerateFPJumps(HCondition* cond,
- vixl32::Label* true_label,
- vixl32::Label* false_label ATTRIBUTE_UNUSED) {
- // To branch on the result of the FP compare we transfer FPSCR to APSR (encoded as PC in VMRS).
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(ARMFPCondition(cond->GetCondition(), cond->IsGtBias()), true_label);
-}
-
void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond,
vixl32::Label* true_label,
vixl32::Label* false_label) {
@@ -2469,7 +2478,6 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c
// Set the conditions for the test, remembering that == needs to be
// decided using the low words.
- // TODO: consider avoiding jumps with temporary and CMP low+SBC high
switch (if_cond) {
case kCondEQ:
case kCondNE:
@@ -2540,31 +2548,44 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c
void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
vixl32::Label* true_target_in,
vixl32::Label* false_target_in) {
+ if (CanGenerateTest(condition, codegen_->GetAssembler())) {
+ vixl32::Label* non_fallthrough_target;
+ bool invert;
+
+ if (true_target_in == nullptr) {
+ DCHECK(false_target_in != nullptr);
+ non_fallthrough_target = false_target_in;
+ invert = true;
+ } else {
+ non_fallthrough_target = true_target_in;
+ invert = false;
+ }
+
+ const auto cond = GenerateTest(condition, invert, codegen_);
+
+ __ B(cond.first, non_fallthrough_target);
+
+ if (false_target_in != nullptr && false_target_in != non_fallthrough_target) {
+ __ B(false_target_in);
+ }
+
+ return;
+ }
+
// Generated branching requires both targets to be explicit. If either of the
// targets is nullptr (fallthrough) use and bind `fallthrough` instead.
vixl32::Label fallthrough;
vixl32::Label* true_target = (true_target_in == nullptr) ? &fallthrough : true_target_in;
vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
- Primitive::Type type = condition->InputAt(0)->GetType();
- switch (type) {
- case Primitive::kPrimLong:
- GenerateLongComparesAndJumps(condition, true_target, false_target);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- GenerateVcmp(condition, codegen_);
- GenerateFPJumps(condition, true_target, false_target);
- break;
- default:
- LOG(FATAL) << "Unexpected compare type " << type;
- }
+ DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+ GenerateLongComparesAndJumps(condition, true_target, false_target);
if (false_target != &fallthrough) {
__ B(false_target);
}
- if (true_target_in == nullptr || false_target_in == nullptr) {
+ if (fallthrough.IsReferenced()) {
__ Bind(&fallthrough);
}
}
@@ -2759,7 +2780,8 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
}
if (!Primitive::IsFloatingPointType(type) &&
- CanGenerateTest(condition, codegen_->GetAssembler())) {
+ (IsBooleanValueOrMaterializedCondition(condition) ||
+ CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) {
bool invert = false;
if (out.Equals(second)) {
@@ -2783,15 +2805,24 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
}
- const vixl32::Condition cond = GenerateTest(condition, locations->InAt(2), invert, codegen_);
+ std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
+
+ if (IsBooleanValueOrMaterializedCondition(condition)) {
+ __ Cmp(InputRegisterAt(select, 2), 0);
+ cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
+ } else {
+ cond = GenerateTest(condition->AsCondition(), invert, codegen_);
+ }
+
const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
+ // We use the scope because of the IT block that follows.
ExactAssemblyScope guard(GetVIXLAssembler(),
instr_count * vixl32::k16BitT32InstructionSizeInBytes,
CodeBufferCheckScope::kExactSize);
if (out.IsRegister()) {
- __ it(cond);
- __ mov(cond, RegisterFrom(out), OperandFrom(src, type));
+ __ it(cond.first);
+ __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
} else {
DCHECK(out.IsRegisterPair());
@@ -2809,10 +2840,10 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
operand_low = LowRegisterFrom(src);
}
- __ it(cond);
- __ mov(cond, LowRegisterFrom(out), operand_low);
- __ it(cond);
- __ mov(cond, HighRegisterFrom(out), operand_high);
+ __ it(cond.first);
+ __ mov(cond.first, LowRegisterFrom(out), operand_low);
+ __ it(cond.first);
+ __ mov(cond.first, HighRegisterFrom(out), operand_high);
}
return;
@@ -2865,7 +2896,7 @@ void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
if (!cond->IsEmittedAtUseSite()) {
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
break;
@@ -2892,50 +2923,48 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
return;
}
- Location right = cond->GetLocations()->InAt(1);
- vixl32::Register out = OutputRegister(cond);
- vixl32::Label true_label, false_label;
+ const vixl32::Register out = OutputRegister(cond);
- switch (cond->InputAt(0)->GetType()) {
- default: {
- // Integer case.
- if (right.IsRegister()) {
- __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
- } else {
- DCHECK(right.IsConstant());
- __ Cmp(InputRegisterAt(cond, 0),
- CodeGenerator::GetInt32ValueOf(right.GetConstant()));
- }
- ExactAssemblyScope aas(GetVIXLAssembler(),
- 3 * vixl32::kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ ite(ARMCondition(cond->GetCondition()));
- __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1);
- __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0);
- return;
- }
- case Primitive::kPrimLong:
- GenerateLongComparesAndJumps(cond, &true_label, &false_label);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- GenerateVcmp(cond, codegen_);
- GenerateFPJumps(cond, &true_label, &false_label);
- break;
+ if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) {
+ const auto condition = GenerateTest(cond, false, codegen_);
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ 4 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(condition.first);
+ __ mov(condition.first, out, 1);
+ __ it(condition.second);
+ __ mov(condition.second, out, 0);
+ return;
}
// Convert the jumps into the result.
vixl32::Label done_label;
- vixl32::Label* final_label = codegen_->GetFinalLabel(cond, &done_label);
+ vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
- // False case: result = 0.
- __ Bind(&false_label);
- __ Mov(out, 0);
- __ B(final_label);
+ if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
+ vixl32::Label true_label, false_label;
- // True case: result = 1.
- __ Bind(&true_label);
- __ Mov(out, 1);
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ Mov(out, 0);
+ __ B(final_label);
+
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ Mov(out, 1);
+ } else {
+ DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+
+ const auto condition = GenerateTest(cond, false, codegen_);
+
+ __ Mov(LeaveFlags, out, 0);
+ __ B(condition.second, final_label, /* far_target */ false);
+ __ Mov(out, 1);
+ }
if (done_label.IsReferenced()) {
__ Bind(&done_label);
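// [Illustrative sketch, not part of the patch] A plain-C++ model of the two
// boolean-materialization strategies HandleCondition now picks between; the
// boolean parameters stand in for the flag state produced by GenerateTest and
// for the out.IsLow() check, and no vixl API is used.
#include <cstdint>

int32_t MaterializeCondition(bool condition_holds, bool out_is_low_register) {
  int32_t out;
  if (out_is_low_register) {
    // IT-block path: two conditional 16-bit moves, no branch taken.
    out = condition_holds ? 1 : 0;
  } else {
    // Fallback path: speculatively write 0 (without touching the flags),
    // then skip the write of 1 when the opposite condition holds.
    out = 0;
    if (condition_holds) {
      out = 1;
    }
  }
  return out;
}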
@@ -7079,14 +7108,16 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- vixl32::Label done, zero;
- vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
+ vixl32::Label done;
+ vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARMVIXL* slow_path = nullptr;
// Return 0 if `obj` is null.
// avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &zero, /* far_target */ false);
+ DCHECK(!out.Is(obj));
+ __ Mov(out, 0);
+ __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
}
switch (type_check_kind) {
@@ -7098,11 +7129,28 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
class_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- __ Cmp(out, cls);
// Classes must be equal for the instanceof to succeed.
- __ B(ne, &zero, /* far_target */ false);
- __ Mov(out, 1);
- __ B(final_label);
+ __ Cmp(out, cls);
+ // We speculatively set the result to false without changing the condition
+ // flags, which allows us to avoid some branching later.
+ __ Mov(LeaveFlags, out, 0);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (out.IsLow()) {
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(eq);
+ __ mov(eq, out, 1);
+ } else {
+ __ B(ne, final_label, /* far_target */ false);
+ __ Mov(out, 1);
+ }
+
break;
}
@@ -7124,14 +7172,11 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
super_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- // If `out` is null, we use it for the result, and jump to `done`.
+ // If `out` is null, we use it for the result, and jump to the final label.
__ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
__ Cmp(out, cls);
__ B(ne, &loop, /* far_target */ false);
__ Mov(out, 1);
- if (zero.IsReferenced()) {
- __ B(final_label);
- }
break;
}
@@ -7154,14 +7199,38 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
super_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- __ CompareAndBranchIfNonZero(out, &loop);
- // If `out` is null, we use it for the result, and jump to `done`.
- __ B(final_label);
- __ Bind(&success);
- __ Mov(out, 1);
- if (zero.IsReferenced()) {
+ // This is essentially a null check, but it sets the condition flags to the
+ // proper value for the code that follows the loop, i.e. not `eq`.
+ __ Cmp(out, 1);
+ __ B(hs, &loop, /* far_target */ false);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (out.IsLow()) {
+ // If `out` is null, we use it for the result, and the condition flags
+ // have already been set to `ne`, so the IT block that comes afterwards
+ // (and which handles the successful case) turns into a NOP (instead of
+ // overwriting `out`).
+ __ Bind(&success);
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ // There is only one branch to the `success` label (which is bound to this
+ // IT block), and it has the same condition, `eq`, so in that case the MOV
+ // is executed.
+ __ it(eq);
+ __ mov(eq, out, 1);
+ } else {
+ // If `out` is null, we use it for the result, and jump to the final label.
__ B(final_label);
+ __ Bind(&success);
+ __ Mov(out, 1);
}
+
break;
}
@@ -7184,14 +7253,34 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
component_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- // If `out` is null, we use it for the result, and jump to `done`.
+ // If `out` is null, we use it for the result, and jump to the final label.
__ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false);
- __ Bind(&exact_check);
- __ Mov(out, 1);
- __ B(final_label);
+ __ Cmp(out, 0);
+ // We speculatively set the result to false without changing the condition
+ // flags, which allows us to avoid some branching later.
+ __ Mov(LeaveFlags, out, 0);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (out.IsLow()) {
+ __ Bind(&exact_check);
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(eq);
+ __ mov(eq, out, 1);
+ } else {
+ __ B(ne, final_label, /* far_target */ false);
+ __ Bind(&exact_check);
+ __ Mov(out, 1);
+ }
+
break;
}
@@ -7211,9 +7300,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
codegen_->AddSlowPath(slow_path);
__ B(ne, slow_path->GetEntryLabel());
__ Mov(out, 1);
- if (zero.IsReferenced()) {
- __ B(final_label);
- }
break;
}
@@ -7242,18 +7328,10 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
/* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
__ B(slow_path->GetEntryLabel());
- if (zero.IsReferenced()) {
- __ B(final_label);
- }
break;
}
}
- if (zero.IsReferenced()) {
- __ Bind(&zero);
- __ Mov(out, 0);
- }
-
if (done.IsReferenced()) {
__ Bind(&done);
}
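// [Illustrative sketch, not part of the patch] The exact-check path above in
// plain C++: the result is speculatively set to false without disturbing the
// comparison outcome, then conditionally flipped to true. On Thumb-2 the
// conditional write becomes a 16-bit IT+MOV pair when `out` is a low
// register, which is what lets the old `zero` label and its branches go away.
bool InstanceOfExactCheckModel(const void* obj_class, const void* cls) {
  bool out = false;         // __ Mov(LeaveFlags, out, 0);
  if (obj_class == cls) {   // flags produced by __ Cmp(out, cls);
    out = true;             // __ it(eq); __ mov(eq, out, 1);
  }
  return out;
}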
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 781027ab30..1e9669dc38 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -401,9 +401,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
void GenerateCompareTestAndBranch(HCondition* condition,
vixl::aarch32::Label* true_target,
vixl::aarch32::Label* false_target);
- void GenerateFPJumps(HCondition* cond,
- vixl::aarch32::Label* true_label,
- vixl::aarch32::Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond,
vixl::aarch32::Label* true_label,
vixl::aarch32::Label* false_label);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5246dbc5cb..c82533bc7d 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -558,26 +558,21 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() {
return;
}
- // Make sure the frame size isn't unreasonably large. Per the various APIs
- // it looks like it should always be less than 2GB in size, which allows
- // us using 32-bit signed offsets from the stack pointer.
- if (GetFrameSize() > 0x7FFFFFFF)
- LOG(FATAL) << "Stack frame larger than 2GB";
+ // Make sure the frame size isn't unreasonably large.
+ if (GetFrameSize() > GetStackOverflowReservedBytes(kMips64)) {
+ LOG(FATAL) << "Stack frame larger than " << GetStackOverflowReservedBytes(kMips64) << " bytes";
+ }
// Spill callee-saved registers.
- // Note that their cumulative size is small and they can be indexed using
- // 16-bit offsets.
-
- // TODO: increment/decrement SP in one step instead of two or remove this comment.
- uint32_t ofs = FrameEntrySpillSize();
+ uint32_t ofs = GetFrameSize();
__ IncreaseFrameSize(ofs);
for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
GpuRegister reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
ofs -= kMips64DoublewordSize;
- __ Sd(reg, SP, ofs);
+ __ StoreToOffset(kStoreDoubleword, reg, SP, ofs);
__ cfi().RelOffset(DWARFReg(reg), ofs);
}
}
@@ -586,23 +581,16 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() {
FpuRegister reg = kFpuCalleeSaves[i];
if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
ofs -= kMips64DoublewordSize;
- __ Sdc1(reg, SP, ofs);
+ __ StoreFpuToOffset(kStoreDoubleword, reg, SP, ofs);
__ cfi().RelOffset(DWARFReg(reg), ofs);
}
}
- // Allocate the rest of the frame and store the current method pointer
- // at its end.
-
- __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
// Save the current method if we need it. Note that we do not
// do this in HCurrentMethod, as the instruction might have been removed
// in the SSA graph.
if (RequiresCurrentMethod()) {
- static_assert(IsInt<16>(kCurrentMethodStackOffset),
- "kCurrentMethodStackOffset must fit into int16_t");
- __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+ __ StoreToOffset(kStoreDoubleword, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
}
if (GetGraph()->HasShouldDeoptimizeFlag()) {
@@ -615,42 +603,32 @@ void CodeGeneratorMIPS64::GenerateFrameExit() {
__ cfi().RememberState();
if (!HasEmptyFrame()) {
- // Deallocate the rest of the frame.
-
- __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
// Restore callee-saved registers.
- // Note that their cumulative size is small and they can be indexed using
- // 16-bit offsets.
-
- // TODO: increment/decrement SP in one step instead of two or remove this comment.
- uint32_t ofs = 0;
-
- for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
- FpuRegister reg = kFpuCalleeSaves[i];
- if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
- __ Ldc1(reg, SP, ofs);
- ofs += kMips64DoublewordSize;
+ // For better instruction scheduling, restore RA before the other registers.

+ uint32_t ofs = GetFrameSize();
+ for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+ GpuRegister reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ ofs -= kMips64DoublewordSize;
+ __ LoadFromOffset(kLoadDoubleword, reg, SP, ofs);
__ cfi().Restore(DWARFReg(reg));
}
}
- for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- GpuRegister reg = kCoreCalleeSaves[i];
- if (allocated_registers_.ContainsCoreRegister(reg)) {
- __ Ld(reg, SP, ofs);
- ofs += kMips64DoublewordSize;
+ for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+ FpuRegister reg = kFpuCalleeSaves[i];
+ if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+ ofs -= kMips64DoublewordSize;
+ __ LoadFpuFromOffset(kLoadDoubleword, reg, SP, ofs);
__ cfi().Restore(DWARFReg(reg));
}
}
- DCHECK_EQ(ofs, FrameEntrySpillSize());
- __ DecreaseFrameSize(ofs);
+ __ DecreaseFrameSize(GetFrameSize());
}
- __ Jr(RA);
- __ Nop();
+ __ Jic(RA, 0);
__ cfi().RestoreState();
__ cfi().DefCFAOffset(GetFrameSize());
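// [Illustrative sketch, not part of the patch] The new MIPS64 prologue
// allocates the whole frame in one SP adjustment and stores callee-saves at
// offsets counted down from the frame size, so FrameEntrySpillSize() and the
// second SP step are no longer needed. A standalone model of that bookkeeping:
#include <cstdint>
#include <vector>

std::vector<uint32_t> CalleeSaveOffsets(uint32_t frame_size, size_t num_regs) {
  constexpr uint32_t kDoublewordSize = 8;  // kMips64DoublewordSize
  std::vector<uint32_t> offsets;
  uint32_t ofs = frame_size;  // SP has already been lowered by frame_size.
  for (size_t i = 0; i < num_regs; ++i) {
    ofs -= kDoublewordSize;   // Registers are packed at the top of the frame.
    offsets.push_back(ofs);
  }
  return offsets;
}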
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4db4796985..80776e8b78 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -723,7 +723,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2ffc398287..49f099f6a9 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -744,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -3660,7 +3660,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
Primitive::Type type = instruction->GetResultType();
- DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
bool is_div = instruction->IsDiv();
LocationSummary* locations = instruction->GetLocations();
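// [Illustrative sketch, not part of the patch] Why the old DCHECK never
// fired: without the second `type ==`, the right-hand operand is a non-zero
// enumerator that converts to true, so the whole expression is always true.
// The enum values here are made up for the example.
#include <cassert>

enum ToyType { kToyInt = 4, kToyLong = 7 };

void CheckDivRemType(ToyType type) {
  // Old form: `type == kToyInt || kToyLong` is always true (kToyLong != 0).
  assert(type == kToyInt || kToyLong);
  // Fixed form: both alternatives actually compare against `type`.
  assert(type == kToyInt || type == kToyLong);
}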
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index cd954043f5..31cd204c9f 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -74,7 +74,6 @@ class CodegenTargetConfig {
}
private:
- CodegenTargetConfig() {}
InstructionSet isa_;
CreateCodegenFn create_codegen_;
};
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index e184745520..01304ac35b 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -66,6 +66,11 @@ inline vixl::aarch32::SRegister LowSRegisterFrom(Location location) {
return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>());
}
+inline vixl::aarch32::SRegister HighSRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegisterPair()) << location;
+ return vixl::aarch32::SRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::SRegister>());
+}
+
inline vixl::aarch32::Register RegisterFrom(Location location) {
DCHECK(location.IsRegister()) << location;
return vixl::aarch32::Register(location.reg());
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 0dfae11465..cc3c143b15 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -505,6 +505,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
}
+ void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+ StartAttributeStream("kind") << deoptimize->GetKind();
+ }
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 82ee93d5c2..9516ccb385 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -29,7 +29,21 @@ namespace art {
*/
class InductionVarAnalysisTest : public CommonCompilerTest {
public:
- InductionVarAnalysisTest() : pool_(), allocator_(&pool_) {
+ InductionVarAnalysisTest()
+ : pool_(),
+ allocator_(&pool_),
+ iva_(nullptr),
+ entry_(nullptr),
+ return_(nullptr),
+ exit_(nullptr),
+ parameter_(nullptr),
+ constant0_(nullptr),
+ constant1_(nullptr),
+ constant2_(nullptr),
+ constant7_(nullptr),
+ constant100_(nullptr),
+ constantm1_(nullptr),
+ float_constant0_(nullptr) {
graph_ = CreateGraph(&allocator_);
}
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 62f5114e59..19f668dc1d 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -46,32 +46,100 @@
namespace art {
-static constexpr size_t kMaximumNumberOfHInstructions = 32;
+// Instruction limit to control memory.
+static constexpr size_t kMaximumNumberOfTotalInstructions = 1024;
+
+// Maximum number of instructions for considering a method small,
+// which we will always try to inline if the other non-instruction limits
+// are not reached.
+static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3;
// Limit the number of dex registers that we accumulate while inlining
// to avoid creating large amount of nested environments.
static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
-// Avoid inlining within a huge method due to memory pressure.
-static constexpr size_t kMaximumCodeUnitSize = 4096;
+// Limit recursive call inlining, which does not benefit much
+// compared to the loss in code locality.
+static constexpr size_t kMaximumNumberOfRecursiveCalls = 4;
// Controls the use of inline caches in AOT mode.
static constexpr bool kUseAOTInlineCaches = false;
-void HInliner::Run() {
- const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
- if ((compiler_options.GetInlineDepthLimit() == 0)
- || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
- return;
+// We check for line numbers to make sure the DepthString implementation
+// aligns the output nicely.
+#define LOG_INTERNAL(msg) \
+ static_assert(__LINE__ > 10, "Unhandled line number"); \
+ static_assert(__LINE__ < 10000, "Unhandled line number"); \
+ VLOG(compiler) << DepthString(__LINE__) << msg
+
+#define LOG_TRY() LOG_INTERNAL("Try inlining call: ")
+#define LOG_NOTE() LOG_INTERNAL("Note: ")
+#define LOG_SUCCESS() LOG_INTERNAL("Success: ")
+#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ")
+#define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ")
+
+std::string HInliner::DepthString(int line) const {
+ std::string value;
+ // Indent according to the inlining depth.
+ size_t count = depth_;
+ // Line numbers get printed in the log, so pad with one space if the line is below
+ // 1000 and with two if it is below 100. Lines below 10 cannot occur, as that range
+ // is the copyright header.
+ if (!kIsTargetBuild) {
+ if (line < 100) {
+ value += " ";
+ }
+ if (line < 1000) {
+ value += " ";
+ }
+ // Safeguard if this file reaches more than 10000 lines.
+ DCHECK_LT(line, 10000);
}
- if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
- return;
+ for (size_t i = 0; i < count; ++i) {
+ value += " ";
+ }
+ return value;
+}
+
+static size_t CountNumberOfInstructions(HGraph* graph) {
+ size_t number_of_instructions = 0;
+ for (HBasicBlock* block : graph->GetReversePostOrderSkipEntryBlock()) {
+ for (HInstructionIterator instr_it(block->GetInstructions());
+ !instr_it.Done();
+ instr_it.Advance()) {
+ ++number_of_instructions;
+ }
+ }
+ return number_of_instructions;
+}
+
+void HInliner::UpdateInliningBudget() {
+ if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) {
+ // Always try to inline small methods.
+ inlining_budget_ = kMaximumNumberOfInstructionsForSmallMethod;
+ } else {
+ inlining_budget_ = std::max(
+ kMaximumNumberOfInstructionsForSmallMethod,
+ kMaximumNumberOfTotalInstructions - total_number_of_instructions_);
}
+}
+
+void HInliner::Run() {
if (graph_->IsDebuggable()) {
// For simplicity, we currently never inline when the graph is debuggable. This avoids
// doing some logic in the runtime to discover if a method could have been inlined.
return;
}
+
+ // Initialize the number of instructions for the method being compiled. Recursive calls
+ // to HInliner::Run have already updated the instruction count.
+ if (outermost_graph_ == graph_) {
+ total_number_of_instructions_ = CountNumberOfInstructions(graph_);
+ }
+
+ UpdateInliningBudget();
+ DCHECK_NE(total_number_of_instructions_, 0u);
+ DCHECK_NE(inlining_budget_, 0u);
+
// Keep a copy of all blocks when starting the visit.
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
DCHECK(!blocks.empty());
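// [Illustrative sketch, not part of the patch] The budget arithmetic from
// UpdateInliningBudget(), with the constants copied from above: once the
// running instruction count crosses the global cap, only "small" methods
// (at most 3 instructions) remain inlinable; otherwise the budget is the
// remaining headroom.
#include <algorithm>
#include <cstddef>

size_t ComputeInliningBudget(size_t total_number_of_instructions) {
  constexpr size_t kMaxTotal = 1024;  // kMaximumNumberOfTotalInstructions
  constexpr size_t kSmall = 3;        // kMaximumNumberOfInstructionsForSmallMethod
  if (total_number_of_instructions >= kMaxTotal) {
    return kSmall;
  }
  return std::max(kSmall, kMaxTotal - total_number_of_instructions);
}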
@@ -292,7 +360,18 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
return nullptr;
}
PointerSize pointer_size = caller_compilation_unit_.GetClassLinker()->GetImagePointerSize();
- return resolved_method->GetSingleImplementation(pointer_size);
+ ArtMethod* single_impl = resolved_method->GetSingleImplementation(pointer_size);
+ if (single_impl == nullptr) {
+ return nullptr;
+ }
+ if (single_impl->IsProxyMethod()) {
+ // Proxy method is a generic invoker that's not worth
+ // devirtualizing/inlining. It also causes issues when the proxy
+ // method is in another dex file if we try to rewrite invoke-interface to
+ // invoke-virtual because a proxy method doesn't have a real dex file.
+ return nullptr;
+ }
+ return single_impl;
}
bool HInliner::TryInline(HInvoke* invoke_instruction) {
@@ -305,17 +384,18 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
ScopedObjectAccess soa(Thread::Current());
uint32_t method_index = invoke_instruction->GetDexMethodIndex();
const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
- VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index);
+ LOG_TRY() << caller_dex_file.PrettyMethod(method_index);
- // We can query the dex cache directly. The verifier has populated it already.
ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod();
- ArtMethod* actual_method = nullptr;
if (resolved_method == nullptr) {
DCHECK(invoke_instruction->IsInvokeStaticOrDirect());
DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit());
- VLOG(compiler) << "Not inlining a String.<init> method";
+ LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method";
return false;
- } else if (invoke_instruction->IsInvokeStaticOrDirect()) {
+ }
+ ArtMethod* actual_method = nullptr;
+
+ if (invoke_instruction->IsInvokeStaticOrDirect()) {
actual_method = resolved_method;
} else {
// Check if we can statically find the method.
@@ -328,6 +408,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
if (method != nullptr) {
cha_devirtualize = true;
actual_method = method;
+ LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod();
}
}
@@ -390,16 +471,23 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
: GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache);
switch (inline_cache_type) {
- case kInlineCacheNoData:
- break;
+ case kInlineCacheNoData: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " could not be statically determined";
+ return false;
+ }
- case kInlineCacheUninitialized:
- VLOG(compiler) << "Interface or virtual call to "
- << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
- << " is not hit and not inlined";
+ case kInlineCacheUninitialized: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " is not hit and not inlined";
return false;
+ }
- case kInlineCacheMonomorphic:
+ case kInlineCacheMonomorphic: {
MaybeRecordStat(kMonomorphicCall);
if (outermost_graph_->IsCompilingOsr()) {
// If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
@@ -408,23 +496,29 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
} else {
return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
}
+ }
- case kInlineCachePolymorphic:
+ case kInlineCachePolymorphic: {
MaybeRecordStat(kPolymorphicCall);
return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
+ }
- case kInlineCacheMegamorphic:
- VLOG(compiler) << "Interface or virtual call to "
- << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
- << " is megamorphic and not inlined";
+ case kInlineCacheMegamorphic: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " is megamorphic and not inlined";
MaybeRecordStat(kMegamorphicCall);
return false;
+ }
- case kInlineCacheMissingTypes:
- VLOG(compiler) << "Interface or virtual call to "
- << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
- << " is missing types and not inlined";
+ case kInlineCacheMissingTypes: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " is missing types and not inlined";
return false;
+ }
}
UNREACHABLE();
}
@@ -587,9 +681,10 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
dex::TypeIndex class_index = FindClassIndexIn(
GetMonomorphicType(classes), caller_compilation_unit_);
if (!class_index.IsValid()) {
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
- << " from inline cache is not inlined because its class is not"
- << " accessible to the caller";
+ LOG_FAIL(kNotInlinedDexCache)
+ << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+ << " from inline cache is not inlined because its class is not"
+ << " accessible to the caller";
return false;
}
@@ -603,6 +698,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual(
resolved_method, pointer_size);
}
+ LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod();
DCHECK(resolved_method != nullptr);
HInstruction* receiver = invoke_instruction->InputAt(0);
HInstruction* cursor = invoke_instruction->GetPrevious();
@@ -646,7 +742,8 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction,
HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc);
HInstruction* compare = new (graph_->GetArena()) HNotEqual(
deopt_flag, graph_->GetIntConstant(0, dex_pc));
- HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc);
+ HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
+ graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc);
if (cursor != nullptr) {
bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
@@ -710,9 +807,16 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
bb_cursor->InsertInstructionAfter(compare, load_class);
if (with_deoptimization) {
HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
- compare, invoke_instruction->GetDexPc());
+ graph_->GetArena(),
+ compare,
+ receiver,
+ HDeoptimize::Kind::kInline,
+ invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+ DCHECK_EQ(invoke_instruction->InputAt(0), receiver);
+ receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
+ deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
}
return compare;
}
@@ -752,6 +856,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_);
HInstruction* return_replacement = nullptr;
+ LOG_NOTE() << "Try inline polymorphic call to " << method->PrettyMethod();
if (!class_index.IsValid() ||
!TryBuildAndInline(invoke_instruction,
method,
@@ -761,8 +866,8 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
} else {
one_target_inlined = true;
- VLOG(compiler) << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
- << " has inlined " << ArtMethod::PrettyMethod(method);
+ LOG_SUCCESS() << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
+ << " has inlined " << ArtMethod::PrettyMethod(method);
// If we have inlined all targets before, and this receiver is the last seen,
// we deoptimize instead of keeping the original invoke instruction.
@@ -796,9 +901,10 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
}
if (!one_target_inlined) {
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
- << " from inline cache is not inlined because none"
- << " of its targets could be inlined";
+ LOG_FAIL_NO_STAT()
+ << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+ << " from inline cache is not inlined because none"
+ << " of its targets could be inlined";
return false;
}
@@ -932,9 +1038,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
actual_method = new_method;
} else if (actual_method != new_method) {
// Different methods, bailout.
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
- << " from inline cache is not inlined because it resolves"
- << " to different methods";
return false;
}
}
@@ -988,13 +1091,19 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
} else {
HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
- compare, invoke_instruction->GetDexPc());
+ graph_->GetArena(),
+ compare,
+ receiver,
+ HDeoptimize::Kind::kInline,
+ invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
if (return_replacement != nullptr) {
invoke_instruction->ReplaceWith(return_replacement);
}
+ receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+ deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
}
// Run type propagation to get the guard typed.
@@ -1007,6 +1116,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
MaybeRecordStat(kInlinedPolymorphicCall);
+ LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
return true;
}
@@ -1021,11 +1131,23 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
if (invoke_instruction->IsInvokeInterface()) {
+ DCHECK(!method->IsProxyMethod());
// Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
// better than an invoke-interface because:
// 1) In the best case, the interface call has one more indirection (to fetch the IMT).
// 2) We will not go to the conflict trampoline with an invoke-virtual.
// TODO: Consider sharpening once it is not dependent on the compiler driver.
+
+ if (method->IsDefault() && !method->IsCopied()) {
+ // Changing to invoke-virtual cannot be done on an original default method
+ // since it's not in any vtable. Devirtualization by exact type/inline-cache
+ // always uses a method in the iftable which is never an original default
+ // method.
+ // On the other hand, inlining an original default method by CHA is fine.
+ DCHECK(cha_devirtualize);
+ return false;
+ }
+
const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
uint32_t dex_method_index = FindMethodIndexIn(
method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
@@ -1076,13 +1198,34 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
return true;
}
+size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const {
+ const HInliner* current = this;
+ size_t count = 0;
+ do {
+ if (current->graph_->GetArtMethod() == method) {
+ ++count;
+ }
+ current = current->parent_;
+ } while (current != nullptr);
+ return count;
+}
+
bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* method,
ReferenceTypeInfo receiver_type,
HInstruction** return_replacement) {
if (method->IsProxyMethod()) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because of unimplemented inline support for proxy methods.";
+ LOG_FAIL(kNotInlinedProxy)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because of unimplemented inline support for proxy methods.";
+ return false;
+ }
+
+ if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
+ LOG_FAIL(kNotInlinedRecursiveBudget)
+ << "Method "
+ << method->PrettyMethod()
+ << " is not inlined because it has reached its recursive call budget.";
return false;
}
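// [Illustrative sketch, not part of the patch] CountRecursiveCallsOf() walks
// the chain of parent_ inliners and counts how many enclosing graphs belong
// to the same method; a standalone model with a stand-in frame type:
#include <cstddef>

struct InlinerFrame {
  const void* method;          // stand-in for graph_->GetArtMethod()
  const InlinerFrame* parent;  // stand-in for parent_
};

size_t CountRecursiveCalls(const InlinerFrame* current, const void* method) {
  size_t count = 0;
  for (; current != nullptr; current = current->parent) {
    if (current->method == method) {
      ++count;
    }
  }
  return count;
}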
@@ -1091,15 +1234,16 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
if (!compiler_driver_->MayInline(method->GetDexFile(),
outer_compilation_unit_.GetDexFile())) {
if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
- VLOG(compiler) << "Successfully replaced pattern of invoke "
- << method->PrettyMethod();
+ LOG_SUCCESS() << "Successfully replaced pattern of invoke "
+ << method->PrettyMethod();
MaybeRecordStat(kReplacedInvokeWithSimplePattern);
return true;
}
- VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in "
- << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
- << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
- << method->GetDexFile()->GetLocation();
+ LOG_FAIL(kNotInlinedWont)
+ << "Won't inline " << method->PrettyMethod() << " in "
+ << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
+ << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
+ << method->GetDexFile()->GetLocation();
return false;
}
@@ -1108,30 +1252,32 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
const DexFile::CodeItem* code_item = method->GetCodeItem();
if (code_item == nullptr) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because it is native";
+ LOG_FAIL_NO_STAT()
+ << "Method " << method->PrettyMethod() << " is not inlined because it is native";
return false;
}
size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is too big to inline: "
- << code_item->insns_size_in_code_units_
- << " > "
- << inline_max_code_units;
+ LOG_FAIL(kNotInlinedCodeItem)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because its code item is too big: "
+ << code_item->insns_size_in_code_units_
+ << " > "
+ << inline_max_code_units;
return false;
}
if (code_item->tries_size_ != 0) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because of try block";
+ LOG_FAIL(kNotInlinedTryCatch)
+ << "Method " << method->PrettyMethod() << " is not inlined because of try block";
return false;
}
if (!method->IsCompilable()) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " has soft failures un-handled by the compiler, so it cannot be inlined";
+ LOG_FAIL(kNotInlinedNotVerified)
+ << "Method " << method->PrettyMethod()
+ << " has soft failures un-handled by the compiler, so it cannot be inlined";
}
if (!method->GetDeclaringClass()->IsVerified()) {
@@ -1139,8 +1285,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
if (Runtime::Current()->UseJitCompilation() ||
!compiler_driver_->IsMethodVerifiedWithoutFailures(
method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " couldn't be verified, so it cannot be inlined";
+ LOG_FAIL(kNotInlinedNotVerified)
+ << "Method " << method->PrettyMethod()
+ << " couldn't be verified, so it cannot be inlined";
return false;
}
}
@@ -1149,9 +1296,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
// Case of a static method that cannot be inlined because it implicitly
// requires an initialization check of its declaring class.
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because it is static and requires a clinit"
- << " check that cannot be emitted due to Dex cache limitations";
+ LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod()
+ << " is not inlined because it is static and requires a clinit"
+ << " check that cannot be emitted due to Dex cache limitations";
return false;
}
@@ -1160,7 +1307,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- VLOG(compiler) << "Successfully inlined " << method->PrettyMethod();
+ LOG_SUCCESS() << method->PrettyMethod();
MaybeRecordStat(kInlinedInvoke);
return true;
}
@@ -1448,15 +1595,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
handles_);
if (builder.BuildGraph() != kAnalysisSuccess) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be built, so cannot be inlined";
+ LOG_FAIL(kNotInlinedCannotBuild)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be built, so cannot be inlined";
return false;
}
if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
compiler_driver_->GetInstructionSet())) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " cannot be inlined because of the register allocator";
+ LOG_FAIL(kNotInlinedRegisterAllocator)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " cannot be inlined because of the register allocator";
return false;
}
@@ -1503,15 +1652,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
/* is_first_run */ false).Run();
}
- size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
- size_t number_of_inlined_instructions =
- RunOptimizations(callee_graph, code_item, dex_compilation_unit);
- number_of_instructions_budget += number_of_inlined_instructions;
+ RunOptimizations(callee_graph, code_item, dex_compilation_unit);
HBasicBlock* exit_block = callee_graph->GetExitBlock();
if (exit_block == nullptr) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it has an infinite loop";
+ LOG_FAIL(kNotInlinedInfiniteLoop)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it has an infinite loop";
return false;
}
@@ -1520,15 +1667,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
if (predecessor->GetLastInstruction()->IsThrow()) {
if (invoke_instruction->GetBlock()->IsTryBlock()) {
// TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because one branch always throws and"
- << " caller is in a try/catch block";
+ LOG_FAIL(kNotInlinedTryCatch)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because one branch always throws and"
+ << " caller is in a try/catch block";
return false;
} else if (graph_->GetExitBlock() == nullptr) {
// TODO(ngeoffray): Support adding HExit in the caller graph.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because one branch always throws and"
- << " caller does not have an exit block";
+ LOG_FAIL(kNotInlinedInfiniteLoop)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because one branch always throws and"
+ << " caller does not have an exit block";
return false;
} else if (graph_->HasIrreducibleLoops()) {
// TODO(ngeoffray): Support re-computing loop information to graphs with
@@ -1544,32 +1693,31 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
}
if (!has_one_return) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it always throws";
+ LOG_FAIL(kNotInlinedAlwaysThrows)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it always throws";
return false;
}
size_t number_of_instructions = 0;
-
- bool can_inline_environment =
- total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
-
// Skip the entry block, it does not contain instructions that prevent inlining.
for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) {
if (block->IsLoopHeader()) {
if (block->GetLoopInformation()->IsIrreducible()) {
// Don't inline methods with irreducible loops, they could prevent some
// optimizations to run.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it contains an irreducible loop";
+ LOG_FAIL(kNotInlinedIrreducibleLoop)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it contains an irreducible loop";
return false;
}
if (!block->GetLoopInformation()->HasExitEdge()) {
// Don't inline methods with loops without exit, since they cause the
// loop information to be computed incorrectly when updating after
// inlining.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it contains a loop with no exit";
+ LOG_FAIL(kNotInlinedLoopWithoutExit)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it contains a loop with no exit";
return false;
}
}
@@ -1577,34 +1725,39 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
for (HInstructionIterator instr_it(block->GetInstructions());
!instr_it.Done();
instr_it.Advance()) {
- if (number_of_instructions++ == number_of_instructions_budget) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " is not inlined because its caller has reached"
- << " its instruction budget limit.";
+ if (++number_of_instructions >= inlining_budget_) {
+ LOG_FAIL(kNotInlinedInstructionBudget)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " is not inlined because the outer method has reached"
+ << " its instruction budget limit.";
return false;
}
HInstruction* current = instr_it.Current();
- if (!can_inline_environment && current->NeedsEnvironment()) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " is not inlined because its caller has reached"
- << " its environment budget limit.";
+ if (current->NeedsEnvironment() &&
+ (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) {
+ LOG_FAIL(kNotInlinedEnvironmentBudget)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " is not inlined because its caller has reached"
+ << " its environment budget limit.";
return false;
}
if (current->NeedsEnvironment() &&
!CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
resolved_method)) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because " << current->DebugName()
- << " needs an environment, is in a different dex file"
- << ", and cannot be encoded in the stack maps.";
+ LOG_FAIL(kNotInlinedStackMaps)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because " << current->DebugName()
+ << " needs an environment, is in a different dex file"
+ << ", and cannot be encoded in the stack maps.";
return false;
}
if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because " << current->DebugName()
- << " it is in a different dex file and requires access to the dex cache";
+ LOG_FAIL(kNotInlinedDexCache)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because " << current->DebugName()
+ << " it is in a different dex file and requires access to the dex cache";
return false;
}
@@ -1613,21 +1766,24 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
current->IsUnresolvedStaticFieldSet() ||
current->IsUnresolvedInstanceFieldSet()) {
// Entrypoint for unresolved fields does not handle inlined frames.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it is using an unresolved"
- << " entrypoint";
+ LOG_FAIL(kNotInlinedUnresolvedEntrypoint)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it is using an unresolved"
+ << " entrypoint";
return false;
}
}
}
- number_of_inlined_instructions_ += number_of_instructions;
-
DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId())
<< "No instructions can be added to the outer graph while inner graph is being built";
+ // Inline the callee graph inside the caller graph.
const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId();
graph_->SetCurrentInstructionId(callee_instruction_counter);
*return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
+ // Update our budget for other inlining attempts in `caller_graph`.
+ total_number_of_instructions_ += number_of_instructions;
+ UpdateInliningBudget();
DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
<< "No instructions can be added to the inner graph during inlining into the outer graph";
@@ -1640,9 +1796,9 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
return true;
}
-size_t HInliner::RunOptimizations(HGraph* callee_graph,
- const DexFile::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit) {
+void HInliner::RunOptimizations(HGraph* callee_graph,
+ const DexFile::CodeItem* code_item,
+ const DexCompilationUnit& dex_compilation_unit) {
// Note: if the outermost_graph_ is being compiled OSR, we should not run any
// optimization that could lead to a HDeoptimize. The following optimizations do not.
HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
@@ -1664,23 +1820,37 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph,
optimization->Run();
}
- size_t number_of_inlined_instructions = 0u;
- if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
- HInliner inliner(callee_graph,
- outermost_graph_,
- codegen_,
- outer_compilation_unit_,
- dex_compilation_unit,
- compiler_driver_,
- handles_,
- inline_stats_,
- total_number_of_dex_registers_ + code_item->registers_size_,
- depth_ + 1);
- inliner.Run();
- number_of_inlined_instructions += inliner.number_of_inlined_instructions_;
+ // Bail early for pathological environment cases (for example recursive calls
+ // or a too-large environment).
+ if (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters) {
+ LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
+ << " will not be inlined because the outer method has reached"
+ << " its environment budget limit.";
+ return;
+ }
+
+ // Bail early if we know we already are over the limit.
+ size_t number_of_instructions = CountNumberOfInstructions(callee_graph);
+ if (number_of_instructions > inlining_budget_) {
+ LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
+ << " will not be inlined because the outer method has reached"
+ << " its instruction budget limit. " << number_of_instructions;
+ return;
}
- return number_of_inlined_instructions;
+ HInliner inliner(callee_graph,
+ outermost_graph_,
+ codegen_,
+ outer_compilation_unit_,
+ dex_compilation_unit,
+ compiler_driver_,
+ handles_,
+ inline_stats_,
+ total_number_of_dex_registers_ + code_item->registers_size_,
+ total_number_of_instructions_ + number_of_instructions,
+ this,
+ depth_ + 1);
+ inliner.Run();
}
static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index a032042c78..9e4685cbf4 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -42,7 +42,9 @@ class HInliner : public HOptimization {
VariableSizedHandleScope* handles,
OptimizingCompilerStats* stats,
size_t total_number_of_dex_registers,
- size_t depth)
+ size_t total_number_of_instructions,
+ HInliner* parent,
+ size_t depth = 0)
: HOptimization(outer_graph, kInlinerPassName, stats),
outermost_graph_(outermost_graph),
outer_compilation_unit_(outer_compilation_unit),
@@ -50,8 +52,10 @@ class HInliner : public HOptimization {
codegen_(codegen),
compiler_driver_(compiler_driver),
total_number_of_dex_registers_(total_number_of_dex_registers),
+ total_number_of_instructions_(total_number_of_instructions),
+ parent_(parent),
depth_(depth),
- number_of_inlined_instructions_(0),
+ inlining_budget_(0),
handles_(handles),
inline_stats_(nullptr) {}
@@ -95,10 +99,10 @@ class HInliner : public HOptimization {
HInstruction** return_replacement);
// Run simple optimizations on `callee_graph`.
- // Returns the number of inlined instructions.
- size_t RunOptimizations(HGraph* callee_graph,
- const DexFile::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit);
+ void RunOptimizations(HGraph* callee_graph,
+ const DexFile::CodeItem* code_item,
+ const DexCompilationUnit& dex_compilation_unit)
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Try to recognize known simple patterns and replace invoke call with appropriate instructions.
bool TryPatternSubstitution(HInvoke* invoke_instruction,
@@ -259,14 +263,30 @@ class HInliner : public HOptimization {
HInstruction* return_replacement,
HInstruction* invoke_instruction);
+ // Update the inlining budget based on `total_number_of_instructions_`.
+ void UpdateInliningBudget();
+
+ // Count the number of calls of `method` being inlined recursively.
+ size_t CountRecursiveCallsOf(ArtMethod* method) const;
+
+ // Pretty-print the inlining depth as indentation spaces for logging.
+ std::string DepthString(int line) const;
+
HGraph* const outermost_graph_;
const DexCompilationUnit& outer_compilation_unit_;
const DexCompilationUnit& caller_compilation_unit_;
CodeGenerator* const codegen_;
CompilerDriver* const compiler_driver_;
const size_t total_number_of_dex_registers_;
+ size_t total_number_of_instructions_;
+
+ // The 'parent' inliner, i.e. the inlining optimization that requested
+ // `graph_` to be inlined.
+ const HInliner* const parent_;
const size_t depth_;
- size_t number_of_inlined_instructions_;
+
+ // The budget left for inlining, in number of instructions.
+ size_t inlining_budget_;
VariableSizedHandleScope* const handles_;
// Used to record stats about optimizations on the inlined graph.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 17421fc364..60790e5b84 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -2132,6 +2132,9 @@ void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) {
if (cond->IsConstant()) {
if (cond->AsIntConstant()->IsFalse()) {
// Never deopt: instruction can be removed.
+ if (deoptimize->GuardsAnInput()) {
+ deoptimize->ReplaceWith(deoptimize->GuardedInput());
+ }
deoptimize->GetBlock()->RemoveInstruction(deoptimize);
} else {
// Always deopt.
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index b25bad7170..0d933eaf82 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -39,6 +39,7 @@ using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
+using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputSRegister;
using helpers::OutputRegister;
@@ -794,6 +795,58 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
__ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
+void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+ if (features_.HasARMv8AInstructions()) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
+
+ ArmVIXLAssembler* assembler = GetAssembler();
+ vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
+ vixl32::Register out_reg = OutputRegister(invoke);
+ vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ vixl32::Label done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
+
+ // Round to nearest integer, ties away from zero.
+ __ Vcvta(S32, F32, temp1, in_reg);
+ __ Vmov(out_reg, temp1);
+
+ // For positive, zero or NaN inputs, rounding is done.
+ __ Cmp(out_reg, 0);
+ __ B(ge, final_label, /* far_target */ false);
+
+ // Handle input < 0 cases.
+ // If input is negative but not a tie, previous result (round to nearest) is valid.
+ // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
+ __ Vrinta(F32, F32, temp1, in_reg);
+ __ Vmov(temp2, 0.5);
+ __ Vsub(F32, temp1, in_reg, temp1);
+ __ Vcmp(F32, temp1, temp2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ {
+ // Use ExactAssemblyScope here because we are using an IT block.
+ ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(eq);
+ __ add(eq, out_reg, out_reg, 1);
+ }
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
CreateIntToIntLocations(arena_, invoke);
}
@@ -3100,7 +3153,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
}
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
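// [Illustrative sketch, not part of the patch] Scalar model of the new
// MathRoundFloat lowering: vcvta rounds to nearest with ties away from zero,
// which matches Math.round except for negative ties, where the result must be
// bumped toward +infinity by one. NaN and saturation handling are omitted.
#include <cmath>
#include <cstdint>

int32_t RoundFloatModel(float in) {
  float nearest = std::round(in);             // ties away from zero, like vcvta/vrinta
  int32_t out = static_cast<int32_t>(nearest);
  if (out < 0 && (in - nearest) == 0.5f) {    // negative tie, e.g. -2.5 -> -3
    ++out;                                    // Math.round(-2.5) == -2
  }
  return out;
}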
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 5bcfa4c98b..8d15f78cce 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -28,7 +28,18 @@ namespace art {
*/
class LICMTest : public CommonCompilerTest {
public:
- LICMTest() : pool_(), allocator_(&pool_) {
+ LICMTest()
+ : pool_(),
+ allocator_(&pool_),
+ entry_(nullptr),
+ loop_preheader_(nullptr),
+ loop_header_(nullptr),
+ loop_body_(nullptr),
+ return_(nullptr),
+ exit_(nullptr),
+ parameter_(nullptr),
+ int_constant_(nullptr),
+ float_constant_(nullptr) {
graph_ = CreateGraph(&allocator_);
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ec706e6694..caada8bccb 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1088,6 +1088,19 @@ void HInstruction::ReplaceWith(HInstruction* other) {
DCHECK(env_uses_.empty());
}
+void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+ const HUseList<HInstruction*>& uses = GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (dominator->StrictlyDominates(user)) {
+ user->ReplaceInput(replacement, index);
+ }
+ }
+}
+
void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
HUserRecord<HInstruction*> input_use = InputRecordAt(index);
if (input_use.GetInstruction() == replacement) {
@@ -1323,6 +1336,18 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) {
}
}
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) {
+ switch (rhs) {
+ case HDeoptimize::Kind::kBCE:
+ return os << "bce";
+ case HDeoptimize::Kind::kInline:
+ return os << "inline";
+ default:
+ LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs);
+ UNREACHABLE();
+ }
+}
+
bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
return this == instruction->GetPreviousDisregardingMoves();
}
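
The new HInstruction::ReplaceUsesDominatedBy() above walks an intrusive use list while the loop body may unlink the current use record, which is why the iterator is advanced before ReplaceInput() is called. The snippet below illustrates that pattern with std::list standing in for ART's HUseList; the names are illustrative only.

#include <functional>
#include <iostream>
#include <list>

struct Use { int user_id; };

void ForEachUseSafely(std::list<Use>& uses,
                      const std::function<void(const Use&, std::list<Use>&)>& rewire) {
  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
    Use current = *it;
    ++it;                    // Advance first: rewire() may erase the element `it` pointed to.
    rewire(current, uses);
  }
}

int main() {
  std::list<Use> uses = {{1}, {2}, {3}};
  ForEachUseSafely(uses, [](const Use& u, std::list<Use>& all) {
    if (u.user_id == 2) {
      all.remove_if([](const Use& x) { return x.user_id == 2; });  // Plays the role of ReplaceInput().
    }
  });
  std::cout << uses.size() << '\n';  // Prints 2: erasing a record did not break the iteration.
}
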
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6881d8f6ae..5f5a28c520 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -341,6 +341,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_current_method_(nullptr),
+ art_method_(nullptr),
inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
osr_(osr),
cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) {
@@ -2080,6 +2081,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
void SetLocations(LocationSummary* locations) { locations_ = locations; }
void ReplaceWith(HInstruction* instruction);
+ void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
void ReplaceInput(HInstruction* replacement, size_t index);
// This is almost the same as doing `ReplaceWith()`. But in this helper, the
@@ -2943,28 +2945,97 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> {
};
// Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize FINAL : public HTemplateInstruction<1> {
+class HDeoptimize FINAL : public HVariableInputSizeInstruction {
public:
+ enum class Kind {
+ kBCE,
+ kInline,
+ kLast = kInline
+ };
+
+  // Use this constructor when the `HDeoptimize` acts as a barrier that no code can move
+  // across.
+ HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc)
+ : HVariableInputSizeInstruction(
+ SideEffects::All(),
+ dex_pc,
+ arena,
+ /* number_of_inputs */ 1,
+ kArenaAllocMisc) {
+ SetPackedFlag<kFieldCanBeMoved>(false);
+ SetPackedField<DeoptimizeKindField>(kind);
+ SetRawInputAt(0, cond);
+ }
+
+ // Use this constructor when the `HDeoptimize` guards an instruction, and any user
+ // that relies on the deoptimization to pass should have its input be the `HDeoptimize`
+ // instead of `guard`.
   // We set CanTriggerGC to prevent any intermediate address from being live
   // at the point of the `HDeoptimize`.
- HDeoptimize(HInstruction* cond, uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
+ HDeoptimize(ArenaAllocator* arena,
+ HInstruction* cond,
+ HInstruction* guard,
+ Kind kind,
+ uint32_t dex_pc)
+ : HVariableInputSizeInstruction(
+ SideEffects::CanTriggerGC(),
+ dex_pc,
+ arena,
+ /* number_of_inputs */ 2,
+ kArenaAllocMisc) {
+ SetPackedFlag<kFieldCanBeMoved>(true);
+ SetPackedField<DeoptimizeKindField>(kind);
SetRawInputAt(0, cond);
+ SetRawInputAt(1, guard);
}
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
+ bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); }
+
+ bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind());
}
+
bool NeedsEnvironment() const OVERRIDE { return true; }
+
bool CanThrow() const OVERRIDE { return true; }
+ Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); }
+
+ Primitive::Type GetType() const OVERRIDE {
+ return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid;
+ }
+
+ bool GuardsAnInput() const {
+ return InputCount() == 2;
+ }
+
+ HInstruction* GuardedInput() const {
+ DCHECK(GuardsAnInput());
+ return InputAt(1);
+ }
+
+ void RemoveGuard() {
+ RemoveInputAt(1);
+ }
+
DECLARE_INSTRUCTION(Deoptimize);
private:
+ static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
+ static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
+ static constexpr size_t kFieldDeoptimizeKindSize =
+ MinimumBitsToStore(static_cast<size_t>(Kind::kLast));
+ static constexpr size_t kNumberOfDeoptimizePackedBits =
+ kFieldDeoptimizeKind + kFieldDeoptimizeKindSize;
+ static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits,
+ "Too many packed fields.");
+ using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
+
DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
};
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs);
+
// Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
// The compiled code checks this flag value in a guard before devirtualized call and
// if it's true, starts to do deoptimization.
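
The packed-field declarations added to HDeoptimize above reserve one bit for the "can be moved" flag and MinimumBitsToStore(Kind::kLast) bits for the kind. The sketch below re-derives those sizes with local stand-ins for MinimumBitsToStore() and kNumberOfGenericPackedBits (the real definitions live in ART's bit_utils.h and nodes.h, so the concrete numbers here are assumptions of the sketch).

#include <cstddef>

constexpr size_t MinimumBitsToStore(size_t value) {
  size_t bits = 0;
  while (value != 0) {
    value >>= 1;
    ++bits;
  }
  return bits;
}

enum class Kind { kBCE, kInline, kLast = kInline };

constexpr size_t kNumberOfGenericPackedBits = 5;  // Stand-in value for this sketch.
constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
constexpr size_t kFieldDeoptimizeKindSize =
    MinimumBitsToStore(static_cast<size_t>(Kind::kLast));

static_assert(kFieldDeoptimizeKind == kFieldCanBeMoved + 1, "Kind field sits right after the flag");
static_assert(kFieldDeoptimizeKindSize == 1, "Two deoptimize kinds fit in a single packed bit");
static_assert(kFieldDeoptimizeKind + kFieldDeoptimizeKindSize <= 32,
              "Both new fields fit in a 32-bit packed-fields word");

int main() {}
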
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index d84fe6ccff..60af2b4201 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -174,53 +174,45 @@ static constexpr uint8_t expected_cfi_kMips[] = {
// 0x00000034: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips64[] = {
- 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
- 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
- 0xE8, 0xFF, 0xBD, 0x67, 0x18, 0x00, 0xBD, 0x67,
- 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF,
- 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67,
- 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF,
+ 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7,
+ 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF,
+ 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67,
+ 0x00, 0x00, 0x1F, 0xD8,
};
-
static constexpr uint8_t expected_cfi_kMips64[] = {
- 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
- 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x0A, 0x44,
- 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
- 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+ 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+ 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44,
+ 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
};
-// 0x00000000: daddiu r29, r29, -40
-// 0x00000004: .cfi_def_cfa_offset: 40
-// 0x00000004: sd r31, +32(r29)
+// 0x00000000: daddiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sd r31, +56(r29)
// 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +24(r29)
+// 0x00000008: sd r17, +48(r29)
// 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +16(r29)
+// 0x0000000c: sd r16, +40(r29)
// 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000010: sdc1 f25, +32(r29)
// 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000014: sdc1 f24, +24(r29)
// 0x00000018: .cfi_offset: r56 at cfa-40
-// 0x00000018: daddiu r29, r29, -24
-// 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: .cfi_remember_state
-// 0x0000001c: daddiu r29, r29, 24
-// 0x00000020: .cfi_def_cfa_offset: 40
-// 0x00000020: ldc1 f24, +0(r29)
-// 0x00000024: .cfi_restore: r56
-// 0x00000024: ldc1 f25, +8(r29)
+// 0x00000018: .cfi_remember_state
+// 0x00000018: ld r31, +56(r29)
+// 0x0000001c: .cfi_restore: r31
+// 0x0000001c: ld r17, +48(r29)
+// 0x00000020: .cfi_restore: r17
+// 0x00000020: ld r16, +40(r29)
+// 0x00000024: .cfi_restore: r16
+// 0x00000024: ldc1 f25, +32(r29)
// 0x00000028: .cfi_restore: r57
-// 0x00000028: ld r16, +16(r29)
-// 0x0000002c: .cfi_restore: r16
-// 0x0000002c: ld r17, +24(r29)
-// 0x00000030: .cfi_restore: r17
-// 0x00000030: ld r31, +32(r29)
-// 0x00000034: .cfi_restore: r31
-// 0x00000034: daddiu r29, r29, 40
-// 0x00000038: .cfi_def_cfa_offset: 0
-// 0x00000038: jr r31
-// 0x0000003c: nop
-// 0x00000040: .cfi_restore_state
-// 0x00000040: .cfi_def_cfa_offset: 64
+// 0x00000028: ldc1 f24, +24(r29)
+// 0x0000002c: .cfi_restore: r56
+// 0x0000002c: daddiu r29, r29, 64
+// 0x00000030: .cfi_def_cfa_offset: 0
+// 0x00000030: jic r31, 0
+// 0x00000034: .cfi_restore_state
+// 0x00000034: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
#ifdef ART_USE_OLD_ARM_BACKEND
@@ -403,58 +395,52 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = {
// 0x00020060: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
- 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
- 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
- 0xE8, 0xFF, 0xBD, 0x67, 0x02, 0x00, 0xA6, 0x60,
- 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
+ 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF,
+ 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7,
+ 0x02, 0x00, 0xA6, 0x60, 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
};
static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
- 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7,
- 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF,
- 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF,
+ 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67,
+ 0x00, 0x00, 0x1F, 0xD8,
};
static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
- 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
- 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x10, 0x00,
- 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0,
- 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+ 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+ 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x04, 0x10, 0x00, 0x02, 0x00, 0x0A,
+ 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E,
+ 0x00, 0x44, 0x0B, 0x0E, 0x40,
};
-// 0x00000000: daddiu r29, r29, -40
-// 0x00000004: .cfi_def_cfa_offset: 40
-// 0x00000004: sd r31, +32(r29)
+// 0x00000000: daddiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sd r31, +56(r29)
// 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +24(r29)
+// 0x00000008: sd r17, +48(r29)
// 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +16(r29)
+// 0x0000000c: sd r16, +40(r29)
// 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000010: sdc1 f25, +32(r29)
// 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000014: sdc1 f24, +24(r29)
// 0x00000018: .cfi_offset: r56 at cfa-40
-// 0x00000018: daddiu r29, r29, -24
-// 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: bnec r5, r6, 0x0000002c ; +12
-// 0x00000020: auipc r1, 2
-// 0x00000024: jic r1, 12 ; b 0x00020030 ; +131080
-// 0x00000028: nop
+// 0x00000018: bnec r5, r6, 0x00000024 ; +12
+// 0x0000001c: auipc r1, 2
+// 0x00000020: jic r1, 12 ; bc 0x00020028 ; +131080
+// 0x00000024: nop
// ...
-// 0x00020028: nop
-// 0x0002002c: .cfi_remember_state
-// 0x0002002c: daddiu r29, r29, 24
-// 0x00020030: .cfi_def_cfa_offset: 40
-// 0x00020030: ldc1 f24, +0(r29)
-// 0x00020034: .cfi_restore: r56
-// 0x00020034: ldc1 f25, +8(r29)
+// 0x00020024: nop
+// 0x00020028: .cfi_remember_state
+// 0x00020028: ld r31, +56(r29)
+// 0x0002002c: .cfi_restore: r31
+// 0x0002002c: ld r17, +48(r29)
+// 0x00020030: .cfi_restore: r17
+// 0x00020030: ld r16, +40(r29)
+// 0x00020034: .cfi_restore: r16
+// 0x00020034: ldc1 f25, +32(r29)
// 0x00020038: .cfi_restore: r57
-// 0x00020038: ld r16, +16(r29)
-// 0x0002003c: .cfi_restore: r16
-// 0x0002003c: ld r17, +24(r29)
-// 0x00020040: .cfi_restore: r17
-// 0x00020040: ld r31, +32(r29)
-// 0x00020044: .cfi_restore: r31
-// 0x00020044: daddiu r29, r29, 40
-// 0x00020047: .cfi_def_cfa_offset: 0
-// 0x00020048: jr r31
-// 0x0002004c: nop
-// 0x00020050: .cfi_restore_state
-// 0x00020050: .cfi_def_cfa_offset: 64
+// 0x00020038: ldc1 f24, +24(r29)
+// 0x0002003c: .cfi_restore: r56
+// 0x0002003c: daddiu r29, r29, 64
+// 0x00020040: .cfi_def_cfa_offset: 0
+// 0x00020040: jic r31, 0
+// 0x00020044: .cfi_restore_state
+// 0x00020044: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 23ccd9e953..3c6d2d64a9 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -499,7 +499,8 @@ static HOptimization* BuildOptimization(
handles,
stats,
number_of_dex_registers,
- /* depth */ 0);
+ /* total_number_of_instructions */ 0,
+ /* parent */ nullptr);
} else if (opt_name == HSharpening::kSharpeningPassName) {
return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles);
} else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
@@ -607,8 +608,7 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
VariableSizedHandleScope* handles) const {
OptimizingCompilerStats* stats = compilation_stats_.get();
const CompilerOptions& compiler_options = driver->GetCompilerOptions();
- bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
- && (compiler_options.GetInlineMaxCodeUnits() > 0);
+ bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0);
if (!should_inline) {
return;
}
@@ -623,7 +623,8 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
handles,
stats,
number_of_dex_registers,
- /* depth */ 0);
+ /* total_number_of_instructions */ 0,
+ /* parent */ nullptr);
HOptimization* optimizations[] = { inliner };
RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index ae9a8119a7..a211c5472a 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -69,6 +69,23 @@ enum MethodCompilationStat {
kExplicitNullCheckGenerated,
kSimplifyIf,
kInstructionSunk,
+ kNotInlinedUnresolvedEntrypoint,
+ kNotInlinedDexCache,
+ kNotInlinedStackMaps,
+ kNotInlinedEnvironmentBudget,
+ kNotInlinedInstructionBudget,
+ kNotInlinedLoopWithoutExit,
+ kNotInlinedIrreducibleLoop,
+ kNotInlinedAlwaysThrows,
+ kNotInlinedInfiniteLoop,
+ kNotInlinedTryCatch,
+ kNotInlinedRegisterAllocator,
+ kNotInlinedCannotBuild,
+ kNotInlinedNotVerified,
+ kNotInlinedCodeItem,
+ kNotInlinedWont,
+ kNotInlinedRecursiveBudget,
+ kNotInlinedProxy,
kLastStat
};
@@ -168,6 +185,23 @@ class OptimizingCompilerStats {
case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
case kSimplifyIf: name = "SimplifyIf"; break;
case kInstructionSunk: name = "InstructionSunk"; break;
+ case kNotInlinedUnresolvedEntrypoint: name = "NotInlinedUnresolvedEntrypoint"; break;
+ case kNotInlinedDexCache: name = "NotInlinedDexCache"; break;
+ case kNotInlinedStackMaps: name = "NotInlinedStackMaps"; break;
+ case kNotInlinedEnvironmentBudget: name = "NotInlinedEnvironmentBudget"; break;
+ case kNotInlinedInstructionBudget: name = "NotInlinedInstructionBudget"; break;
+ case kNotInlinedLoopWithoutExit: name = "NotInlinedLoopWithoutExit"; break;
+ case kNotInlinedIrreducibleLoop: name = "NotInlinedIrreducibleLoop"; break;
+ case kNotInlinedAlwaysThrows: name = "NotInlinedAlwaysThrows"; break;
+ case kNotInlinedInfiniteLoop: name = "NotInlinedInfiniteLoop"; break;
+ case kNotInlinedTryCatch: name = "NotInlinedTryCatch"; break;
+ case kNotInlinedRegisterAllocator: name = "NotInlinedRegisterAllocator"; break;
+ case kNotInlinedCannotBuild: name = "NotInlinedCannotBuild"; break;
+ case kNotInlinedNotVerified: name = "NotInlinedNotVerified"; break;
+ case kNotInlinedCodeItem: name = "NotInlinedCodeItem"; break;
+ case kNotInlinedWont: name = "NotInlinedWont"; break;
+ case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break;
+ case kNotInlinedProxy: name = "NotInlinedProxy"; break;
case kLastStat:
LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index efbaf6c221..66bfea9860 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -40,6 +40,14 @@ void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) {
check->ReplaceWith(check->InputAt(0));
}
+void PrepareForRegisterAllocation::VisitDeoptimize(HDeoptimize* deoptimize) {
+ if (deoptimize->GuardsAnInput()) {
+ // Replace the uses with the actual guarded instruction.
+ deoptimize->ReplaceWith(deoptimize->GuardedInput());
+ deoptimize->RemoveGuard();
+ }
+}
+
void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
check->ReplaceWith(check->InputAt(0));
if (check->IsStringCharAt()) {
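
The new VisitDeoptimize() above rewires every user of a guarding HDeoptimize to the guarded value and then drops the extra input, so the register allocator only ever sees the plain single-input form. A toy model of that rewiring, using plain structs instead of ART's HInstruction graph, is sketched below.

#include <cassert>
#include <vector>

struct Node {
  std::vector<Node*> inputs;
};

void StripGuard(Node* deoptimize, const std::vector<Node*>& users_of_deoptimize) {
  Node* guarded = deoptimize->inputs.at(1);
  for (Node* user : users_of_deoptimize) {
    for (Node*& input : user->inputs) {
      if (input == deoptimize) {
        input = guarded;  // Plays the role of deoptimize->ReplaceWith(deoptimize->GuardedInput()).
      }
    }
  }
  deoptimize->inputs.pop_back();  // Plays the role of deoptimize->RemoveGuard().
}

int main() {
  Node condition, guarded_value;
  Node deoptimize{{&condition, &guarded_value}};
  Node user{{&deoptimize}};
  StripGuard(&deoptimize, {&user});
  assert(user.inputs[0] == &guarded_value);
  assert(deoptimize.inputs.size() == 1u);
  return 0;
}
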
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index c128227654..7ffbe44ef6 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -44,6 +44,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
void VisitCondition(HCondition* condition) OVERRIDE;
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+ void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const;
bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 6e332ca59b..d5637b9b75 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -310,8 +310,8 @@ static void BoundTypeForClassCheck(HInstruction* check) {
BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti);
} else {
DCHECK(check->IsDeoptimize());
- if (compare->IsEqual()) {
- BoundTypeIn(receiver, check->GetBlock(), check, class_rti);
+ if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) {
+ check->SetReferenceTypeInfo(class_rti);
}
}
}
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index 84a4bab1a9..0b49ce1a4c 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -29,7 +29,7 @@ namespace art {
*/
class ReferenceTypePropagationTest : public CommonCompilerTest {
public:
- ReferenceTypePropagationTest() : pool_(), allocator_(&pool_) {
+ ReferenceTypePropagationTest() : pool_(), allocator_(&pool_), propagation_(nullptr) {
graph_ = CreateGraph(&allocator_);
}
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index ab0dad4300..9236a0e4fa 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -315,7 +315,10 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
// This class and its sub-classes will never be used to drive a visit of an
// `HGraph` but only to visit `HInstructions` one at a time, so we do not need
// to pass a valid graph to `HGraphDelegateVisitor()`.
- SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {}
+ SchedulingLatencyVisitor()
+ : HGraphDelegateVisitor(nullptr),
+ last_visited_latency_(0),
+ last_visited_internal_latency_(0) {}
void VisitInstruction(HInstruction* instruction) OVERRIDE {
LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
@@ -413,6 +416,7 @@ class HScheduler {
selector_(selector),
only_optimize_loop_blocks_(true),
scheduling_graph_(this, arena),
+ cursor_(nullptr),
candidates_(arena_->Adapter(kArenaAllocScheduler)) {}
virtual ~HScheduler() {}
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index 1916c73ca4..a1016d1d47 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -189,13 +189,14 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
// Use HAboveOrEqual+HDeoptimize as the bounds check.
HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length);
block->AddInstruction(ae);
- HInstruction* deoptimize = new(&allocator_) HDeoptimize(ae, /* dex_pc */ 0u);
+ HInstruction* deoptimize =
+ new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u);
block->AddInstruction(deoptimize);
HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_,
- /* number_of_vregs */ 5,
- /* method */ nullptr,
- /* dex_pc */ 0u,
- deoptimize);
+ /* number_of_vregs */ 5,
+ /* method */ nullptr,
+ /* dex_pc */ 0u,
+ deoptimize);
deoptimize_env->CopyFrom(args);
deoptimize->SetRawEnvironment(deoptimize_env);
HInstruction* array_set =
diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h
index d71c2fe997..ad3a099eb6 100644
--- a/compiler/utils/atomic_method_ref_map-inl.h
+++ b/compiler/utils/atomic_method_ref_map-inl.h
@@ -42,7 +42,7 @@ template <typename T>
inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const {
const ElementArray* const array = GetArray(ref.dex_file);
if (array == nullptr) {
- return kInsertResultInvalidDexFile;
+ return false;
}
*out = (*array)[ref.dex_method_index].LoadRelaxed();
return true;
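
The one-line fix above replaces an enumerator with the bool the function is declared to return. Because the enumerator belongs to an unscoped enum, the old code compiled silently, and whether it even returned the right value depended on the enumerator's numeric value. The stand-alone example below reproduces the pitfall; the enumerator ordering is an assumption of this sketch, not taken from the header.

#include <iostream>

enum InsertResult {
  kInsertResultInvalidDexFile,  // 0 -> implicitly converts to false, masking the type mix-up.
  kInsertResultCASFailure,      // 1 -> would convert to true.
  kInsertResultSuccess,
};

bool Get(bool have_array) {
  if (!have_array) {
    return kInsertResultInvalidDexFile;  // Compiles, but `return false;` states the intent.
  }
  return true;
}

int main() {
  std::cout << std::boolalpha << Get(false) << '\n';  // false, but only because the enumerator is 0.
}
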
diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc
index dea396e4a7..42d061ec15 100644
--- a/compiler/utils/mips64/managed_register_mips64.cc
+++ b/compiler/utils/mips64/managed_register_mips64.cc
@@ -26,6 +26,11 @@ bool Mips64ManagedRegister::Overlaps(const Mips64ManagedRegister& other) const {
CHECK(IsValidManagedRegister());
CHECK(other.IsValidManagedRegister());
if (Equals(other)) return true;
+ if (IsFpuRegister() && other.IsVectorRegister()) {
+ return (AsFpuRegister() == other.AsOverlappingFpuRegister());
+ } else if (IsVectorRegister() && other.IsFpuRegister()) {
+ return (AsVectorRegister() == other.AsOverlappingVectorRegister());
+ }
return false;
}
@@ -36,6 +41,8 @@ void Mips64ManagedRegister::Print(std::ostream& os) const {
os << "GPU: " << static_cast<int>(AsGpuRegister());
} else if (IsFpuRegister()) {
os << "FpuRegister: " << static_cast<int>(AsFpuRegister());
+ } else if (IsVectorRegister()) {
+ os << "VectorRegister: " << static_cast<int>(AsVectorRegister());
} else {
os << "??: " << RegId();
}
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index c9f95569cf..3980199b1e 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -30,11 +30,27 @@ const int kNumberOfGpuAllocIds = kNumberOfGpuRegisters;
const int kNumberOfFpuRegIds = kNumberOfFpuRegisters;
const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters;
-const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds;
-const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds;
-
-// An instance of class 'ManagedRegister' represents a single GPU register (enum
-// Register) or a double precision FP register (enum FpuRegister)
+const int kNumberOfVecRegIds = kNumberOfVectorRegisters;
+const int kNumberOfVecAllocIds = kNumberOfVectorRegisters;
+
+const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds + kNumberOfVecRegIds;
+const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds + kNumberOfVecAllocIds;
+
+// Register ids map:
+// [0..R[ core registers (enum GpuRegister)
+// [R..F[ floating-point registers (enum FpuRegister)
+// [F..W[ MSA vector registers (enum VectorRegister)
+// where
+// R = kNumberOfGpuRegIds
+// F = R + kNumberOfFpuRegIds
+// W = F + kNumberOfVecRegIds
+
+// An instance of class 'ManagedRegister' represents a single Mips64 register.
+// A register can be one of the following:
+// * core register (enum GpuRegister)
+// * floating-point register (enum FpuRegister)
+// * MSA vector register (enum VectorRegister)
+//
// 'ManagedRegister::NoRegister()' provides an invalid register.
// There is a one-to-one mapping between ManagedRegister and register id.
class Mips64ManagedRegister : public ManagedRegister {
@@ -49,6 +65,21 @@ class Mips64ManagedRegister : public ManagedRegister {
return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
}
+ constexpr VectorRegister AsVectorRegister() const {
+ CHECK(IsVectorRegister());
+ return static_cast<VectorRegister>(id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegisters));
+ }
+
+ constexpr FpuRegister AsOverlappingFpuRegister() const {
+ CHECK(IsValidManagedRegister());
+ return static_cast<FpuRegister>(AsVectorRegister());
+ }
+
+ constexpr VectorRegister AsOverlappingVectorRegister() const {
+ CHECK(IsValidManagedRegister());
+ return static_cast<VectorRegister>(AsFpuRegister());
+ }
+
constexpr bool IsGpuRegister() const {
CHECK(IsValidManagedRegister());
return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
@@ -60,6 +91,12 @@ class Mips64ManagedRegister : public ManagedRegister {
return (0 <= test) && (test < kNumberOfFpuRegIds);
}
+ constexpr bool IsVectorRegister() const {
+ CHECK(IsValidManagedRegister());
+ const int test = id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+ return (0 <= test) && (test < kNumberOfVecRegIds);
+ }
+
void Print(std::ostream& os) const;
// Returns true if the two managed-registers ('this' and 'other') overlap.
@@ -77,6 +114,11 @@ class Mips64ManagedRegister : public ManagedRegister {
return FromRegId(r + kNumberOfGpuRegIds);
}
+ static constexpr Mips64ManagedRegister FromVectorRegister(VectorRegister r) {
+ CHECK_NE(r, kNoVectorRegister);
+ return FromRegId(r + kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+ }
+
private:
constexpr bool IsValidManagedRegister() const {
return (0 <= id_) && (id_ < kNumberOfRegIds);
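
The register-id map documented in this header places core, floating-point and MSA vector ids in three consecutive ranges, and the new Overlaps()/AsOverlapping*Register() helpers encode the fact that F<n> and W<n> name the same physical register file entry. The sketch below makes that arithmetic concrete; the register counts (32 of each kind) are assumptions of the sketch, stated only so the static_asserts have numbers to work with.

#include <cstdio>

constexpr int kGpu = 32;
constexpr int kFpu = 32;
constexpr int kVec = 32;

constexpr int GpuId(int r) { return r; }                // [0 .. R[
constexpr int FpuId(int f) { return kGpu + f; }         // [R .. F[
constexpr int VecId(int w) { return kGpu + kFpu + w; }  // [F .. W[

constexpr bool Overlaps(int a, int b) {
  if (a == b) return true;
  const bool a_fpu = (a >= kGpu) && (a < kGpu + kFpu);
  const bool a_vec = (a >= kGpu + kFpu) && (a < kGpu + kFpu + kVec);
  const bool b_fpu = (b >= kGpu) && (b < kGpu + kFpu);
  const bool b_vec = (b >= kGpu + kFpu) && (b < kGpu + kFpu + kVec);
  if (a_fpu && b_vec) return (a - kGpu) == (b - kGpu - kFpu);  // F<n> aliases W<n>.
  if (a_vec && b_fpu) return (a - kGpu - kFpu) == (b - kGpu);
  return false;
}

static_assert(Overlaps(FpuId(4), VecId(4)), "F4 aliases W4");
static_assert(!Overlaps(FpuId(4), VecId(5)), "F4 does not alias W5");
static_assert(!Overlaps(GpuId(4), FpuId(4)), "Core and FPU ids never alias");

int main() {
  std::printf("W13 gets register id %d\n", VecId(13));  // 32 + 32 + 13 = 77
}
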
diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc
new file mode 100644
index 0000000000..8b72d7e61d
--- /dev/null
+++ b/compiler/utils/mips64/managed_register_mips64_test.cc
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_mips64.h"
+#include "globals.h"
+#include "gtest/gtest.h"
+
+namespace art {
+namespace mips64 {
+
+TEST(Mips64ManagedRegister, NoRegister) {
+ Mips64ManagedRegister reg = ManagedRegister::NoRegister().AsMips64();
+ EXPECT_TRUE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.Overlaps(reg));
+}
+
+TEST(Mips64ManagedRegister, GpuRegister) {
+ Mips64ManagedRegister reg = Mips64ManagedRegister::FromGpuRegister(ZERO);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(ZERO, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(AT);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(AT, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(V0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(V0, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(A0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(A0, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(A7);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(A7, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(T0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(T0, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(T3);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(T3, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(S0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(S0, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(GP);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(GP, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(SP);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(SP, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(RA);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(RA, reg.AsGpuRegister());
+}
+
+TEST(Mips64ManagedRegister, FpuRegister) {
+ Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0);
+ Mips64ManagedRegister vreg = Mips64ManagedRegister::FromVectorRegister(W0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_TRUE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(vreg));
+ EXPECT_EQ(F0, reg.AsFpuRegister());
+ EXPECT_EQ(W0, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F1);
+ vreg = Mips64ManagedRegister::FromVectorRegister(W1);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_TRUE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(vreg));
+ EXPECT_EQ(F1, reg.AsFpuRegister());
+ EXPECT_EQ(W1, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F20);
+ vreg = Mips64ManagedRegister::FromVectorRegister(W20);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_TRUE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(vreg));
+ EXPECT_EQ(F20, reg.AsFpuRegister());
+ EXPECT_EQ(W20, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F20)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F31);
+ vreg = Mips64ManagedRegister::FromVectorRegister(W31);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_TRUE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(vreg));
+ EXPECT_EQ(F31, reg.AsFpuRegister());
+ EXPECT_EQ(W31, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+}
+
+TEST(Mips64ManagedRegister, VectorRegister) {
+ Mips64ManagedRegister reg = Mips64ManagedRegister::FromVectorRegister(W0);
+ Mips64ManagedRegister freg = Mips64ManagedRegister::FromFpuRegister(F0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_TRUE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(freg));
+ EXPECT_EQ(W0, reg.AsVectorRegister());
+ EXPECT_EQ(F0, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W2);
+ freg = Mips64ManagedRegister::FromFpuRegister(F2);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_TRUE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(freg));
+ EXPECT_EQ(W2, reg.AsVectorRegister());
+ EXPECT_EQ(F2, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W2)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W13);
+ freg = Mips64ManagedRegister::FromFpuRegister(F13);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_TRUE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(freg));
+ EXPECT_EQ(W13, reg.AsVectorRegister());
+ EXPECT_EQ(F13, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W13)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W29);
+ freg = Mips64ManagedRegister::FromFpuRegister(F29);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_TRUE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(freg));
+ EXPECT_EQ(W29, reg.AsVectorRegister());
+ EXPECT_EQ(F29, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W29)));
+}
+
+TEST(Mips64ManagedRegister, Equals) {
+ ManagedRegister no_reg = ManagedRegister::NoRegister();
+ EXPECT_TRUE(no_reg.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_ZERO = Mips64ManagedRegister::FromGpuRegister(ZERO);
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_TRUE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_A1 = Mips64ManagedRegister::FromGpuRegister(A1);
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_TRUE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_S2 = Mips64ManagedRegister::FromGpuRegister(S2);
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+ EXPECT_TRUE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_F0 = Mips64ManagedRegister::FromFpuRegister(F0);
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_TRUE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_F31 = Mips64ManagedRegister::FromFpuRegister(F31);
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+ EXPECT_TRUE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_W0 = Mips64ManagedRegister::FromVectorRegister(W0);
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_TRUE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W1)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ Mips64ManagedRegister reg_W31 = Mips64ManagedRegister::FromVectorRegister(W31);
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W1)));
+ EXPECT_TRUE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W31)));
+}
+
+TEST(Mips64ManagedRegister, Overlaps) {
+ Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0);
+ Mips64ManagedRegister reg_o = Mips64ManagedRegister::FromVectorRegister(W0);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(F0, reg_o.AsOverlappingFpuRegister());
+ EXPECT_EQ(W0, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F4);
+ reg_o = Mips64ManagedRegister::FromVectorRegister(W4);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(F4, reg_o.AsOverlappingFpuRegister());
+ EXPECT_EQ(W4, reg.AsOverlappingVectorRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F16);
+ reg_o = Mips64ManagedRegister::FromVectorRegister(W16);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(F16, reg_o.AsOverlappingFpuRegister());
+ EXPECT_EQ(W16, reg.AsOverlappingVectorRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F31);
+ reg_o = Mips64ManagedRegister::FromVectorRegister(W31);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(F31, reg_o.AsOverlappingFpuRegister());
+ EXPECT_EQ(W31, reg.AsOverlappingVectorRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W0);
+ reg_o = Mips64ManagedRegister::FromFpuRegister(F0);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(W0, reg_o.AsOverlappingVectorRegister());
+ EXPECT_EQ(F0, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W4);
+ reg_o = Mips64ManagedRegister::FromFpuRegister(F4);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(W4, reg_o.AsOverlappingVectorRegister());
+ EXPECT_EQ(F4, reg.AsOverlappingFpuRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W16);
+ reg_o = Mips64ManagedRegister::FromFpuRegister(F16);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(W16, reg_o.AsOverlappingVectorRegister());
+ EXPECT_EQ(F16, reg.AsOverlappingFpuRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W31);
+ reg_o = Mips64ManagedRegister::FromFpuRegister(F31);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(W31, reg_o.AsOverlappingVectorRegister());
+ EXPECT_EQ(F31, reg.AsOverlappingFpuRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromGpuRegister(ZERO);
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromGpuRegister(A0);
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromGpuRegister(S0);
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromGpuRegister(RA);
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+}
+
+} // namespace mips64
+} // namespace art
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 4bfc84990d..fa7e98586c 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -18,21 +18,21 @@
#include "verifier/verifier_deps.h"
#include "class_linker.h"
-#include "compiler/common_compiler_test.h"
-#include "compiler/dex/verification_results.h"
-#include "compiler/dex/verified_method.h"
-#include "compiler/driver/compiler_options.h"
-#include "compiler/driver/compiler_driver.h"
-#include "compiler/utils/atomic_method_ref_map-inl.h"
+#include "common_compiler_test.h"
#include "compiler_callbacks.h"
+#include "dex/verification_results.h"
+#include "dex/verified_method.h"
#include "dex_file.h"
#include "dex_file_types.h"
+#include "driver/compiler_options.h"
+#include "driver/compiler_driver.h"
#include "handle_scope-inl.h"
#include "verifier/method_verifier-inl.h"
#include "mirror/class_loader.h"
#include "runtime.h"
#include "thread.h"
#include "scoped_thread_state_change-inl.h"
+#include "utils/atomic_method_ref_map-inl.h"
namespace art {
namespace verifier {