144 files changed, 4969 insertions, 3096 deletions
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index bd13d1622c..a679ac2548 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -158,6 +158,10 @@ art_clang_cflags += -Wdeprecated # Enable warning for unreachable break & return. art_clang_cflags += -Wunreachable-code-break -Wunreachable-code-return +# Bug: http://b/29823425 Disable -Wconstant-conversion and +# -Wundefined-var-template for Clang update to r271374 +art_clang_cflags += -Wno-constant-conversion -Wno-undefined-var-template + # Enable missing-noreturn only on non-Mac. As lots of things are not implemented for Apple, it's # a pain. ifneq ($(HOST_OS),darwin) diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk index b1644df529..e213dc4750 100644 --- a/build/Android.common_path.mk +++ b/build/Android.common_path.mk @@ -38,7 +38,7 @@ ART_TARGET_TEST_OUT := $(TARGET_OUT_DATA)/art-test ifneq ($(TMPDIR),) ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID) else -ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo $$PPID) +ART_HOST_TEST_DIR := /tmp/$(USER)/test-art-$(shell echo $$PPID) endif # core.oat location on the device. diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index cc96cf0a9e..7f8fa8e391 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -71,7 +71,7 @@ ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Stati ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex ART_GTEST_dex_cache_test_DEX_DEPS := Main ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested -ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) +ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h index 9b4042c590..f05648c58f 100644 --- a/cmdline/cmdline_types.h +++ b/cmdline/cmdline_types.h @@ -462,7 +462,7 @@ static gc::CollectorType ParseCollectorType(const std::string& option) { struct XGcOption { // These defaults are used when the command line arguments for -Xgc: // are either omitted completely or partially. - gc::CollectorType collector_type_ = kUseReadBarrier ? + gc::CollectorType collector_type_ = kUseReadBarrier ? // If RB is enabled (currently a build-time decision), // use CC as the default GC. 
gc::kCollectorTypeCC : @@ -473,6 +473,7 @@ struct XGcOption { bool verify_pre_gc_rosalloc_ = kIsDebugBuild; bool verify_pre_sweeping_rosalloc_ = false; bool verify_post_gc_rosalloc_ = false; + bool measure_ = kIsDebugBuild; bool gcstress_ = false; }; @@ -515,6 +516,8 @@ struct CmdlineType<XGcOption> : CmdlineTypeParser<XGcOption> { xgc.gcstress_ = true; } else if (gc_option == "nogcstress") { xgc.gcstress_ = false; + } else if (gc_option == "measure") { + xgc.measure_ = true; } else if ((gc_option == "precise") || (gc_option == "noprecise") || (gc_option == "verifycardtable") || diff --git a/compiler/Android.mk b/compiler/Android.mk index 689f2d028e..e3f8a5cf7f 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -68,6 +68,8 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/prepare_for_register_allocation.cc \ optimizing/reference_type_propagation.cc \ optimizing/register_allocator.cc \ + optimizing/register_allocation_resolver.cc \ + optimizing/register_allocator_linear_scan.cc \ optimizing/select_generator.cc \ optimizing/sharpening.cc \ optimizing/side_effects_analysis.cc \ diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 1fc247faf1..8aefd9ea1f 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -533,9 +533,6 @@ class BCEVisitor : public HGraphVisitor { first_index_bounds_check_map_( std::less<int>(), graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), - dynamic_bce_standby_( - graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), - record_dynamic_bce_standby_(true), early_exit_loop_( std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), @@ -560,14 +557,6 @@ class BCEVisitor : public HGraphVisitor { } void Finish() { - // Retry dynamic bce candidates on standby that are still in the graph. - record_dynamic_bce_standby_ = false; - for (HBoundsCheck* bounds_check : dynamic_bce_standby_) { - if (bounds_check->IsInBlock()) { - TryDynamicBCE(bounds_check); - } - } - // Preserve SSA structure which may have been broken by adding one or more // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()). InsertPhiNodes(); @@ -576,7 +565,6 @@ class BCEVisitor : public HGraphVisitor { early_exit_loop_.clear(); taken_test_loop_.clear(); finite_loop_.clear(); - dynamic_bce_standby_.clear(); } private: @@ -832,7 +820,6 @@ class BCEVisitor : public HGraphVisitor { array_length->IsArrayLength() || array_length->IsPhi()); bool try_dynamic_bce = true; - // Analyze index range. if (!index->IsIntConstant()) { // Non-constant index. @@ -896,10 +883,20 @@ class BCEVisitor : public HGraphVisitor { // If static analysis fails, and OOB is not certain, try dynamic elimination. if (try_dynamic_bce) { // Try loop-based dynamic elimination. - if (TryDynamicBCE(bounds_check)) { + HLoopInformation* loop = bounds_check->GetBlock()->GetLoopInformation(); + bool needs_finite_test = false; + bool needs_taken_test = false; + if (DynamicBCESeemsProfitable(loop, bounds_check->GetBlock()) && + induction_range_.CanGenerateCode( + bounds_check, index, &needs_finite_test, &needs_taken_test) && + CanHandleInfiniteLoop(loop, index, needs_finite_test) && + // Do this test last, since it may generate code. 
+ CanHandleLength(loop, array_length, needs_taken_test)) { + TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + TransformLoopForDynamicBCE(loop, bounds_check); return; } - // Prepare dominator-based dynamic elimination. + // Otherwise, prepare dominator-based dynamic elimination. if (first_index_bounds_check_map_.find(array_length->GetId()) == first_index_bounds_check_map_.end()) { // Remember the first bounds check against each array_length. That bounds check @@ -1180,7 +1177,7 @@ class BCEVisitor : public HGraphVisitor { } } - // Perform dominator-based dynamic elimination on suitable set of bounds checks. + /** Performs dominator-based dynamic elimination on suitable set of bounds checks. */ void AddCompareWithDeoptimization(HBasicBlock* block, HInstruction* array_length, HInstruction* base, @@ -1190,6 +1187,12 @@ class BCEVisitor : public HGraphVisitor { // Construct deoptimization on single or double bounds on range [base-min_c,base+max_c], // for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1 // and base+3, since we made the assumption any in between value may occur too. + // In code, using unsigned comparisons: + // (1) constants only + // if (max_c >= a.length) deoptimize; + // (2) general case + // if (base-min_c > base+max_c) deoptimize; + // if (base+max_c >= a.length ) deoptimize; static_assert(kMaxLengthForAddingDeoptimize < std::numeric_limits<int32_t>::max(), "Incorrect max length may be subject to arithmetic wrap-around"); HInstruction* upper = GetGraph()->GetIntConstant(max_c); @@ -1208,7 +1211,7 @@ class BCEVisitor : public HGraphVisitor { has_dom_based_dynamic_bce_ = true; } - // Attempt dominator-based dynamic elimination on remaining candidates. + /** Attempts dominator-based dynamic elimination on remaining candidates. */ void AddComparesWithDeoptimization(HBasicBlock* block) { for (const auto& entry : first_index_bounds_check_map_) { HBoundsCheck* bounds_check = entry.second; @@ -1272,17 +1275,19 @@ class BCEVisitor : public HGraphVisitor { candidates.push_back(other_bounds_check); } } - // Perform dominator-based deoptimization if it seems profitable. Note that we reject cases - // where the distance min_c:max_c range gets close to the maximum possible array length, - // since those cases are likely to always deopt (such situations do not necessarily go - // OOB, though, since the programmer could rely on wrap-around from max to min). + // Perform dominator-based deoptimization if it seems profitable, where we eliminate + // bounds checks and replace these with deopt checks that guard against any possible + // OOB. Note that we reject cases where the distance min_c:max_c range gets close to + // the maximum possible array length, since those cases are likely to always deopt + // (such situations do not necessarily go OOB, though, since the array could be really + // large, or the programmer could rely on arithmetic wrap-around from max to min). size_t threshold = kThresholdForAddingDeoptimize + (base == nullptr ? 0 : 1); // extra test? uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c); if (candidates.size() >= threshold && (base != nullptr || min_c >= 0) && // reject certain OOB distance <= kMaxLengthForAddingDeoptimize) { // reject likely/certain deopt AddCompareWithDeoptimization(block, array_length, base, min_c, max_c); - for (HInstruction* other_bounds_check : candidates) { + for (HBoundsCheck* other_bounds_check : candidates) { // Only replace if still in the graph. 
This avoids visiting the same // bounds check twice if it occurred multiple times in the use list. if (other_bounds_check->IsInBlock()) { @@ -1328,45 +1333,127 @@ class BCEVisitor : public HGraphVisitor { } /** - * When the compiler fails to remove a bounds check statically, we try to remove the bounds - * check dynamically by adding runtime tests that trigger a deoptimization in case bounds - * will go out of range (we want to be rather certain of that given the slowdown of - * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding - * bounds checks and related null checks removed. + * Performs loop-based dynamic elimination on a bounds check. In order to minimize the + * number of eventually generated tests, related bounds checks with tests that can be + * combined with tests for the given bounds check are collected first. */ - bool TryDynamicBCE(HBoundsCheck* instruction) { - HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation(); - HInstruction* index = instruction->InputAt(0); - HInstruction* length = instruction->InputAt(1); - // If dynamic bounds check elimination seems profitable and is possible, then proceed. - bool needs_finite_test = false; - bool needs_taken_test = false; - if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) && - induction_range_.CanGenerateCode( - instruction, index, &needs_finite_test, &needs_taken_test) && - CanHandleInfiniteLoop(loop, instruction, index, needs_finite_test) && - CanHandleLength(loop, length, needs_taken_test)) { // do this test last (may code gen) - HInstruction* lower = nullptr; - HInstruction* upper = nullptr; - // Generate the following unsigned comparisons - // if (lower > upper) deoptimize; - // if (upper >= length) deoptimize; - // or, for a non-induction index, just the unsigned comparison on its 'upper' value - // if (upper >= length) deoptimize; - // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit - // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests - // correctly guard against any possible OOB (including arithmetic wrap-around cases). - TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); - HBasicBlock* block = GetPreHeader(loop, instruction); - induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper); - if (lower != nullptr) { - InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper)); - } - InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length)); - ReplaceInstruction(instruction, index); - return true; + void TransformLoopForDynamicBCE(HLoopInformation* loop, HBoundsCheck* bounds_check) { + HInstruction* index = bounds_check->InputAt(0); + HInstruction* array_length = bounds_check->InputAt(1); + DCHECK(loop->IsDefinedOutOfTheLoop(array_length)); // pre-checked + DCHECK(loop->DominatesAllBackEdges(bounds_check->GetBlock())); + // Collect all bounds checks in the same loop that are related as "a[base + constant]" + // for a base instruction (possibly absent) and various constants. + ValueBound value = ValueBound::AsValueBound(index); + HInstruction* base = value.GetInstruction(); + int32_t min_c = base == nullptr ? 
0 : value.GetConstant(); + int32_t max_c = value.GetConstant(); + ArenaVector<HBoundsCheck*> candidates( + GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)); + ArenaVector<HBoundsCheck*> standby( + GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)); + for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) { + HInstruction* user = use.GetUser(); + if (user->IsBoundsCheck() && loop == user->GetBlock()->GetLoopInformation()) { + HBoundsCheck* other_bounds_check = user->AsBoundsCheck(); + HInstruction* other_index = other_bounds_check->InputAt(0); + HInstruction* other_array_length = other_bounds_check->InputAt(1); + ValueBound other_value = ValueBound::AsValueBound(other_index); + int32_t other_c = other_value.GetConstant(); + if (array_length == other_array_length && base == other_value.GetInstruction()) { + // Does the current basic block dominate all back edges? If not, + // add this candidate later only if it falls into the range. + if (!loop->DominatesAllBackEdges(user->GetBlock())) { + standby.push_back(other_bounds_check); + continue; + } + min_c = std::min(min_c, other_c); + max_c = std::max(max_c, other_c); + candidates.push_back(other_bounds_check); + } + } + } + // Add standby candidates that fall in selected range. + for (HBoundsCheck* other_bounds_check : standby) { + HInstruction* other_index = other_bounds_check->InputAt(0); + int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant(); + if (min_c <= other_c && other_c <= max_c) { + candidates.push_back(other_bounds_check); + } + } + // Perform loop-based deoptimization if it seems profitable, where we eliminate bounds + // checks and replace these with deopt checks that guard against any possible OOB. + DCHECK_LT(0u, candidates.size()); + uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c); + if ((base != nullptr || min_c >= 0) && // reject certain OOB + distance <= kMaxLengthForAddingDeoptimize) { // reject likely/certain deopt + HBasicBlock* block = GetPreHeader(loop, bounds_check); + HInstruction* min_lower = nullptr; + HInstruction* min_upper = nullptr; + HInstruction* max_lower = nullptr; + HInstruction* max_upper = nullptr; + // Iterate over all bounds checks. + for (HBoundsCheck* other_bounds_check : candidates) { + // Only handle if still in the graph. This avoids visiting the same + // bounds check twice if it occurred multiple times in the use list. + if (other_bounds_check->IsInBlock()) { + HInstruction* other_index = other_bounds_check->InputAt(0); + int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant(); + // Generate code for either the maximum or minimum. Range analysis already was queried + // whether code generation on the original and, thus, related bounds check was possible. + // It handles either loop invariants (lower is not set) or unit strides. 
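The deoptimization guards described in the comments above compress a whole group of related accesses a[base+min_c] .. a[base+max_c] (e.g. a[base-1] and a[base+3] give min_c = -1, max_c = 3) into two unsigned comparisons. A standalone sketch of the condition those guards test, with my own helper name rather than ART code:

#include <cstdint>

// Returns true when at least one access in a[base+min_c] .. a[base+max_c]
// could be out of bounds, using only unsigned comparisons.
bool AccessesMightGoOutOfBounds(int32_t base, int32_t min_c, int32_t max_c, int32_t array_length) {
  // Compute in uint32_t so a negative lowest index wraps to a huge value and
  // is caught by the first comparison (arithmetic wrap-around is also caught).
  uint32_t lower = static_cast<uint32_t>(base) + static_cast<uint32_t>(min_c);
  uint32_t upper = static_cast<uint32_t>(base) + static_cast<uint32_t>(max_c);
  // Mirrors the emitted guards: if (base+min_c > base+max_c) deoptimize;
  //                             if (base+max_c >= a.length)  deoptimize;
  return lower > upper || upper >= static_cast<uint32_t>(array_length);
}

This is also why candidate sets whose max_c - min_c distance approaches the maximum array length are rejected up front: such a guard would deoptimize on nearly every execution even though the accesses themselves might stay in bounds.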
+ if (other_c == max_c) { + induction_range_.GenerateRangeCode( + other_bounds_check, other_index, GetGraph(), block, &max_lower, &max_upper); + } else if (other_c == min_c && base != nullptr) { + induction_range_.GenerateRangeCode( + other_bounds_check, other_index, GetGraph(), block, &min_lower, &min_upper); + } + ReplaceInstruction(other_bounds_check, other_index); + } + } + // In code, using unsigned comparisons: + // (1) constants only + // if (max_upper >= a.length ) deoptimize; + // (2) two symbolic invariants + // if (min_upper > max_upper) deoptimize; unless min_c == max_c + // if (max_upper >= a.length ) deoptimize; + // (3) general case, unit strides (where lower would exceed upper for arithmetic wrap-around) + // if (min_lower > max_lower) deoptimize; unless min_c == max_c + // if (max_lower > max_upper) deoptimize; + // if (max_upper >= a.length ) deoptimize; + if (base == nullptr) { + // Constants only. + DCHECK_GE(min_c, 0); + DCHECK(min_lower == nullptr && min_upper == nullptr && + max_lower == nullptr && max_upper != nullptr); + } else if (max_lower == nullptr) { + // Two symbolic invariants. + if (min_c != max_c) { + DCHECK(min_lower == nullptr && min_upper != nullptr && + max_lower == nullptr && max_upper != nullptr); + InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_upper, max_upper)); + } else { + DCHECK(min_lower == nullptr && min_upper == nullptr && + max_lower == nullptr && max_upper != nullptr); + } + } else { + // General case, unit strides. + if (min_c != max_c) { + DCHECK(min_lower != nullptr && min_upper != nullptr && + max_lower != nullptr && max_upper != nullptr); + InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_lower, max_lower)); + } else { + DCHECK(min_lower == nullptr && min_upper == nullptr && + max_lower != nullptr && max_upper != nullptr); + } + InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(max_lower, max_upper)); + } + InsertDeoptInLoop( + loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(max_upper, array_length)); + } else { + // TODO: if rejected, avoid doing this again for subsequent instructions in this set? } - return false; } /** @@ -1474,8 +1561,7 @@ class BCEVisitor : public HGraphVisitor { * of the loop to use, dynamic bce in such cases is only allowed if other tests * ensure the loop is finite. */ - bool CanHandleInfiniteLoop( - HLoopInformation* loop, HBoundsCheck* check, HInstruction* index, bool needs_infinite_test) { + bool CanHandleInfiniteLoop(HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) { if (needs_infinite_test) { // If we already forced the loop to be finite, allow directly. const uint32_t loop_id = loop->GetHeader()->GetBlockId(); @@ -1497,11 +1583,6 @@ class BCEVisitor : public HGraphVisitor { } } } - // If bounds check made it this far, it is worthwhile to check later if - // the loop was forced finite by another candidate. - if (record_dynamic_bce_standby_) { - dynamic_bce_standby_.push_back(check); - } return false; } return true; @@ -1727,10 +1808,6 @@ class BCEVisitor : public HGraphVisitor { // in a block that checks an index against that HArrayLength. ArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_; - // Stand by list for dynamic bce. - ArenaVector<HBoundsCheck*> dynamic_bce_standby_; - bool record_dynamic_bce_standby_; - // Early-exit loop bookkeeping. 
ArenaSafeMap<uint32_t, bool> early_exit_loop_; diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 47e6625d07..5e6e175c67 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -80,7 +80,11 @@ class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { virtual void EmitNativeCode(CodeGenerator* codegen) = 0; + // Save live core and floating-point caller-save registers and + // update the stack mask in `locations` for registers holding object + // references. virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); + // Restore live core and floating-point caller-save registers. virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); bool IsCoreRegisterSaved(int reg) const { diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1aa7b5404c..236ed20fc0 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -316,7 +316,7 @@ class TypeCheckSlowPathARM : public SlowPathCode { instruction_->GetDexPc(), this); CheckEntrypointTypes< - kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); + kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); } else { DCHECK(instruction_->IsCheckCast()); @@ -437,11 +437,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - // Save live registers before the runtime call, and in particular - // R0 (if it is live), as it is clobbered by functions - // art_quick_read_barrier_mark_regX. - SaveLiveRegisters(codegen, locations); - + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
InvokeRuntimeCallingConvention calling_convention; CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); DCHECK_NE(reg, SP); @@ -469,8 +467,6 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); - - RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -1937,7 +1933,7 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value()); uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex() % ImTable::kSize, kArmPointerSize)); + invoke->GetImtIndex(), kArmPointerSize)); // temp = temp->GetImtEntryAt(method_offset); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); uint32_t entry_point = @@ -4286,6 +4282,122 @@ void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { codegen_->GenerateNullCheck(instruction); } +static LoadOperandType GetLoadOperandType(Primitive::Type type) { + switch (type) { + case Primitive::kPrimNot: + return kLoadWord; + case Primitive::kPrimBoolean: + return kLoadUnsignedByte; + case Primitive::kPrimByte: + return kLoadSignedByte; + case Primitive::kPrimChar: + return kLoadUnsignedHalfword; + case Primitive::kPrimShort: + return kLoadSignedHalfword; + case Primitive::kPrimInt: + return kLoadWord; + case Primitive::kPrimLong: + return kLoadWordPair; + case Primitive::kPrimFloat: + return kLoadSWord; + case Primitive::kPrimDouble: + return kLoadDWord; + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +static StoreOperandType GetStoreOperandType(Primitive::Type type) { + switch (type) { + case Primitive::kPrimNot: + return kStoreWord; + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + return kStoreByte; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + return kStoreHalfword; + case Primitive::kPrimInt: + return kStoreWord; + case Primitive::kPrimLong: + return kStoreWordPair; + case Primitive::kPrimFloat: + return kStoreSWord; + case Primitive::kPrimDouble: + return kStoreDWord; + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void CodeGeneratorARM::LoadFromShiftedRegOffset(Primitive::Type type, + Location out_loc, + Register base, + Register reg_offset, + Condition cond) { + uint32_t shift_count = Primitive::ComponentSizeShift(type); + Address mem_address(base, reg_offset, Shift::LSL, shift_count); + + switch (type) { + case Primitive::kPrimByte: + __ ldrsb(out_loc.AsRegister<Register>(), mem_address, cond); + break; + case Primitive::kPrimBoolean: + __ ldrb(out_loc.AsRegister<Register>(), mem_address, cond); + break; + case Primitive::kPrimShort: + __ ldrsh(out_loc.AsRegister<Register>(), mem_address, cond); + break; + case Primitive::kPrimChar: + __ ldrh(out_loc.AsRegister<Register>(), mem_address, cond); + break; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + __ ldr(out_loc.AsRegister<Register>(), mem_address, cond); + break; + // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types. 
+ case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void CodeGeneratorARM::StoreToShiftedRegOffset(Primitive::Type type, + Location loc, + Register base, + Register reg_offset, + Condition cond) { + uint32_t shift_count = Primitive::ComponentSizeShift(type); + Address mem_address(base, reg_offset, Shift::LSL, shift_count); + + switch (type) { + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + __ strb(loc.AsRegister<Register>(), mem_address, cond); + break; + case Primitive::kPrimShort: + case Primitive::kPrimChar: + __ strh(loc.AsRegister<Register>(), mem_address, cond); + break; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + __ str(loc.AsRegister<Register>(), mem_address, cond); + break; + // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types. + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); @@ -4320,70 +4432,40 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location index = locations->InAt(1); Location out_loc = locations->Out(); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - Primitive::Type type = instruction->GetType(); - switch (type) { - case Primitive::kPrimBoolean: { - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); - } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>())); - __ LoadFromOffset(kLoadUnsignedByte, out, IP, data_offset); - } - break; - } - - case Primitive::kPrimByte: { - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadSignedByte, out, obj, offset); - } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>())); - __ LoadFromOffset(kLoadSignedByte, out, IP, data_offset); - } - break; - } - - case Primitive::kPrimShort: { - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); - } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2)); - __ LoadFromOffset(kLoadSignedHalfword, out, IP, data_offset); - } - break; - } - - case Primitive::kPrimChar: { - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); - } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2)); - __ LoadFromOffset(kLoadUnsignedHalfword, out, IP, data_offset); - } - break; - } + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. 
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: case Primitive::kPrimInt: { - Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFromOffset(kLoadWord, out, obj, offset); + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); + + LoadOperandType load_type = GetLoadOperandType(type); + __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset); } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ LoadFromOffset(kLoadWord, out, IP, data_offset); + Register temp = IP; + + if (has_intermediate_address) { + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HIntermediateAddress* tmp = array_instr->AsIntermediateAddress(); + DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset); + } + temp = obj; + } else { + __ add(temp, obj, ShifterOperand(data_offset)); + } + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); } break; } @@ -4412,8 +4494,22 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { // reference, if heap poisoning is enabled). codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ LoadFromOffset(kLoadWord, out, IP, data_offset); + Register temp = IP; + + if (has_intermediate_address) { + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HIntermediateAddress* tmp = array_instr->AsIntermediateAddress(); + DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset); + } + temp = obj; + } else { + __ add(temp, obj, ShifterOperand(data_offset)); + } + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); // If read barriers are enabled, emit read barriers other than // Baker's using a slow path (and also unpoison the loaded @@ -4512,54 +4608,68 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + uint32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); + Location value_loc = locations->InAt(2); + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. 
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); switch (value_type) { case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ StoreToOffset(kStoreByte, value, array, offset); - } else { - __ add(IP, array, ShifterOperand(index.AsRegister<Register>())); - __ StoreToOffset(kStoreByte, value, IP, data_offset); - } - break; - } - + case Primitive::kPrimByte: case Primitive::kPrimShort: - case Primitive::kPrimChar: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); + case Primitive::kPrimChar: + case Primitive::kPrimInt: { if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ StoreToOffset(kStoreHalfword, value, array, offset); + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + uint32_t full_offset = + data_offset + (const_index << Primitive::ComponentSizeShift(value_type)); + StoreOperandType store_type = GetStoreOperandType(value_type); + __ StoreToOffset(store_type, value_loc.AsRegister<Register>(), array, full_offset); } else { - __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2)); - __ StoreToOffset(kStoreHalfword, value, IP, data_offset); + Register temp = IP; + + if (has_intermediate_address) { + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HIntermediateAddress* tmp = array_instr->AsIntermediateAddress(); + DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == data_offset); + } + temp = array; + } else { + __ add(temp, array, ShifterOperand(data_offset)); + } + codegen_->StoreToShiftedRegOffset(value_type, + value_loc, + temp, + index.AsRegister<Register>()); } break; } case Primitive::kPrimNot: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Location value_loc = locations->InAt(2); Register value = value_loc.AsRegister<Register>(); - Register source = value; + // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet. + // See the comment in instruction_simplifier_shared.cc. + DCHECK(!has_intermediate_address); if (instruction->InputAt(2)->IsNullConstant()) { // Just setting null. 
if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreToOffset(kStoreWord, source, array, offset); + __ StoreToOffset(kStoreWord, value, array, offset); } else { DCHECK(index.IsRegister()) << index; - __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ StoreToOffset(kStoreWord, source, IP, data_offset); + __ add(IP, array, ShifterOperand(data_offset)); + codegen_->StoreToShiftedRegOffset(value_type, + value_loc, + IP, + index.AsRegister<Register>()); } codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); @@ -4588,8 +4698,11 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ StoreToOffset(kStoreWord, value, array, offset); } else { DCHECK(index.IsRegister()) << index; - __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ StoreToOffset(kStoreWord, value, IP, data_offset); + __ add(IP, array, ShifterOperand(data_offset)); + codegen_->StoreToShiftedRegOffset(value_type, + value_loc, + IP, + index.AsRegister<Register>()); } codegen_->MaybeRecordImplicitNullCheck(instruction); __ b(&done); @@ -4656,6 +4769,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { } } + Register source = value; if (kPoisonHeapReferences) { // Note that in the case where `value` is a null reference, // we do not enter this block, as a null reference does not @@ -4672,8 +4786,12 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ StoreToOffset(kStoreWord, source, array, offset); } else { DCHECK(index.IsRegister()) << index; - __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ StoreToOffset(kStoreWord, source, IP, data_offset); + + __ add(IP, array, ShifterOperand(data_offset)); + codegen_->StoreToShiftedRegOffset(value_type, + Location::RegisterLocation(source), + IP, + index.AsRegister<Register>()); } if (!may_need_runtime_call_for_type_check) { @@ -4693,23 +4811,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimInt: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreToOffset(kStoreWord, value, array, offset); - } else { - DCHECK(index.IsRegister()) << index; - __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ StoreToOffset(kStoreWord, value, IP, data_offset); - } - break; - } - case Primitive::kPrimLong: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); Location value = locations->InAt(2); if (index.IsConstant()) { size_t offset = @@ -4723,7 +4825,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { } case Primitive::kPrimFloat: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); Location value = locations->InAt(2); DCHECK(value.IsFpuRegister()); if (index.IsConstant()) { @@ -4737,7 +4838,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { } case Primitive::kPrimDouble: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); Location value = locations->InAt(2); DCHECK(value.IsFpuRegisterPair()); if (index.IsConstant()) { @@ -4778,6 +4878,37 @@ void 
InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); } +void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset())); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location out = locations->Out(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); + + if (second.IsRegister()) { + __ add(out.AsRegister<Register>(), + first.AsRegister<Register>(), + ShifterOperand(second.AsRegister<Register>())); + } else { + __ AddConstant(out.AsRegister<Register>(), + first.AsRegister<Register>(), + second.GetConstant()->AsIntConstant()->GetValue()); + } +} + void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() ? LocationSummary::kCallOnSlowPath @@ -6979,7 +7110,7 @@ void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction method_offset); } else { uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex() % ImTable::kSize, kArmPointerSize)); + instruction->GetIndex(), kArmPointerSize)); __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(), locations->InAt(0).AsRegister<Register>(), diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index cc38f3e6a6..ef7913b6fd 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -365,6 +365,24 @@ class CodeGeneratorARM : public CodeGenerator { // Helper method to move a 64bits value between two locations. void Move64(Location destination, Location source); + void LoadOrStoreToOffset(Primitive::Type type, + Location loc, + Register base, + int32_t offset, + bool is_load, + Condition cond = AL); + + void LoadFromShiftedRegOffset(Primitive::Type type, + Location out_loc, + Register base, + Register reg_offset, + Condition cond = AL); + void StoreToShiftedRegOffset(Primitive::Type type, + Location out_loc, + Register base, + Register reg_offset, + Condition cond = AL); + // Generate code to invoke a runtime entry point. 
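The LoadFromShiftedRegOffset/StoreToShiftedRegOffset helpers declared above boil down to one piece of address arithmetic. A standalone sketch (helper names are mine, not ART's), assuming element sizes of 1, 2, 4, or 8 bytes:

#include <cstddef>
#include <cstdint>

// Stand-in for Primitive::ComponentSizeShift(): log2 of the element size.
constexpr uint32_t ComponentSizeShiftSketch(size_t element_size) {
  return element_size == 1 ? 0 : element_size == 2 ? 1 : element_size == 4 ? 2 : 3;
}

// Address of a[index] as the new code path forms it: one add of the constant
// data_offset (which an HIntermediateAddress input lets the compiler hoist and
// share across accesses), then a single access using the
// [base, index, LSL #shift] addressing mode.
uintptr_t ElementAddress(uintptr_t array, uint32_t data_offset, uint32_t index, size_t element_size) {
  uintptr_t base = array + data_offset;  // computed once, possibly outside the loop
  return base + (static_cast<uintptr_t>(index) << ComponentSizeShiftSketch(element_size));
}

The previous sequence instead shifted the index into IP, added it to the array pointer, and folded data_offset into every LoadFromOffset/StoreToOffset, so the constant offset was re-applied at each access rather than once per base address.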
void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 07b7823571..76b07979f5 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -462,7 +462,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, + CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); @@ -603,11 +603,9 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - // Save live registers before the runtime call, and in particular - // W0 (if it is live), as it is clobbered by functions - // art_quick_read_barrier_mark_regX. - SaveLiveRegisters(codegen, locations); - + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. InvokeRuntimeCallingConvention calling_convention; CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); DCHECK_NE(obj_.reg(), LR); @@ -635,8 +633,6 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { instruction_, instruction_->GetDexPc(), this); - - RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } @@ -690,10 +686,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress())); + instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); @@ -1983,9 +1978,8 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( } } -void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) { - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. +void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. DCHECK(!kEmitCompilerReadBarrier); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -1994,10 +1988,9 @@ void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddr locations->SetOut(Location::RequiresRegister()); } -void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( - HArm64IntermediateAddress* instruction) { - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. 
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress( + HIntermediateAddress* instruction) { + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. DCHECK(!kEmitCompilerReadBarrier); __ Add(OutputRegister(instruction), InputRegisterAt(instruction, 0), @@ -2097,9 +2090,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. Register temp = temps.AcquireW(); - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. - DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress()); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!instruction->GetArray()->IsIntermediateAddress()); // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -2112,15 +2104,15 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { source = HeapOperand(obj, offset); } else { Register temp = temps.AcquireSameSizeAs(obj); - if (instruction->GetArray()->IsArm64IntermediateAddress()) { + if (instruction->GetArray()->IsIntermediateAddress()) { // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. + // HIntermediateAddress instruction yet. DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in - // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. + // `TryExtractArrayAccessAddress()`. if (kIsDebugBuild) { - HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); + HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); } temp = obj; @@ -2204,15 +2196,15 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } else { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); - if (instruction->GetArray()->IsArm64IntermediateAddress()) { + if (instruction->GetArray()->IsIntermediateAddress()) { // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. + // HIntermediateAddress instruction yet. DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in - // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. + // `TryExtractArrayAccessAddress()`. 
if (kIsDebugBuild) { - HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); + HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); } temp = array; @@ -2228,7 +2220,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); } else { DCHECK(needs_write_barrier); - DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress()); + DCHECK(!instruction->GetArray()->IsIntermediateAddress()); vixl::aarch64::Label done; SlowPathCodeARM64* slow_path = nullptr; { @@ -3561,7 +3553,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok __ Ldr(temp, MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex() % ImTable::kSize, kArm64PointerSize)); + invoke->GetImtIndex(), kArm64PointerSize)); // temp = temp->GetImtEntryAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); @@ -5382,7 +5374,7 @@ void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instructi MemOperand(XRegisterFrom(locations->InAt(0)), method_offset)); } else { uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex() % ImTable::kSize, kArm64PointerSize)); + instruction->GetIndex(), kArm64PointerSize)); __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)), mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); __ Ldr(XRegisterFrom(locations->Out()), diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 2b71da0d1c..39248aa430 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -415,7 +415,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { this, IsDirectEntrypoint(kQuickInstanceofNonTrivial)); CheckEntrypointTypes< - kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); + kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); @@ -3791,7 +3791,7 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value()); uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex() % ImTable::kSize, kMipsPointerSize)); + invoke->GetImtIndex(), kMipsPointerSize)); // temp = temp->GetImtEntryAt(method_offset); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); @@ -5389,7 +5389,7 @@ void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instructio method_offset); } else { uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex() % ImTable::kSize, kMipsPointerSize)); + instruction->GetIndex(), kMipsPointerSize)); __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(), locations->InAt(0).AsRegister<Register>(), diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index aa1ba84178..29b8c206e6 100644 --- 
a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -362,7 +362,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { dex_pc, this); CheckEntrypointTypes< - kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); + kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); @@ -2951,7 +2951,7 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo __ LoadFromOffset(kLoadDoubleword, temp, temp, mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value()); uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex() % ImTable::kSize, kMips64PointerSize)); + invoke->GetImtIndex(), kMips64PointerSize)); // temp = temp->GetImtEntryAt(method_offset); __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 1cc6060f68..82baaa0443 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -349,7 +349,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode { instruction_->GetDexPc(), this); CheckEntrypointTypes< - kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); + kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -472,11 +472,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - // Save live registers before the runtime call, and in particular - // EAX (if it is live), as it is clobbered by functions - // art_quick_read_barrier_mark_regX. - SaveLiveRegisters(codegen, locations); - + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); DCHECK_NE(reg, ESP); @@ -502,8 +500,6 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); - - RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -2093,7 +2089,7 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value())); // temp = temp->GetImtEntryAt(method_offset); uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex() % ImTable::kSize, kX86PointerSize)); + invoke->GetImtIndex(), kX86PointerSize)); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); __ call(Address(temp, @@ -4115,7 +4111,7 @@ void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction Address(locations->InAt(0).AsRegister<Register>(), method_offset)); } else { uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex() % ImTable::kSize, kX86PointerSize)); + instruction->GetIndex(), kX86PointerSize)); __ movl(locations->Out().AsRegister<Register>(), Address(locations->InAt(0).AsRegister<Register>(), mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value())); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index a0158938b5..b6ba30e154 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -369,7 +369,7 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { dex_pc, this); CheckEntrypointTypes< - kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); + kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -493,11 +493,9 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - // Save live registers before the runtime call, and in particular - // RDI and/or RAX (if they are live), as they are clobbered by - // functions art_quick_read_barrier_mark_regX. - SaveLiveRegisters(codegen, locations); - + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); DCHECK_NE(reg, RSP); @@ -523,8 +521,6 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); - - RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -2322,7 +2318,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); // temp = temp->GetImtEntryAt(method_offset); uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize)); + invoke->GetImtIndex(), kX86_64PointerSize)); // temp = temp->GetImtEntryAt(method_offset); __ movq(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -4048,7 +4044,7 @@ void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruct Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset)); } else { uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize)); + instruction->GetIndex(), kX86_64PointerSize)); __ movq(locations->Out().AsRegister<CpuRegister>(), Address(locations->InAt(0).AsRegister<CpuRegister>(), mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 6be79fa75c..fe9a7af250 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -44,7 +44,7 @@ #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" -#include "register_allocator.h" +#include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" #include "utils.h" #include "utils/arm/managed_register_arm.h" @@ -219,7 +219,7 @@ static void RunCode(CodeGenerator* codegen, PrepareForRegisterAllocation(graph).Run(); liveness.Analyze(); - RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); + RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters(); hook_before_codegen(graph); InternalCodeAllocator allocator; diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index d2afa5b914..af0ee4e197 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -227,7 +227,7 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst return vixl::aarch64::Assembler::IsImmMovn(value, vixl::aarch64::kXRegSize); } else { DCHECK(instr->IsAdd() || - instr->IsArm64IntermediateAddress() || + instr->IsIntermediateAddress() || instr->IsBoundsCheck() || instr->IsCompare() || instr->IsCondition() || diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index e14f603fe1..0b4c569b05 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -31,7 +31,7 @@ #include "nodes.h" #include "optimization.h" #include "reference_type_propagation.h" -#include "register_allocator.h" +#include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" #include "utils/assembler.h" diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 6c1292cf66..a592162eb2 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -35,7 +35,7 @@ #include "nodes.h" #include 
"optimizing_compiler.h" #include "reference_type_propagation.h" -#include "register_allocator.h" +#include "register_allocator_linear_scan.h" #include "quick/inline_method_analyser.h" #include "sharpening.h" #include "ssa_builder.h" @@ -208,12 +208,8 @@ static uint32_t FindClassIndexIn(mirror::Class* cls, DCHECK(cls->IsProxyClass()) << PrettyClass(cls); // TODO: deal with proxy classes. } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) { + DCHECK_EQ(cls->GetDexCache(), dex_cache.Get()); index = cls->GetDexTypeIndex(); - } else { - index = cls->FindTypeIndexInOtherDexFile(dex_file); - } - - if (index != DexFile::kDexNoIndex) { // Update the dex cache to ensure the class is in. The generated code will // consider it is. We make it safe by updating the dex cache, as other // dex files might also load the class, and there is no guarantee the dex @@ -221,6 +217,14 @@ static uint32_t FindClassIndexIn(mirror::Class* cls, if (dex_cache->GetResolvedType(index) == nullptr) { dex_cache->SetResolvedType(index, cls); } + } else { + index = cls->FindTypeIndexInOtherDexFile(dex_file); + // We cannot guarantee the entry in the dex cache will resolve to the same class, + // as there may be different class loaders. So only return the index if it's + // the right class in the dex cache already. + if (index != DexFile::kDexNoIndex && dex_cache->GetResolvedType(index) != cls) { + index = DexFile::kDexNoIndex; + } } return index; @@ -273,7 +277,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { return false; } MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod(); - mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file) + mirror::DexCache* const dex_cache = IsSameDexFile(caller_dex_file, *ref.dex_file) ? caller_compilation_unit_.GetDexCache().Get() : class_linker->FindDexCache(soa.Self(), *ref.dex_file); resolved_method = dex_cache->GetResolvedMethod( @@ -657,7 +661,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction, ArtMethod* new_method = nullptr; if (invoke_instruction->IsInvokeInterface()) { new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get( - method_index % ImTable::kSize, pointer_size); + method_index, pointer_size); if (new_method->IsRuntimeMethod()) { // Bail out as soon as we see a conflict trampoline in one of the target's // interface table. 
@@ -804,8 +808,6 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, HInstruction** return_replacement) { - const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); - if (method->IsProxyMethod()) { VLOG(compiler) << "Method " << PrettyMethod(method) << " is not inlined because of unimplemented inline support for proxy methods."; @@ -828,15 +830,6 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - uint32_t method_index = FindMethodIndexIn( - method, caller_dex_file, invoke_instruction->GetDexMethodIndex()); - if (method_index == DexFile::kDexNoIndex) { - VLOG(compiler) << "Call to " - << PrettyMethod(method) - << " cannot be inlined because unaccessible to caller"; - return false; - } - bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile()); const DexFile::CodeItem* code_item = method->GetCodeItem(); @@ -873,7 +866,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (Runtime::Current()->UseJitCompilation() || !compiler_driver_->IsMethodVerifiedWithoutFailures( method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method) << " couldn't be verified, so it cannot be inlined"; return false; } @@ -883,7 +876,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { // Case of a static method that cannot be inlined because it implicitly // requires an initialization check of its declaring class. - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method) << " is not inlined because it is static and requires a clinit" << " check that cannot be emitted due to Dex cache limitations"; return false; @@ -893,7 +886,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, caller_dex_file); + VLOG(compiler) << "Successfully inlined " << PrettyMethod(method); MaybeRecordStat(kInlinedInvoke); return true; } diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index b4125299ea..afac5f9cf1 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -16,6 +16,7 @@ #include "instruction_builder.h" +#include "art_method-inl.h" #include "bytecode_utils.h" #include "class_linker.h" #include "driver/compiler_options.h" @@ -890,7 +891,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, return_type, dex_pc, method_idx, - resolved_method->GetDexMethodIndex()); + resolved_method->GetImtIndex()); } return HandleInvoke(invoke, diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index cd026b8770..495f3fd232 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -14,8 +14,10 @@ * limitations under the License. 
*/ +#include "code_generator.h" #include "instruction_simplifier_arm.h" #include "instruction_simplifier_shared.h" +#include "mirror/array-inl.h" namespace art { namespace arm { @@ -38,6 +40,46 @@ void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) { } } +void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { + size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + Primitive::Type type = instruction->GetType(); + + if (type == Primitive::kPrimLong + || type == Primitive::kPrimFloat + || type == Primitive::kPrimDouble) { + // T32 doesn't support ShiftedRegOffset mem address mode for these types + // to enable optimization. + return; + } + + if (TryExtractArrayAccessAddress(instruction, + instruction->GetArray(), + instruction->GetIndex(), + data_offset)) { + RecordSimplification(); + } +} + +void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) { + size_t access_size = Primitive::ComponentSize(instruction->GetComponentType()); + size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value(); + Primitive::Type type = instruction->GetComponentType(); + + if (type == Primitive::kPrimLong + || type == Primitive::kPrimFloat + || type == Primitive::kPrimDouble) { + // T32 doesn't support ShiftedRegOffset mem address mode for these types + // to enable optimization. + return; + } + + if (TryExtractArrayAccessAddress(instruction, + instruction->GetArray(), + instruction->GetIndex(), + data_offset)) { + RecordSimplification(); + } +} } // namespace arm } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index 14c940eb21..3d297dacc0 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -38,6 +38,8 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { void VisitMul(HMul* instruction) OVERRIDE; void VisitOr(HOr* instruction) OVERRIDE; void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitArrayGet(HArrayGet* instruction) OVERRIDE; + void VisitArraySet(HArraySet* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 983d31d168..6d107d571f 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -28,56 +28,6 @@ using helpers::CanFitInShifterOperand; using helpers::HasShifterOperand; using helpers::ShifterOperandSupportsExtension; -void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access, - HInstruction* array, - HInstruction* index, - size_t data_offset) { - if (kEmitCompilerReadBarrier) { - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. - // - // TODO: Handle this case properly in the ARM64 code generator and - // re-enable this optimization; otherwise, remove this TODO. - // b/26601270 - return; - } - if (index->IsConstant() || - (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { - // When the index is a constant all the addressing can be fitted in the - // memory access instruction, so do not split the access. - return; - } - if (access->IsArraySet() && - access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) { - // The access may require a runtime call or the original array pointer. 
- return; - } - - // Proceed to extract the base address computation. - ArenaAllocator* arena = GetGraph()->GetArena(); - - HIntConstant* offset = GetGraph()->GetIntConstant(data_offset); - HArm64IntermediateAddress* address = - new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc); - address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); - access->GetBlock()->InsertInstructionBefore(address, access); - access->ReplaceInput(address, 0); - // Both instructions must depend on GC to prevent any instruction that can - // trigger GC to be inserted between the two. - access->AddSideEffects(SideEffects::DependsOnGC()); - DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC())); - DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC())); - // TODO: Code generation for HArrayGet and HArraySet will check whether the input address - // is an HArm64IntermediateAddress and generate appropriate code. - // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe - // `HArm64Load` and `HArm64Store`). We defer these changes because these new instructions would - // not bring any advantages yet. - // Also see the comments in - // `InstructionCodeGeneratorARM64::VisitArrayGet()` and - // `InstructionCodeGeneratorARM64::VisitArraySet()`. - RecordSimplification(); -} - bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge) { @@ -190,19 +140,23 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - TryExtractArrayAccessAddress(instruction, - instruction->GetArray(), - instruction->GetIndex(), - data_offset); + if (TryExtractArrayAccessAddress(instruction, + instruction->GetArray(), + instruction->GetIndex(), + data_offset)) { + RecordSimplification(); + } } void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { size_t access_size = Primitive::ComponentSize(instruction->GetComponentType()); size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value(); - TryExtractArrayAccessAddress(instruction, - instruction->GetArray(), - instruction->GetIndex(), - data_offset); + if (TryExtractArrayAccessAddress(instruction, + instruction->GetArray(), + instruction->GetIndex(), + data_offset)) { + RecordSimplification(); + } } void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 4735f85ab0..28648b3bea 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -35,10 +35,6 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { } } - void TryExtractArrayAccessAddress(HInstruction* access, - HInstruction* array, - HInstruction* index, - size_t data_offset); bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index dab1ebc16d..8f7778fe68 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -226,4 +226,59 @@ bool TryMergeNegatedInput(HBinaryOperation* op) { return false; } + +bool 
TryExtractArrayAccessAddress(HInstruction* access, + HInstruction* array, + HInstruction* index, + size_t data_offset) { + if (kEmitCompilerReadBarrier) { + // The read barrier instrumentation does not support the + // HIntermediateAddress instruction yet. + // + // TODO: Handle this case properly in the ARM64 and ARM code generator and + // re-enable this optimization; otherwise, remove this TODO. + // b/26601270 + return false; + } + if (index->IsConstant() || + (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { + // When the index is a constant all the addressing can be fitted in the + // memory access instruction, so do not split the access. + return false; + } + if (access->IsArraySet() && + access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) { + // The access may require a runtime call or the original array pointer. + return false; + } + + // Proceed to extract the base address computation. + HGraph* graph = access->GetBlock()->GetGraph(); + ArenaAllocator* arena = graph->GetArena(); + + HIntConstant* offset = graph->GetIntConstant(data_offset); + HIntermediateAddress* address = + new (arena) HIntermediateAddress(array, offset, kNoDexPc); + address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); + access->GetBlock()->InsertInstructionBefore(address, access); + access->ReplaceInput(address, 0); + // Both instructions must depend on GC to prevent any instruction that can + // trigger GC to be inserted between the two. + access->AddSideEffects(SideEffects::DependsOnGC()); + DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC())); + DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC())); + // TODO: Code generation for HArrayGet and HArraySet will check whether the input address + // is an HIntermediateAddress and generate appropriate code. + // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe + // `HArm64Load` and `HArm64Store`,`HArmLoad` and `HArmStore`). We defer these changes + // because these new instructions would not bring any advantages yet. + // Also see the comments in + // `InstructionCodeGeneratorARM::VisitArrayGet()` + // `InstructionCodeGeneratorARM::VisitArraySet()` + // `InstructionCodeGeneratorARM64::VisitArrayGet()` + // `InstructionCodeGeneratorARM64::VisitArraySet()`. + return true; +} + + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index b1fe8f4756..56804f5e90 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -26,6 +26,11 @@ bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa); // a negated bitwise instruction. 
bool TryMergeNegatedInput(HBinaryOperation* op); +bool TryExtractArrayAccessAddress(HInstruction* access, + HInstruction* array, + HInstruction* index, + size_t data_offset); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 0f0ef26ea9..23ac457568 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1289,7 +1289,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #else #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ M(BitwiseNegatedRight, Instruction) \ - M(MultiplyAccumulate, Instruction) + M(MultiplyAccumulate, Instruction) \ + M(IntermediateAddress, Instruction) #endif #ifndef ART_ENABLE_CODEGEN_arm @@ -1303,8 +1304,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - M(Arm64DataProcWithShifterOp, Instruction) \ - M(Arm64IntermediateAddress, Instruction) + M(Arm64DataProcWithShifterOp, Instruction) #endif #ifndef ART_ENABLE_CODEGEN_mips diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 06b073c3e2..3f88717c2a 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -94,32 +94,6 @@ class HArm64DataProcWithShifterOp FINAL : public HExpression<2> { std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op); -// This instruction computes an intermediate address pointing in the 'middle' of an object. The -// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is -// never used across anything that can trigger GC. -class HArm64IntermediateAddress FINAL : public HExpression<2> { - public: - HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) - : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) { - SetRawInputAt(0, base_address); - SetRawInputAt(1, offset); - } - - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; - } - bool IsActualObject() const OVERRIDE { return false; } - - HInstruction* GetBaseAddress() const { return InputAt(0); } - HInstruction* GetOffset() const { return InputAt(1); } - - DECLARE_INSTRUCTION(Arm64IntermediateAddress); - - private: - DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); -}; - } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index f2d5cf3253..8bd8667f84 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -113,6 +113,34 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight); }; + +// This instruction computes an intermediate address pointing in the 'middle' of an object. The +// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is +// never used across anything that can trigger GC. 
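// Editorial sketch (not part of the change): after TryExtractArrayAccessAddress
// runs, an array access such as
//   ArrayGet(array, index)            // codegen folds base + data_offset + (index << shift)
// is rewritten as
//   addr = IntermediateAddress(array, data_offset)
//   ArrayGet(addr, index)             // codegen only needs addr + (index << shift)
// so a constant data offset shared by several accesses to the same array can be
// computed once, and both nodes carry SideEffects::DependsOnGC() so that no
// GC point can be scheduled between them.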
+class HIntermediateAddress FINAL : public HExpression<2> { + public: + HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) + : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) { + SetRawInputAt(0, base_address); + SetRawInputAt(1, offset); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + bool IsActualObject() const OVERRIDE { return false; } + + HInstruction* GetBaseAddress() const { return InputAt(0); } + HInstruction* GetOffset() const { return InputAt(1); } + + DECLARE_INSTRUCTION(IntermediateAddress); + + private: + DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress); +}; + + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index d6e09d7acb..0bca186814 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -81,7 +81,7 @@ #include "oat_quick_method_header.h" #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" -#include "register_allocator.h" +#include "register_allocator_linear_scan.h" #include "select_generator.h" #include "sharpening.h" #include "side_effects_analysis.h" @@ -448,8 +448,12 @@ static void RunArchOptimizations(InstructionSet instruction_set, arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats); arm::InstructionSimplifierArm* simplifier = new (arena) arm::InstructionSimplifierArm(graph, stats); + SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch"); HOptimization* arm_optimizations[] = { simplifier, + side_effects, + gvn, fixups }; RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); @@ -531,7 +535,7 @@ static void AllocateRegisters(HGraph* graph, } { PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer); - RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); + RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters(); } } diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc new file mode 100644 index 0000000000..34502869e4 --- /dev/null +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -0,0 +1,653 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "register_allocation_resolver.h" + +#include "code_generator.h" +#include "ssa_liveness_analysis.h" + +namespace art { + +RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& liveness) + : allocator_(allocator), + codegen_(codegen), + liveness_(liveness) {} + +void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs, + size_t max_safepoint_live_fp_regs, + size_t reserved_out_slots, + size_t int_spill_slots, + size_t long_spill_slots, + size_t float_spill_slots, + size_t double_spill_slots, + size_t catch_phi_spill_slots, + const ArenaVector<LiveInterval*>& temp_intervals) { + size_t spill_slots = int_spill_slots + + long_spill_slots + + float_spill_slots + + double_spill_slots + + catch_phi_spill_slots; + + // Computes frame size and spill mask. + codegen_->InitializeCodeGeneration(spill_slots, + max_safepoint_live_core_regs, + max_safepoint_live_fp_regs, + reserved_out_slots, // Includes slot(s) for the art method. + codegen_->GetGraph()->GetLinearOrder()); + + // Resolve outputs, including stack locations. + // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. + for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + LiveInterval* current = instruction->GetLiveInterval(); + LocationSummary* locations = instruction->GetLocations(); + Location location = locations->Out(); + if (instruction->IsParameterValue()) { + // Now that we know the frame size, adjust the parameter's location. + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + current->SetSpillSlot(location.GetStackIndex()); + locations->UpdateOut(location); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + current->SetSpillSlot(location.GetStackIndex()); + locations->UpdateOut(location); + } else if (current->HasSpillSlot()) { + current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize()); + } + } else if (instruction->IsCurrentMethod()) { + // The current method is always at offset 0. + DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0)); + } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { + DCHECK(current->HasSpillSlot()); + size_t slot = current->GetSpillSlot() + + spill_slots + + reserved_out_slots + - catch_phi_spill_slots; + current->SetSpillSlot(slot * kVRegSize); + } else if (current->HasSpillSlot()) { + // Adjust the stack slot, now that we know the number of them for each type. + // The way this implementation lays out the stack is the following: + // [parameter slots ] + // [catch phi spill slots ] + // [double spill slots ] + // [long spill slots ] + // [float spill slots ] + // [int/ref values ] + // [maximum out values ] (number of arguments for calls) + // [art method ]. 
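// Editorial example (made-up numbers) for the layout above and the switch that
// follows: suppose reserved_out_slots == 3, int_spill_slots == 4,
// float_spill_slots == 2 and long_spill_slots == 2. A float interval with raw
// spill slot 1 falls through the kPrimFloat and int/ref cases, ending at
//   1 + 4 (ints/refs) + 3 (reserved out) = slot 8, i.e. byte offset 8 * kVRegSize;
// a double at raw slot 0 accumulates every lower category:
//   0 + 2 (longs) + 2 (floats) + 4 (ints/refs) + 3 (reserved out) = slot 11.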
+ size_t slot = current->GetSpillSlot(); + switch (current->GetType()) { + case Primitive::kPrimDouble: + slot += long_spill_slots; + FALLTHROUGH_INTENDED; + case Primitive::kPrimLong: + slot += float_spill_slots; + FALLTHROUGH_INTENDED; + case Primitive::kPrimFloat: + slot += int_spill_slots; + FALLTHROUGH_INTENDED; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimShort: + slot += reserved_out_slots; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type for interval " << current->GetType(); + } + current->SetSpillSlot(slot * kVRegSize); + } + + Location source = current->ToLocation(); + + if (location.IsUnallocated()) { + if (location.GetPolicy() == Location::kSameAsFirstInput) { + if (locations->InAt(0).IsUnallocated()) { + locations->SetInAt(0, source); + } else { + DCHECK(locations->InAt(0).Equals(source)); + } + } + locations->UpdateOut(source); + } else { + DCHECK(source.Equals(location)); + } + } + + // Connect siblings and resolve inputs. + for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + ConnectSiblings(instruction->GetLiveInterval(), + max_safepoint_live_core_regs + max_safepoint_live_fp_regs); + } + + // Resolve non-linear control flow across branches. Order does not matter. + for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + if (block->IsCatchBlock() || + (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { + // Instructions live at the top of catch blocks or irreducible loop header + // were forced to spill. + if (kIsDebugBuild) { + BitVector* live = liveness_.GetLiveInSet(*block); + for (uint32_t idx : live->Indexes()) { + LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval(); + LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart()); + // `GetSiblingAt` returns the sibling that contains a position, but there could be + // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that + // position. + if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) { + DCHECK(!sibling->HasRegister()); + } + } + } + } else { + BitVector* live = liveness_.GetLiveInSet(*block); + for (uint32_t idx : live->Indexes()) { + LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval(); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + ConnectSplitSiblings(interval, predecessor, block); + } + } + } + } + + // Resolve phi inputs. Order does not matter. + for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + if (current->IsCatchBlock()) { + // Catch phi values are set at runtime by the exception delivery mechanism. 
+ } else { + for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + HInstruction* phi = inst_it.Current(); + for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) { + HBasicBlock* predecessor = current->GetPredecessors()[i]; + DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u); + HInstruction* input = phi->InputAt(i); + Location source = input->GetLiveInterval()->GetLocationAt( + predecessor->GetLifetimeEnd() - 1); + Location destination = phi->GetLiveInterval()->ToLocation(); + InsertParallelMoveAtExitOf(predecessor, phi, source, destination); + } + } + } + } + + // Resolve temp locations. + for (LiveInterval* temp : temp_intervals) { + if (temp->IsHighInterval()) { + // High intervals can be skipped, they are already handled by the low interval. + continue; + } + HInstruction* at = liveness_.GetTempUser(temp); + size_t temp_index = liveness_.GetTempIndex(temp); + LocationSummary* locations = at->GetLocations(); + switch (temp->GetType()) { + case Primitive::kPrimInt: + locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister())); + break; + + case Primitive::kPrimDouble: + if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + Location location = Location::FpuRegisterPairLocation( + temp->GetRegister(), temp->GetHighInterval()->GetRegister()); + locations->SetTempAt(temp_index, location); + } else { + locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister())); + } + break; + + default: + LOG(FATAL) << "Unexpected type for temporary location " + << temp->GetType(); + } + } +} + +void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval, + size_t max_safepoint_live_regs) { + LiveInterval* current = interval; + if (current->HasSpillSlot() + && current->HasRegister() + // Currently, we spill unconditionnally the current method in the code generators. + && !interval->GetDefinedBy()->IsCurrentMethod()) { + // We spill eagerly, so move must be at definition. + InsertMoveAfter(interval->GetDefinedBy(), + interval->ToLocation(), + interval->NeedsTwoSpillSlots() + ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) + : Location::StackSlot(interval->GetParent()->GetSpillSlot())); + } + UsePosition* use = current->GetFirstUse(); + UsePosition* env_use = current->GetFirstEnvironmentUse(); + + // Walk over all siblings, updating locations of use positions, and + // connecting them when they are adjacent. + do { + Location source = current->ToLocation(); + + // Walk over all uses covered by this interval, and update the location + // information. + + LiveRange* range = current->GetFirstRange(); + while (range != nullptr) { + while (use != nullptr && use->GetPosition() < range->GetStart()) { + DCHECK(use->IsSynthesized()); + use = use->GetNext(); + } + while (use != nullptr && use->GetPosition() <= range->GetEnd()) { + DCHECK(!use->GetIsEnvironment()); + DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); + if (!use->IsSynthesized()) { + LocationSummary* locations = use->GetUser()->GetLocations(); + Location expected_location = locations->InAt(use->GetInputIndex()); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. 
+ if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + } + } + use = use->GetNext(); + } + + // Walk over the environment uses, and update their locations. + while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) { + env_use = env_use->GetNext(); + } + + while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) { + DCHECK(current->CoversSlow(env_use->GetPosition()) + || (env_use->GetPosition() == range->GetEnd())); + HEnvironment* environment = env_use->GetEnvironment(); + environment->SetLocationAt(env_use->GetInputIndex(), source); + env_use = env_use->GetNext(); + } + + range = range->GetNext(); + } + + // If the next interval starts just after this one, and has a register, + // insert a move. + LiveInterval* next_sibling = current->GetNextSibling(); + if (next_sibling != nullptr + && next_sibling->HasRegister() + && current->GetEnd() == next_sibling->GetStart()) { + Location destination = next_sibling->ToLocation(); + InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination); + } + + for (SafepointPosition* safepoint_position = current->GetFirstSafepoint(); + safepoint_position != nullptr; + safepoint_position = safepoint_position->GetNext()) { + DCHECK(current->CoversSlow(safepoint_position->GetPosition())); + + LocationSummary* locations = safepoint_position->GetLocations(); + if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); + locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); + } + + switch (source.GetKind()) { + case Location::kRegister: { + locations->AddLiveRegister(source); + if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) { + DCHECK_LE(locations->GetNumberOfLiveRegisters(), + max_safepoint_live_regs); + } + if (current->GetType() == Primitive::kPrimNot) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); + locations->SetRegisterBit(source.reg()); + } + break; + } + case Location::kFpuRegister: { + locations->AddLiveRegister(source); + break; + } + + case Location::kRegisterPair: + case Location::kFpuRegisterPair: { + locations->AddLiveRegister(source.ToLow()); + locations->AddLiveRegister(source.ToHigh()); + break; + } + case Location::kStackSlot: // Fall-through + case Location::kDoubleStackSlot: // Fall-through + case Location::kConstant: { + // Nothing to do. + break; + } + default: { + LOG(FATAL) << "Unexpected location for object"; + } + } + } + current = next_sibling; + } while (current != nullptr); + + if (kIsDebugBuild) { + // Following uses can only be synthesized uses. 
+ while (use != nullptr) { + DCHECK(use->IsSynthesized()); + use = use->GetNext(); + } + } +} + +static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop( + HInstruction* instruction) { + return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() && + (instruction->IsConstant() || instruction->IsCurrentMethod()); +} + +void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval, + HBasicBlock* from, + HBasicBlock* to) const { + if (interval->GetNextSibling() == nullptr) { + // Nothing to connect. The whole range was allocated to the same location. + return; + } + + // Find the intervals that cover `from` and `to`. + size_t destination_position = to->GetLifetimeStart(); + size_t source_position = from->GetLifetimeEnd() - 1; + LiveInterval* destination = interval->GetSiblingAt(destination_position); + LiveInterval* source = interval->GetSiblingAt(source_position); + + if (destination == source) { + // Interval was not split. + return; + } + + LiveInterval* parent = interval->GetParent(); + HInstruction* defined_by = parent->GetDefinedBy(); + if (codegen_->GetGraph()->HasIrreducibleLoops() && + (destination == nullptr || !destination->CoversSlow(destination_position))) { + // Our live_in fixed point calculation has found that the instruction is live + // in the `to` block because it will eventually enter an irreducible loop. Our + // live interval computation however does not compute a fixed point, and + // therefore will not have a location for that instruction for `to`. + // Because the instruction is a constant or the ArtMethod, we don't need to + // do anything: it will be materialized in the irreducible loop. + DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by)) + << defined_by->DebugName() << ":" << defined_by->GetId() + << " " << from->GetBlockId() << " -> " << to->GetBlockId(); + return; + } + + if (!destination->HasRegister()) { + // Values are eagerly spilled. Spill slot already contains appropriate value. + return; + } + + Location location_source; + // `GetSiblingAt` returns the interval whose start and end cover `position`, + // but does not check whether the interval is inactive at that position. + // The only situation where the interval is inactive at that position is in the + // presence of irreducible loops for constants and ArtMethod. + if (codegen_->GetGraph()->HasIrreducibleLoops() && + (source == nullptr || !source->CoversSlow(source_position))) { + DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by)); + if (defined_by->IsConstant()) { + location_source = defined_by->GetLocations()->Out(); + } else { + DCHECK(defined_by->IsCurrentMethod()); + location_source = parent->NeedsTwoSpillSlots() + ? Location::DoubleStackSlot(parent->GetSpillSlot()) + : Location::StackSlot(parent->GetSpillSlot()); + } + } else { + DCHECK(source != nullptr); + DCHECK(source->CoversSlow(source_position)); + DCHECK(destination->CoversSlow(destination_position)); + location_source = source->ToLocation(); + } + + // If `from` has only one successor, we can put the moves at the exit of it. Otherwise + // we need to put the moves at the entry of `to`. 
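// Editorial note: the HIR graph is built without critical edges (a block ending
// in an if or a packed switch never branches straight to a block with phis), so
// on any edge that needs a connecting move at least one side is simple: either
// `from` has a single normal successor and the move goes at its exit, or `to`
// has a single predecessor and the move goes at its entry. The DCHECK_EQ in the
// else branch below depends on that invariant.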
+ if (from->GetNormalSuccessors().size() == 1) { + InsertParallelMoveAtExitOf(from, + defined_by, + location_source, + destination->ToLocation()); + } else { + DCHECK_EQ(to->GetPredecessors().size(), 1u); + InsertParallelMoveAtEntryOf(to, + defined_by, + location_source, + destination->ToLocation()); + } +} + +static bool IsValidDestination(Location destination) { + return destination.IsRegister() + || destination.IsRegisterPair() + || destination.IsFpuRegister() + || destination.IsFpuRegisterPair() + || destination.IsStackSlot() + || destination.IsDoubleStackSlot(); +} + +void RegisterAllocationResolver::AddMove(HParallelMove* move, + Location source, + Location destination, + HInstruction* instruction, + Primitive::Type type) const { + if (type == Primitive::kPrimLong + && codegen_->ShouldSplitLongMoves() + // The parallel move resolver knows how to deal with long constants. + && !source.IsConstant()) { + move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction); + move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr); + } else { + move->AddMove(source, destination, type, instruction); + } +} + +void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input, + HInstruction* user, + Location source, + Location destination) const { + if (source.Equals(destination)) return; + + DCHECK(!user->IsPhi()); + + HInstruction* previous = user->GetPrevious(); + HParallelMove* move = nullptr; + if (previous == nullptr + || !previous->IsParallelMove() + || previous->GetLifetimePosition() < user->GetLifetimePosition()) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(user->GetLifetimePosition()); + user->GetBlock()->InsertInstructionBefore(move, user); + } else { + move = previous->AsParallelMove(); + } + DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition()); + AddMove(move, source, destination, nullptr, input->GetType()); +} + +static bool IsInstructionStart(size_t position) { + return (position & 1) == 0; +} + +static bool IsInstructionEnd(size_t position) { + return (position & 1) == 1; +} + +void RegisterAllocationResolver::InsertParallelMoveAt(size_t position, + HInstruction* instruction, + Location source, + Location destination) const { + DCHECK(IsValidDestination(destination)) << destination; + if (source.Equals(destination)) return; + + HInstruction* at = liveness_.GetInstructionFromPosition(position / 2); + HParallelMove* move; + if (at == nullptr) { + if (IsInstructionStart(position)) { + // Block boundary, don't do anything the connection of split siblings will handle it. + return; + } else { + // Move must happen before the first instruction of the block. + at = liveness_.GetInstructionFromPosition((position + 1) / 2); + // Note that parallel moves may have already been inserted, so we explicitly + // ask for the first instruction of the block: `GetInstructionFromPosition` does + // not contain the `HParallelMove` instructions. + at = at->GetBlock()->GetFirstInstruction(); + + if (at->GetLifetimePosition() < position) { + // We may insert moves for split siblings and phi spills at the beginning of the block. + // Since this is a different lifetime position, we need to go to the next instruction. 
+ DCHECK(at->IsParallelMove()); + at = at->GetNext(); + } + + if (at->GetLifetimePosition() != position) { + DCHECK_GT(at->GetLifetimePosition(), position); + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + at->GetBlock()->InsertInstructionBefore(move, at); + } else { + DCHECK(at->IsParallelMove()); + move = at->AsParallelMove(); + } + } + } else if (IsInstructionEnd(position)) { + // Move must happen after the instruction. + DCHECK(!at->IsControlFlow()); + move = at->GetNext()->AsParallelMove(); + // This is a parallel move for connecting siblings in a same block. We need to + // differentiate it with moves for connecting blocks, and input moves. + if (move == nullptr || move->GetLifetimePosition() > position) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + at->GetBlock()->InsertInstructionBefore(move, at->GetNext()); + } + } else { + // Move must happen before the instruction. + HInstruction* previous = at->GetPrevious(); + if (previous == nullptr + || !previous->IsParallelMove() + || previous->GetLifetimePosition() != position) { + // If the previous is a parallel move, then its position must be lower + // than the given `position`: it was added just after the non-parallel + // move instruction that precedes `instruction`. + DCHECK(previous == nullptr + || !previous->IsParallelMove() + || previous->GetLifetimePosition() < position); + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + at->GetBlock()->InsertInstructionBefore(move, at); + } else { + move = previous->AsParallelMove(); + } + } + DCHECK_EQ(move->GetLifetimePosition(), position); + AddMove(move, source, destination, instruction, instruction->GetType()); +} + +void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const { + DCHECK(IsValidDestination(destination)) << destination; + if (source.Equals(destination)) return; + + DCHECK_EQ(block->GetNormalSuccessors().size(), 1u); + HInstruction* last = block->GetLastInstruction(); + // We insert moves at exit for phi predecessors and connecting blocks. + // A block ending with an if or a packed switch cannot branch to a block + // with phis because we do not allow critical edges. It can also not connect + // a split interval between two blocks: the move has to happen in the successor. + DCHECK(!last->IsIf() && !last->IsPackedSwitch()); + HInstruction* previous = last->GetPrevious(); + HParallelMove* move; + // This is a parallel move for connecting blocks. We need to differentiate + // it with moves for connecting siblings in a same block, and output moves. 
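// Editorial note: every HParallelMove is keyed by a lifetime position, and that
// key is how the resolver tells the kinds of moves apart (input moves at the
// user's position, sibling-connecting moves at the split position,
// block-connecting moves at the block boundary). Each insertion helper first
// looks for an existing parallel move with the required position and only
// allocates a new one when none is found, so unrelated moves that land on the
// same position are merged into a single HParallelMove and emitted together.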
+ size_t position = last->GetLifetimePosition(); + if (previous == nullptr || !previous->IsParallelMove() + || previous->AsParallelMove()->GetLifetimePosition() != position) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + block->InsertInstructionBefore(move, last); + } else { + move = previous->AsParallelMove(); + } + AddMove(move, source, destination, instruction, instruction->GetType()); +} + +void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const { + DCHECK(IsValidDestination(destination)) << destination; + if (source.Equals(destination)) return; + + HInstruction* first = block->GetFirstInstruction(); + HParallelMove* move = first->AsParallelMove(); + size_t position = block->GetLifetimeStart(); + // This is a parallel move for connecting blocks. We need to differentiate + // it with moves for connecting siblings in a same block, and input moves. + if (move == nullptr || move->GetLifetimePosition() != position) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + block->InsertInstructionBefore(move, first); + } + AddMove(move, source, destination, instruction, instruction->GetType()); +} + +void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction, + Location source, + Location destination) const { + DCHECK(IsValidDestination(destination)) << destination; + if (source.Equals(destination)) return; + + if (instruction->IsPhi()) { + InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination); + return; + } + + size_t position = instruction->GetLifetimePosition() + 1; + HParallelMove* move = instruction->GetNext()->AsParallelMove(); + // This is a parallel move for moving the output of an instruction. We need + // to differentiate with input moves, moves for connecting siblings in a + // and moves for connecting blocks. + if (move == nullptr || move->GetLifetimePosition() != position) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); + } + AddMove(move, source, destination, instruction, instruction->GetType()); +} + +} // namespace art diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h new file mode 100644 index 0000000000..6ceb9bc955 --- /dev/null +++ b/compiler/optimizing/register_allocation_resolver.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_ +#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_ + +#include "base/arena_containers.h" +#include "base/value_object.h" +#include "primitive.h" + +namespace art { + +class ArenaAllocator; +class CodeGenerator; +class HBasicBlock; +class HInstruction; +class HParallelMove; +class LiveInterval; +class Location; +class SsaLivenessAnalysis; + +/** + * Reconciles the locations assigned to live intervals with the location + * summary of each instruction, and inserts moves to resolve split intervals, + * nonlinear control flow, and phi inputs. + */ +class RegisterAllocationResolver : ValueObject { + public: + RegisterAllocationResolver(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& liveness); + + void Resolve(size_t max_safepoint_live_core_regs, + size_t max_safepoint_live_fp_regs, + size_t reserved_out_slots, // Includes slot(s) for the art method. + size_t int_spill_slots, + size_t long_spill_slots, + size_t float_spill_slots, + size_t double_spill_slots, + size_t catch_phi_spill_slots, + const ArenaVector<LiveInterval*>& temp_intervals); + + private: + // Connect adjacent siblings within blocks, and resolve inputs along the way. + // Uses max_safepoint_live_regs to check that we did not underestimate the + // number of live registers at safepoints. + void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs); + + // Connect siblings between block entries and exits. + void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const; + + // Helper methods for inserting parallel moves in the graph. + void InsertParallelMoveAtExitOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const; + void InsertParallelMoveAtEntryOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const; + void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const; + void AddInputMoveFor(HInstruction* input, + HInstruction* user, + Location source, + Location destination) const; + void InsertParallelMoveAt(size_t position, + HInstruction* instruction, + Location source, + Location destination) const; + void AddMove(HParallelMove* move, + Location source, + Location destination, + HInstruction* instruction, + Primitive::Type type) const; + + ArenaAllocator* const allocator_; + CodeGenerator* const codegen_; + const SsaLivenessAnalysis& liveness_; + + DISALLOW_COPY_AND_ASSIGN(RegisterAllocationResolver); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_ diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 1b33408b7e..2367ce1aeb 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 The Android Open Source Project + * Copyright (C) 2016 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,65 +21,30 @@ #include "base/bit_vector-inl.h" #include "code_generator.h" +#include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" -namespace art { - -static constexpr size_t kMaxLifetimePosition = -1; -static constexpr size_t kDefaultNumberOfSpillSlots = 4; -// For simplicity, we implement register pairs as (reg, reg + 1). 
-// Note that this is a requirement for double registers on ARM, since we -// allocate SRegister. -static int GetHighForLowRegister(int reg) { return reg + 1; } -static bool IsLowRegister(int reg) { return (reg & 1) == 0; } -static bool IsLowOfUnalignedPairInterval(LiveInterval* low) { - return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister(); -} +namespace art { RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& liveness) - : allocator_(allocator), - codegen_(codegen), - liveness_(liveness), - unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - unhandled_(nullptr), - handled_(allocator->Adapter(kArenaAllocRegisterAllocator)), - active_(allocator->Adapter(kArenaAllocRegisterAllocator)), - inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), - long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), - float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), - double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), - catch_phi_spill_slots_(0), - safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), - processing_core_registers_(false), - number_of_registers_(-1), - registers_array_(nullptr), - blocked_core_registers_(codegen->GetBlockedCoreRegisters()), - blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()), - reserved_out_slots_(0), - maximum_number_of_live_core_registers_(0), - maximum_number_of_live_fp_registers_(0) { - temp_intervals_.reserve(4); - int_spill_slots_.reserve(kDefaultNumberOfSpillSlots); - long_spill_slots_.reserve(kDefaultNumberOfSpillSlots); - float_spill_slots_.reserve(kDefaultNumberOfSpillSlots); - double_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + : allocator_(allocator), + codegen_(codegen), + liveness_(liveness) {} - codegen->SetupBlockedRegisters(); - physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr); - physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr); - // Always reserve for the current method and the graph's max out registers. - // TODO: compute it instead. - // ArtMethod* takes 2 vregs for 64 bits. 
- reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize + - codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); +RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& analysis, + Strategy strategy) { + switch (strategy) { + case kRegisterAllocatorLinearScan: + return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis); + default: + LOG(FATAL) << "Invalid register allocation strategy: " << strategy; + UNREACHABLE(); + } } bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED, @@ -93,328 +58,6 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UN || instruction_set == kX86_64; } -static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) { - if (interval == nullptr) return false; - bool is_core_register = (interval->GetType() != Primitive::kPrimDouble) - && (interval->GetType() != Primitive::kPrimFloat); - return processing_core_registers == is_core_register; -} - -void RegisterAllocator::AllocateRegisters() { - AllocateRegistersInternal(); - Resolve(); - - if (kIsDebugBuild) { - processing_core_registers_ = true; - ValidateInternal(true); - processing_core_registers_ = false; - ValidateInternal(true); - // Check that the linear order is still correct with regards to lifetime positions. - // Since only parallel moves have been inserted during the register allocation, - // these checks are mostly for making sure these moves have been added correctly. - size_t current_liveness = 0; - for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { - HInstruction* instruction = inst_it.Current(); - DCHECK_LE(current_liveness, instruction->GetLifetimePosition()); - current_liveness = instruction->GetLifetimePosition(); - } - for (HInstructionIterator inst_it(block->GetInstructions()); - !inst_it.Done(); - inst_it.Advance()) { - HInstruction* instruction = inst_it.Current(); - DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName(); - current_liveness = instruction->GetLifetimePosition(); - } - } - } -} - -void RegisterAllocator::BlockRegister(Location location, size_t start, size_t end) { - int reg = location.reg(); - DCHECK(location.IsRegister() || location.IsFpuRegister()); - LiveInterval* interval = location.IsRegister() - ? physical_core_register_intervals_[reg] - : physical_fp_register_intervals_[reg]; - Primitive::Type type = location.IsRegister() - ? 
Primitive::kPrimInt - : Primitive::kPrimFloat; - if (interval == nullptr) { - interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); - if (location.IsRegister()) { - physical_core_register_intervals_[reg] = interval; - } else { - physical_fp_register_intervals_[reg] = interval; - } - } - DCHECK(interval->GetRegister() == reg); - interval->AddRange(start, end); -} - -void RegisterAllocator::BlockRegisters(size_t start, size_t end, bool caller_save_only) { - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) { - BlockRegister(Location::RegisterLocation(i), start, end); - } - } - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) { - BlockRegister(Location::FpuRegisterLocation(i), start, end); - } - } -} - -void RegisterAllocator::AllocateRegistersInternal() { - // Iterate post-order, to ensure the list is sorted, and the last added interval - // is the one with the lowest start position. - for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done(); - back_it.Advance()) { - ProcessInstruction(back_it.Current()); - } - for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { - ProcessInstruction(inst_it.Current()); - } - - if (block->IsCatchBlock() || - (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { - // By blocking all registers at the top of each catch block or irreducible loop, we force - // intervals belonging to the live-in set of the catch/header block to be spilled. - // TODO(ngeoffray): Phis in this block could be allocated in register. - size_t position = block->GetLifetimeStart(); - BlockRegisters(position, position + 1); - } - } - - number_of_registers_ = codegen_->GetNumberOfCoreRegisters(); - registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, - kArenaAllocRegisterAllocator); - processing_core_registers_ = true; - unhandled_ = &unhandled_core_intervals_; - for (LiveInterval* fixed : physical_core_register_intervals_) { - if (fixed != nullptr) { - // Fixed interval is added to inactive_ instead of unhandled_. - // It's also the only type of inactive interval whose start position - // can be after the current interval during linear scan. - // Fixed interval is never split and never moves to unhandled_. - inactive_.push_back(fixed); - } - } - LinearScan(); - - inactive_.clear(); - active_.clear(); - handled_.clear(); - - number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters(); - registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, - kArenaAllocRegisterAllocator); - processing_core_registers_ = false; - unhandled_ = &unhandled_fp_intervals_; - for (LiveInterval* fixed : physical_fp_register_intervals_) { - if (fixed != nullptr) { - // Fixed interval is added to inactive_ instead of unhandled_. - // It's also the only type of inactive interval whose start position - // can be after the current interval during linear scan. - // Fixed interval is never split and never moves to unhandled_. 
- inactive_.push_back(fixed); - } - } - LinearScan(); -} - -void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); - - if (locations == nullptr) return; - - // Create synthesized intervals for temporaries. - for (size_t i = 0; i < locations->GetTempCount(); ++i) { - Location temp = locations->GetTemp(i); - if (temp.IsRegister() || temp.IsFpuRegister()) { - BlockRegister(temp, position, position + 1); - // Ensure that an explicit temporary register is marked as being allocated. - codegen_->AddAllocatedRegister(temp); - } else { - DCHECK(temp.IsUnallocated()); - switch (temp.GetPolicy()) { - case Location::kRequiresRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); - temp_intervals_.push_back(interval); - interval->AddTempUse(instruction, i); - unhandled_core_intervals_.push_back(interval); - break; - } - - case Location::kRequiresFpuRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); - temp_intervals_.push_back(interval); - interval->AddTempUse(instruction, i); - if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { - interval->AddHighInterval(/* is_temp */ true); - LiveInterval* high = interval->GetHighInterval(); - temp_intervals_.push_back(high); - unhandled_fp_intervals_.push_back(high); - } - unhandled_fp_intervals_.push_back(interval); - break; - } - - default: - LOG(FATAL) << "Unexpected policy for temporary location " - << temp.GetPolicy(); - } - } - } - - bool core_register = (instruction->GetType() != Primitive::kPrimDouble) - && (instruction->GetType() != Primitive::kPrimFloat); - - if (locations->NeedsSafepoint()) { - if (codegen_->IsLeafMethod()) { - // TODO: We do this here because we do not want the suspend check to artificially - // create live registers. We should find another place, but this is currently the - // simplest. - DCHECK(instruction->IsSuspendCheckEntry()); - instruction->GetBlock()->RemoveInstruction(instruction); - return; - } - safepoints_.push_back(instruction); - if (locations->OnlyCallsOnSlowPath()) { - // We add a synthesized range at this position to record the live registers - // at this position. Ideally, we could just update the safepoints when locations - // are updated, but we currently need to know the full stack size before updating - // locations (because of parameters and the fact that we don't have a frame pointer). - // And knowing the full stack size requires to know the maximum number of live - // registers at calls in slow paths. - // By adding the following interval in the algorithm, we can compute this - // maximum before updating locations. 
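The synthesized interval added below is what lets the allocator know, before any location is finalized, how many registers can be live across a slow-path call; Resolve() later feeds that maximum into codegen_->InitializeCodeGeneration(). A rough sketch of why the frame size depends on it (names are illustrative, the exact accounting belongs to the code generator):

    // Illustrative only: the frame needs room for the spill area plus the
    // registers a slow path may have to save at its safepoint.
    size_t frame_vreg_slots = number_of_spill_slots
                            + maximum_number_of_live_core_registers
                            + maximum_number_of_live_fp_registers
                            + reserved_out_slots;  // outgoing args + ArtMethod*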
- LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction); - interval->AddRange(position, position + 1); - AddSorted(&unhandled_core_intervals_, interval); - AddSorted(&unhandled_fp_intervals_, interval); - } - } - - if (locations->WillCall()) { - BlockRegisters(position, position + 1, /* caller_save_only */ true); - } - - for (size_t i = 0; i < locations->GetInputCount(); ++i) { - Location input = locations->InAt(i); - if (input.IsRegister() || input.IsFpuRegister()) { - BlockRegister(input, position, position + 1); - } else if (input.IsPair()) { - BlockRegister(input.ToLow(), position, position + 1); - BlockRegister(input.ToHigh(), position, position + 1); - } - } - - LiveInterval* current = instruction->GetLiveInterval(); - if (current == nullptr) return; - - ArenaVector<LiveInterval*>& unhandled = core_register - ? unhandled_core_intervals_ - : unhandled_fp_intervals_; - - DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back())); - - if (codegen_->NeedsTwoRegisters(current->GetType())) { - current->AddHighInterval(); - } - - for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { - HInstruction* safepoint = safepoints_[safepoint_index - 1u]; - size_t safepoint_position = safepoint->GetLifetimePosition(); - - // Test that safepoints are ordered in the optimal way. - DCHECK(safepoint_index == safepoints_.size() || - safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); - - if (safepoint_position == current->GetStart()) { - // The safepoint is for this instruction, so the location of the instruction - // does not need to be saved. - DCHECK_EQ(safepoint_index, safepoints_.size()); - DCHECK_EQ(safepoint, instruction); - continue; - } else if (current->IsDeadAt(safepoint_position)) { - break; - } else if (!current->Covers(safepoint_position)) { - // Hole in the interval. - continue; - } - current->AddSafepoint(safepoint); - } - current->ResetSearchCache(); - - // Some instructions define their output in fixed register/stack slot. We need - // to ensure we know these locations before doing register allocation. For a - // given register, we create an interval that covers these locations. The register - // will be unavailable at these locations when trying to allocate one for an - // interval. - // - // The backwards walking ensures the ranges are ordered on increasing start positions. - Location output = locations->Out(); - if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) { - Location first = locations->InAt(0); - if (first.IsRegister() || first.IsFpuRegister()) { - current->SetFrom(position + 1); - current->SetRegister(first.reg()); - } else if (first.IsPair()) { - current->SetFrom(position + 1); - current->SetRegister(first.low()); - LiveInterval* high = current->GetHighInterval(); - high->SetRegister(first.high()); - high->SetFrom(position + 1); - } - } else if (output.IsRegister() || output.IsFpuRegister()) { - // Shift the interval's start by one to account for the blocked register. 
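A concrete reading of the fixed-output case handled next: when an instruction's location summary pins its result to a specific machine register (a call returning in the ABI return register, say), that register is blocked for the instruction's own position, and the result's interval starts one position later with the register already assigned, so the scan never hands the register to another value at the definition point. A hedged sketch with an invented register name:

    // Call C at lifetime position p, result fixed to R0 (illustrative):
    //   [p, p + 1)  : R0 blocked, the call itself owns it
    //   [p + 1, ...): result interval, SetRegister(R0), competes normally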
- current->SetFrom(position + 1); - current->SetRegister(output.reg()); - BlockRegister(output, position, position + 1); - } else if (output.IsPair()) { - current->SetFrom(position + 1); - current->SetRegister(output.low()); - LiveInterval* high = current->GetHighInterval(); - high->SetRegister(output.high()); - high->SetFrom(position + 1); - BlockRegister(output.ToLow(), position, position + 1); - BlockRegister(output.ToHigh(), position, position + 1); - } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) { - current->SetSpillSlot(output.GetStackIndex()); - } else { - DCHECK(output.IsUnallocated() || output.IsConstant()); - } - - if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { - AllocateSpillSlotForCatchPhi(instruction->AsPhi()); - } - - // If needed, add interval to the list of unhandled intervals. - if (current->HasSpillSlot() || instruction->IsConstant()) { - // Split just before first register use. - size_t first_register_use = current->FirstRegisterUse(); - if (first_register_use != kNoLifetime) { - LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1); - // Don't add directly to `unhandled`, it needs to be sorted and the start - // of this new interval might be after intervals already in the list. - AddSorted(&unhandled, split); - } else { - // Nothing to do, we won't allocate a register for this value. - } - } else { - // Don't add directly to `unhandled`, temp or safepoint intervals - // for this instruction may have been added, and those can be - // processed first. - AddSorted(&unhandled, current); - } -} - class AllRangesIterator : public ValueObject { public: explicit AllRangesIterator(LiveInterval* interval) @@ -442,36 +85,6 @@ class AllRangesIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(AllRangesIterator); }; -bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { - // To simplify unit testing, we eagerly create the array of intervals, and - // call the helper method. - ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate)); - for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { - HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) { - intervals.push_back(instruction->GetLiveInterval()); - } - } - - const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_ - ? 
&physical_core_register_intervals_ - : &physical_fp_register_intervals_; - for (LiveInterval* fixed : *physical_register_intervals) { - if (fixed != nullptr) { - intervals.push_back(fixed); - } - } - - for (LiveInterval* temp : temp_intervals_) { - if (ShouldProcess(processing_core_registers_, temp)) { - intervals.push_back(temp); - } - } - - return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_, - allocator_, processing_core_registers_, log_fatal_on_failure); -} - bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals, size_t number_of_spill_slots, size_t number_of_out_slots, @@ -564,638 +177,30 @@ bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& inte return true; } -void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const { - interval->Dump(stream); - stream << ": "; - if (interval->HasRegister()) { - if (interval->IsFloatingPoint()) { - codegen_->DumpFloatingPointRegister(stream, interval->GetRegister()); - } else { - codegen_->DumpCoreRegister(stream, interval->GetRegister()); - } - } else { - stream << "spilled"; - } - stream << std::endl; -} - -void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const { - stream << "inactive: " << std::endl; - for (LiveInterval* inactive_interval : inactive_) { - DumpInterval(stream, inactive_interval); - } - stream << "active: " << std::endl; - for (LiveInterval* active_interval : active_) { - DumpInterval(stream, active_interval); - } - stream << "unhandled: " << std::endl; - auto unhandled = (unhandled_ != nullptr) ? - unhandled_ : &unhandled_core_intervals_; - for (LiveInterval* unhandled_interval : *unhandled) { - DumpInterval(stream, unhandled_interval); - } - stream << "handled: " << std::endl; - for (LiveInterval* handled_interval : handled_) { - DumpInterval(stream, handled_interval); - } -} - -// By the book implementation of a linear scan register allocator. -void RegisterAllocator::LinearScan() { - while (!unhandled_->empty()) { - // (1) Remove interval with the lowest start position from unhandled. - LiveInterval* current = unhandled_->back(); - unhandled_->pop_back(); - - // Make sure the interval is an expected state. - DCHECK(!current->IsFixed() && !current->HasSpillSlot()); - // Make sure we are going in the right order. - DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart()); - // Make sure a low interval is always with a high. - DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval()); - // Make sure a high interval is always with a low. - DCHECK(current->IsLowInterval() || - unhandled_->empty() || - !unhandled_->back()->IsHighInterval()); - - size_t position = current->GetStart(); - - // Remember the inactive_ size here since the ones moved to inactive_ from - // active_ below shouldn't need to be re-checked. - size_t inactive_intervals_to_handle = inactive_.size(); - - // (2) Remove currently active intervals that are dead at this position. - // Move active intervals that have a lifetime hole at this position - // to inactive. - auto active_kept_end = std::remove_if( - active_.begin(), - active_.end(), - [this, position](LiveInterval* interval) { - if (interval->IsDeadAt(position)) { - handled_.push_back(interval); - return true; - } else if (!interval->Covers(position)) { - inactive_.push_back(interval); - return true; - } else { - return false; // Keep this interval. 
- } - }); - active_.erase(active_kept_end, active_.end()); - - // (3) Remove currently inactive intervals that are dead at this position. - // Move inactive intervals that cover this position to active. - auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle; - auto inactive_kept_end = std::remove_if( - inactive_.begin(), - inactive_to_handle_end, - [this, position](LiveInterval* interval) { - DCHECK(interval->GetStart() < position || interval->IsFixed()); - if (interval->IsDeadAt(position)) { - handled_.push_back(interval); - return true; - } else if (interval->Covers(position)) { - active_.push_back(interval); - return true; - } else { - return false; // Keep this interval. - } - }); - inactive_.erase(inactive_kept_end, inactive_to_handle_end); - - if (current->IsSlowPathSafepoint()) { - // Synthesized interval to record the maximum number of live registers - // at safepoints. No need to allocate a register for it. - if (processing_core_registers_) { - maximum_number_of_live_core_registers_ = - std::max(maximum_number_of_live_core_registers_, active_.size()); - } else { - maximum_number_of_live_fp_registers_ = - std::max(maximum_number_of_live_fp_registers_, active_.size()); - } - DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart()); - continue; - } - - if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) { - DCHECK(!current->HasRegister()); - // Allocating the low part was unsucessful. The splitted interval for the high part - // will be handled next (it is in the `unhandled_` list). - continue; - } - - // (4) Try to find an available register. - bool success = TryAllocateFreeReg(current); - - // (5) If no register could be found, we need to spill. - if (!success) { - success = AllocateBlockedReg(current); - } - - // (6) If the interval had a register allocated, add it to the list of active - // intervals. - if (success) { - codegen_->AddAllocatedRegister(processing_core_registers_ - ? Location::RegisterLocation(current->GetRegister()) - : Location::FpuRegisterLocation(current->GetRegister())); - active_.push_back(current); - if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) { - current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister())); - } - } - } -} - -static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) { - DCHECK(!interval->IsHighInterval()); - // Note that the same instruction may occur multiple times in the input list, - // so `free_until` may have changed already. - // Since `position` is not the current scan position, we need to use CoversSlow. - if (interval->IsDeadAt(position)) { - // Set the register to be free. Note that inactive intervals might later - // update this. - free_until[interval->GetRegister()] = kMaxLifetimePosition; - if (interval->HasHighInterval()) { - DCHECK(interval->GetHighInterval()->IsDeadAt(position)); - free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition; - } - } else if (!interval->CoversSlow(position)) { - // The interval becomes inactive at `defined_by`. We make its register - // available only until the next use strictly after `defined_by`. 
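This helper backs the case where an instruction's output may share a register with one of its inputs (OutputCanOverlapWithInputs() returns false): the input is probed at the position just after the defining instruction, and if it is dead there its register is handed back in full, so TryAllocateFreeReg can give that same register to the freshly defined value. A worked example with invented positions:

    // defined_by at position 20, probe position 21:
    //   input interval ends at 21            -> free_until[input_reg] = kMaxLifetimePosition
    //   input has a hole at 21, next use 36  -> free_until[input_reg] = 36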
- free_until[interval->GetRegister()] = interval->FirstUseAfter(position); +LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) { + DCHECK_GE(position, interval->GetStart()); + DCHECK(!interval->IsDeadAt(position)); + if (position == interval->GetStart()) { + // Spill slot will be allocated when handling `interval` again. + interval->ClearRegister(); if (interval->HasHighInterval()) { - DCHECK(!interval->GetHighInterval()->CoversSlow(position)); - free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()]; - } - } -} - -// Find a free register. If multiple are found, pick the register that -// is free the longest. -bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { - size_t* free_until = registers_array_; - - // First set all registers to be free. - for (size_t i = 0; i < number_of_registers_; ++i) { - free_until[i] = kMaxLifetimePosition; - } - - // For each active interval, set its register to not free. - for (LiveInterval* interval : active_) { - DCHECK(interval->HasRegister()); - free_until[interval->GetRegister()] = 0; - } - - // An interval that starts an instruction (that is, it is not split), may - // re-use the registers used by the inputs of that instruciton, based on the - // location summary. - HInstruction* defined_by = current->GetDefinedBy(); - if (defined_by != nullptr && !current->IsSplit()) { - LocationSummary* locations = defined_by->GetLocations(); - if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) { - HInputsRef inputs = defined_by->GetInputs(); - for (size_t i = 0; i < inputs.size(); ++i) { - // Take the last interval of the input. It is the location of that interval - // that will be used at `defined_by`. - LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling(); - // Note that interval may have not been processed yet. - // TODO: Handle non-split intervals last in the work list. - if (locations->InAt(i).IsValid() - && interval->HasRegister() - && interval->SameRegisterKind(*current)) { - // The input must be live until the end of `defined_by`, to comply to - // the linear scan algorithm. So we use `defined_by`'s end lifetime - // position to check whether the input is dead or is inactive after - // `defined_by`. - DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition())); - size_t position = defined_by->GetLifetimePosition() + 1; - FreeIfNotCoverAt(interval, position, free_until); - } - } - } - } - - // For each inactive interval, set its register to be free until - // the next intersection with `current`. - for (LiveInterval* inactive : inactive_) { - // Temp/Slow-path-safepoint interval has no holes. - DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); - if (!current->IsSplit() && !inactive->IsFixed()) { - // Neither current nor inactive are fixed. - // Thanks to SSA, a non-split interval starting in a hole of an - // inactive interval should never intersect with that inactive interval. - // Only if it's not fixed though, because fixed intervals don't come from SSA. - DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); - continue; - } - - DCHECK(inactive->HasRegister()); - if (free_until[inactive->GetRegister()] == 0) { - // Already used by some active interval. No need to intersect. 
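A concrete reading of the free_until table being built here, with invented positions:

    // free_until[r1] = 0                     -> held by an active interval, unusable
    // free_until[r2] = 40                    -> free until an inactive interval
    //                                           intersects `current` at position 40
    // free_until[r3] = kMaxLifetimePosition  -> free for the whole method

If `current` is already dead by position 40 it can simply take r2; otherwise picking r2 means splitting `current` before 40 and pushing the tail back onto unhandled_.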
- continue; - } - size_t next_intersection = inactive->FirstIntersectionWith(current); - if (next_intersection != kNoLifetime) { - free_until[inactive->GetRegister()] = - std::min(free_until[inactive->GetRegister()], next_intersection); - } - } - - int reg = kNoRegister; - if (current->HasRegister()) { - // Some instructions have a fixed register output. - reg = current->GetRegister(); - if (free_until[reg] == 0) { - DCHECK(current->IsHighInterval()); - // AllocateBlockedReg will spill the holder of the register. - return false; - } - } else { - DCHECK(!current->IsHighInterval()); - int hint = current->FindFirstRegisterHint(free_until, liveness_); - if ((hint != kNoRegister) - // For simplicity, if the hint we are getting for a pair cannot be used, - // we are just going to allocate a new pair. - && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) { - DCHECK(!IsBlocked(hint)); - reg = hint; - } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(free_until, current->GetStart()); - } else { - reg = FindAvailableRegister(free_until, current); - } - } - - DCHECK_NE(reg, kNoRegister); - // If we could not find a register, we need to spill. - if (free_until[reg] == 0) { - return false; - } - - if (current->IsLowInterval()) { - // If the high register of this interval is not available, we need to spill. - int high_reg = current->GetHighInterval()->GetRegister(); - if (high_reg == kNoRegister) { - high_reg = GetHighForLowRegister(reg); - } - if (free_until[high_reg] == 0) { - return false; - } - } - - current->SetRegister(reg); - if (!current->IsDeadAt(free_until[reg])) { - // If the register is only available for a subset of live ranges - // covered by `current`, split `current` before the position where - // the register is not available anymore. - LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]); - DCHECK(split != nullptr); - AddSorted(unhandled_, split); - } - return true; -} - -bool RegisterAllocator::IsBlocked(int reg) const { - return processing_core_registers_ - ? blocked_core_registers_[reg] - : blocked_fp_registers_[reg]; -} - -int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const { - int reg = kNoRegister; - // Pick the register pair that is used the last. - for (size_t i = 0; i < number_of_registers_; ++i) { - if (IsBlocked(i)) continue; - if (!IsLowRegister(i)) continue; - int high_register = GetHighForLowRegister(i); - if (IsBlocked(high_register)) continue; - int existing_high_register = GetHighForLowRegister(reg); - if ((reg == kNoRegister) || (next_use[i] >= next_use[reg] - && next_use[high_register] >= next_use[existing_high_register])) { - reg = i; - if (next_use[i] == kMaxLifetimePosition - && next_use[high_register] == kMaxLifetimePosition) { - break; - } - } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) { - // If one of the current register is known to be unavailable, just unconditionally - // try a new one. - reg = i; - } - } - return reg; -} - -bool RegisterAllocator::IsCallerSaveRegister(int reg) const { - return processing_core_registers_ - ? !codegen_->IsCoreCalleeSaveRegister(reg) - : !codegen_->IsFloatingPointCalleeSaveRegister(reg); -} - -int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* current) const { - // We special case intervals that do not span a safepoint to try to find a caller-save - // register if one is available. 
We iterate from 0 to the number of registers, - // so if there are caller-save registers available at the end, we continue the iteration. - bool prefers_caller_save = !current->HasWillCallSafepoint(); - int reg = kNoRegister; - for (size_t i = 0; i < number_of_registers_; ++i) { - if (IsBlocked(i)) { - // Register cannot be used. Continue. - continue; - } - - // Best case: we found a register fully available. - if (next_use[i] == kMaxLifetimePosition) { - if (prefers_caller_save && !IsCallerSaveRegister(i)) { - // We can get shorter encodings on some platforms by using - // small register numbers. So only update the candidate if the previous - // one was not available for the whole method. - if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) { - reg = i; - } - // Continue the iteration in the hope of finding a caller save register. - continue; - } else { - reg = i; - // We know the register is good enough. Return it. - break; - } - } - - // If we had no register before, take this one as a reference. - if (reg == kNoRegister) { - reg = i; - continue; - } - - // Pick the register that is used the last. - if (next_use[i] > next_use[reg]) { - reg = i; - continue; - } - } - return reg; -} - -// Remove interval and its other half if any. Return iterator to the following element. -static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf( - ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) { - DCHECK(intervals->begin() <= pos && pos < intervals->end()); - LiveInterval* interval = *pos; - if (interval->IsLowInterval()) { - DCHECK(pos + 1 < intervals->end()); - DCHECK_EQ(*(pos + 1), interval->GetHighInterval()); - return intervals->erase(pos, pos + 2); - } else if (interval->IsHighInterval()) { - DCHECK(intervals->begin() < pos); - DCHECK_EQ(*(pos - 1), interval->GetLowInterval()); - return intervals->erase(pos - 1, pos + 1); - } else { - return intervals->erase(pos); - } -} - -bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, - size_t first_register_use, - size_t* next_use) { - for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { - LiveInterval* active = *it; - DCHECK(active->HasRegister()); - if (active->IsFixed()) continue; - if (active->IsHighInterval()) continue; - if (first_register_use > next_use[active->GetRegister()]) continue; - - // Split the first interval found that is either: - // 1) A non-pair interval. - // 2) A pair interval whose high is not low + 1. - // 3) A pair interval whose low is not even. - if (!active->IsLowInterval() || - IsLowOfUnalignedPairInterval(active) || - !IsLowRegister(active->GetRegister())) { - LiveInterval* split = Split(active, position); - if (split != active) { - handled_.push_back(active); - } - RemoveIntervalAndPotentialOtherHalf(&active_, it); - AddSorted(unhandled_, split); - return true; - } - } - return false; -} - -// Find the register that is used the last, and spill the interval -// that holds it. If the first use of `current` is after that register -// we spill `current` instead. -bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { - size_t first_register_use = current->FirstRegisterUse(); - if (current->HasRegister()) { - DCHECK(current->IsHighInterval()); - // The low interval has allocated the register for the high interval. In - // case the low interval had to split both intervals, we may end up in a - // situation where the high interval does not have a register use anymore. 
- // We must still proceed in order to split currently active and inactive - // uses of the high interval's register, and put the high interval in the - // active set. - DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr)); - } else if (first_register_use == kNoLifetime) { - AllocateSpillSlotFor(current); - return false; - } - - // First set all registers as not being used. - size_t* next_use = registers_array_; - for (size_t i = 0; i < number_of_registers_; ++i) { - next_use[i] = kMaxLifetimePosition; - } - - // For each active interval, find the next use of its register after the - // start of current. - for (LiveInterval* active : active_) { - DCHECK(active->HasRegister()); - if (active->IsFixed()) { - next_use[active->GetRegister()] = current->GetStart(); - } else { - size_t use = active->FirstRegisterUseAfter(current->GetStart()); - if (use != kNoLifetime) { - next_use[active->GetRegister()] = use; - } - } - } - - // For each inactive interval, find the next use of its register after the - // start of current. - for (LiveInterval* inactive : inactive_) { - // Temp/Slow-path-safepoint interval has no holes. - DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); - if (!current->IsSplit() && !inactive->IsFixed()) { - // Neither current nor inactive are fixed. - // Thanks to SSA, a non-split interval starting in a hole of an - // inactive interval should never intersect with that inactive interval. - // Only if it's not fixed though, because fixed intervals don't come from SSA. - DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); - continue; - } - DCHECK(inactive->HasRegister()); - size_t next_intersection = inactive->FirstIntersectionWith(current); - if (next_intersection != kNoLifetime) { - if (inactive->IsFixed()) { - next_use[inactive->GetRegister()] = - std::min(next_intersection, next_use[inactive->GetRegister()]); - } else { - size_t use = inactive->FirstUseAfter(current->GetStart()); - if (use != kNoLifetime) { - next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]); - } - } - } - } - - int reg = kNoRegister; - bool should_spill = false; - if (current->HasRegister()) { - DCHECK(current->IsHighInterval()); - reg = current->GetRegister(); - // When allocating the low part, we made sure the high register was available. - DCHECK_LT(first_register_use, next_use[reg]); - } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(next_use, first_register_use); - // We should spill if both registers are not available. - should_spill = (first_register_use >= next_use[reg]) - || (first_register_use >= next_use[GetHighForLowRegister(reg)]); - } else { - DCHECK(!current->IsHighInterval()); - reg = FindAvailableRegister(next_use, current); - should_spill = (first_register_use >= next_use[reg]); - } - - DCHECK_NE(reg, kNoRegister); - if (should_spill) { - DCHECK(!current->IsHighInterval()); - bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1)); - if (is_allocation_at_use_site) { - if (!current->IsLowInterval()) { - DumpInterval(std::cerr, current); - DumpAllIntervals(std::cerr); - // This situation has the potential to infinite loop, so we make it a non-debug CHECK. 
- HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2); - CHECK(false) << "There is not enough registers available for " - << current->GetParent()->GetDefinedBy()->DebugName() << " " - << current->GetParent()->GetDefinedBy()->GetId() - << " at " << first_register_use - 1 << " " - << (at == nullptr ? "" : at->DebugName()); - } - - // If we're allocating a register for `current` because the instruction at - // that position requires it, but we think we should spill, then there are - // non-pair intervals or unaligned pair intervals blocking the allocation. - // We split the first interval found, and put ourselves first in the - // `unhandled_` list. - bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(), - first_register_use, - next_use); - DCHECK(success); - LiveInterval* existing = unhandled_->back(); - DCHECK(existing->IsHighInterval()); - DCHECK_EQ(existing->GetLowInterval(), current); - unhandled_->push_back(current); - } else { - // If the first use of that instruction is after the last use of the found - // register, we split this interval just before its first register use. - AllocateSpillSlotFor(current); - LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1); - DCHECK(current != split); - AddSorted(unhandled_, split); + interval->GetHighInterval()->ClearRegister(); + } else if (interval->HasLowInterval()) { + interval->GetLowInterval()->ClearRegister(); } - return false; + return interval; } else { - // Use this register and spill the active and inactives interval that - // have that register. - current->SetRegister(reg); - - for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { - LiveInterval* active = *it; - if (active->GetRegister() == reg) { - DCHECK(!active->IsFixed()); - LiveInterval* split = Split(active, current->GetStart()); - if (split != active) { - handled_.push_back(active); - } - RemoveIntervalAndPotentialOtherHalf(&active_, it); - AddSorted(unhandled_, split); - break; - } - } - - // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body. - for (auto it = inactive_.begin(); it != inactive_.end(); ) { - LiveInterval* inactive = *it; - bool erased = false; - if (inactive->GetRegister() == reg) { - if (!current->IsSplit() && !inactive->IsFixed()) { - // Neither current nor inactive are fixed. - // Thanks to SSA, a non-split interval starting in a hole of an - // inactive interval should never intersect with that inactive interval. - // Only if it's not fixed though, because fixed intervals don't come from SSA. - DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); - } else { - size_t next_intersection = inactive->FirstIntersectionWith(current); - if (next_intersection != kNoLifetime) { - if (inactive->IsFixed()) { - LiveInterval* split = Split(current, next_intersection); - DCHECK_NE(split, current); - AddSorted(unhandled_, split); - } else { - // Split at the start of `current`, which will lead to splitting - // at the end of the lifetime hole of `inactive`. - LiveInterval* split = Split(inactive, current->GetStart()); - // If it's inactive, it must start before the current interval. - DCHECK_NE(split, inactive); - it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it); - erased = true; - handled_.push_back(inactive); - AddSorted(unhandled_, split); - } - } - } - } - // If we have erased the element, `it` already points to the next element. - // Otherwise we need to move to the next element. 
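The removal loop here relies on the standard erase-while-iterating idiom: erase() returns the iterator to the element following the erased range, so the iterator is only advanced manually when nothing was erased. A minimal standalone illustration of the same pattern (generic, not ART-specific):

    #include <vector>

    void RemoveNegatives(std::vector<int>* v) {
      for (auto it = v->begin(); it != v->end(); ) {
        if (*it < 0) {
          it = v->erase(it);  // erase returns the next valid iterator
        } else {
          ++it;               // advance only when nothing was erased
        }
      }
    }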
- if (!erased) { - ++it; - } - } - - return true; - } -} - -void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) { - DCHECK(!interval->IsFixed() && !interval->HasSpillSlot()); - size_t insert_at = 0; - for (size_t i = array->size(); i > 0; --i) { - LiveInterval* current = (*array)[i - 1u]; - // High intervals must be processed right after their low equivalent. - if (current->StartsAfter(interval) && !current->IsHighInterval()) { - insert_at = i; - break; - } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { - // Ensure the slow path interval is the last to be processed at its location: we want the - // interval to know all live registers at this location. - DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current)); - insert_at = i; - break; + LiveInterval* new_interval = interval->SplitAt(position); + if (interval->HasHighInterval()) { + LiveInterval* high = interval->GetHighInterval()->SplitAt(position); + new_interval->SetHighInterval(high); + high->SetLowInterval(new_interval); + } else if (interval->HasLowInterval()) { + LiveInterval* low = interval->GetLowInterval()->SplitAt(position); + new_interval->SetLowInterval(low); + low->SetHighInterval(new_interval); } - } - - // Insert the high interval before the low, to ensure the low is processed before. - auto insert_pos = array->begin() + insert_at; - if (interval->HasHighInterval()) { - array->insert(insert_pos, { interval->GetHighInterval(), interval }); - } else if (interval->HasLowInterval()) { - array->insert(insert_pos, { interval, interval->GetLowInterval() }); - } else { - array->insert(insert_pos, interval); + return new_interval; } } @@ -1258,754 +263,4 @@ LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t fro return Split(interval, block_to->GetLifetimeStart()); } -LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) { - DCHECK_GE(position, interval->GetStart()); - DCHECK(!interval->IsDeadAt(position)); - if (position == interval->GetStart()) { - // Spill slot will be allocated when handling `interval` again. - interval->ClearRegister(); - if (interval->HasHighInterval()) { - interval->GetHighInterval()->ClearRegister(); - } else if (interval->HasLowInterval()) { - interval->GetLowInterval()->ClearRegister(); - } - return interval; - } else { - LiveInterval* new_interval = interval->SplitAt(position); - if (interval->HasHighInterval()) { - LiveInterval* high = interval->GetHighInterval()->SplitAt(position); - new_interval->SetHighInterval(high); - high->SetLowInterval(new_interval); - } else if (interval->HasLowInterval()) { - LiveInterval* low = interval->GetLowInterval()->SplitAt(position); - new_interval->SetLowInterval(low); - low->SetHighInterval(new_interval); - } - return new_interval; - } -} - -void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { - if (interval->IsHighInterval()) { - // The low interval already took care of allocating the spill slot. - DCHECK(!interval->GetLowInterval()->HasRegister()); - DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot()); - return; - } - - LiveInterval* parent = interval->GetParent(); - - // An instruction gets a spill slot for its entire lifetime. If the parent - // of this interval already has a spill slot, there is nothing to do. 
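The free-slot search a few lines further down reuses a spill slot once its previous occupant has ended: each entry of the per-type vector stores the end position of the interval currently holding that slot, so a slot whose recorded end is at or before the new parent's start can be recycled, and wide values (long/double) need two adjacent slots. A worked example with invented positions:

    // int_spill_slots_ = { 35, 18, 40 }, new parent starts at 20:
    //   single slot -> slot 1 is reusable (18 <= 20); its entry is then
    //                  overwritten with the new interval's last end position
    //   two slots   -> no two adjacent entries are <= 20, so the vector grows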
- if (parent->HasSpillSlot()) { - return; - } - - HInstruction* defined_by = parent->GetDefinedBy(); - DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi()); - - if (defined_by->IsParameterValue()) { - // Parameters have their own stack slot. - parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); - return; - } - - if (defined_by->IsCurrentMethod()) { - parent->SetSpillSlot(0); - return; - } - - if (defined_by->IsConstant()) { - // Constants don't need a spill slot. - return; - } - - ArenaVector<size_t>* spill_slots = nullptr; - switch (interval->GetType()) { - case Primitive::kPrimDouble: - spill_slots = &double_spill_slots_; - break; - case Primitive::kPrimLong: - spill_slots = &long_spill_slots_; - break; - case Primitive::kPrimFloat: - spill_slots = &float_spill_slots_; - break; - case Primitive::kPrimNot: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimShort: - spill_slots = &int_spill_slots_; - break; - case Primitive::kPrimVoid: - LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); - } - - // Find an available spill slot. - size_t slot = 0; - for (size_t e = spill_slots->size(); slot < e; ++slot) { - if ((*spill_slots)[slot] <= parent->GetStart()) { - if (!parent->NeedsTwoSpillSlots()) { - // One spill slot is sufficient. - break; - } - if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) { - // Two spill slots are available. - break; - } - } - } - - size_t end = interval->GetLastSibling()->GetEnd(); - if (parent->NeedsTwoSpillSlots()) { - if (slot + 2u > spill_slots->size()) { - // We need a new spill slot. - spill_slots->resize(slot + 2u, end); - } - (*spill_slots)[slot] = end; - (*spill_slots)[slot + 1] = end; - } else { - if (slot == spill_slots->size()) { - // We need a new spill slot. - spill_slots->push_back(end); - } else { - (*spill_slots)[slot] = end; - } - } - - // Note that the exact spill slot location will be computed when we resolve, - // that is when we know the number of spill slots for each type. - parent->SetSpillSlot(slot); -} - -static bool IsValidDestination(Location destination) { - return destination.IsRegister() - || destination.IsRegisterPair() - || destination.IsFpuRegister() - || destination.IsFpuRegisterPair() - || destination.IsStackSlot() - || destination.IsDoubleStackSlot(); -} - -void RegisterAllocator::AllocateSpillSlotForCatchPhi(HPhi* phi) { - LiveInterval* interval = phi->GetLiveInterval(); - - HInstruction* previous_phi = phi->GetPrevious(); - DCHECK(previous_phi == nullptr || - previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) - << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent."; - - if (phi->IsVRegEquivalentOf(previous_phi)) { - // This is an equivalent of the previous phi. We need to assign the same - // catch phi slot. - DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot()); - interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); - } else { - // Allocate a new spill slot for this catch phi. - // TODO: Reuse spill slots when intervals of phis from different catch - // blocks do not overlap. - interval->SetSpillSlot(catch_phi_spill_slots_); - catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 
2 : 1; - } -} - -void RegisterAllocator::AddMove(HParallelMove* move, - Location source, - Location destination, - HInstruction* instruction, - Primitive::Type type) const { - if (type == Primitive::kPrimLong - && codegen_->ShouldSplitLongMoves() - // The parallel move resolver knows how to deal with long constants. - && !source.IsConstant()) { - move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction); - move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr); - } else { - move->AddMove(source, destination, type, instruction); - } -} - -void RegisterAllocator::AddInputMoveFor(HInstruction* input, - HInstruction* user, - Location source, - Location destination) const { - if (source.Equals(destination)) return; - - DCHECK(!user->IsPhi()); - - HInstruction* previous = user->GetPrevious(); - HParallelMove* move = nullptr; - if (previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() < user->GetLifetimePosition()) { - move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(user->GetLifetimePosition()); - user->GetBlock()->InsertInstructionBefore(move, user); - } else { - move = previous->AsParallelMove(); - } - DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition()); - AddMove(move, source, destination, nullptr, input->GetType()); -} - -static bool IsInstructionStart(size_t position) { - return (position & 1) == 0; -} - -static bool IsInstructionEnd(size_t position) { - return (position & 1) == 1; -} - -void RegisterAllocator::InsertParallelMoveAt(size_t position, - HInstruction* instruction, - Location source, - Location destination) const { - DCHECK(IsValidDestination(destination)) << destination; - if (source.Equals(destination)) return; - - HInstruction* at = liveness_.GetInstructionFromPosition(position / 2); - HParallelMove* move; - if (at == nullptr) { - if (IsInstructionStart(position)) { - // Block boundary, don't do anything the connection of split siblings will handle it. - return; - } else { - // Move must happen before the first instruction of the block. - at = liveness_.GetInstructionFromPosition((position + 1) / 2); - // Note that parallel moves may have already been inserted, so we explicitly - // ask for the first instruction of the block: `GetInstructionFromPosition` does - // not contain the `HParallelMove` instructions. - at = at->GetBlock()->GetFirstInstruction(); - - if (at->GetLifetimePosition() < position) { - // We may insert moves for split siblings and phi spills at the beginning of the block. - // Since this is a different lifetime position, we need to go to the next instruction. - DCHECK(at->IsParallelMove()); - at = at->GetNext(); - } - - if (at->GetLifetimePosition() != position) { - DCHECK_GT(at->GetLifetimePosition(), position); - move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(position); - at->GetBlock()->InsertInstructionBefore(move, at); - } else { - DCHECK(at->IsParallelMove()); - move = at->AsParallelMove(); - } - } - } else if (IsInstructionEnd(position)) { - // Move must happen after the instruction. - DCHECK(!at->IsControlFlow()); - move = at->GetNext()->AsParallelMove(); - // This is a parallel move for connecting siblings in a same block. We need to - // differentiate it with moves for connecting blocks, and input moves. 
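The parity helpers above encode the lifetime numbering used throughout this file: every instruction sits at an even position and position + 1 means "just after" it, which is how the requested position selects where an HParallelMove is placed. A worked example with an invented position:

    // Instruction I at lifetime position 14:
    //   move requested at 14 (even, start) -> inserted before I
    //   move requested at 15 (odd, end)    -> inserted after I
    //   even position with no instruction  -> block boundary, left to the
    //                                         split-sibling connection code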
- if (move == nullptr || move->GetLifetimePosition() > position) { - move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(position); - at->GetBlock()->InsertInstructionBefore(move, at->GetNext()); - } - } else { - // Move must happen before the instruction. - HInstruction* previous = at->GetPrevious(); - if (previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() != position) { - // If the previous is a parallel move, then its position must be lower - // than the given `position`: it was added just after the non-parallel - // move instruction that precedes `instruction`. - DCHECK(previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() < position); - move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(position); - at->GetBlock()->InsertInstructionBefore(move, at); - } else { - move = previous->AsParallelMove(); - } - } - DCHECK_EQ(move->GetLifetimePosition(), position); - AddMove(move, source, destination, instruction, instruction->GetType()); -} - -void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, - HInstruction* instruction, - Location source, - Location destination) const { - DCHECK(IsValidDestination(destination)) << destination; - if (source.Equals(destination)) return; - - DCHECK_EQ(block->GetNormalSuccessors().size(), 1u); - HInstruction* last = block->GetLastInstruction(); - // We insert moves at exit for phi predecessors and connecting blocks. - // A block ending with an if or a packed switch cannot branch to a block - // with phis because we do not allow critical edges. It can also not connect - // a split interval between two blocks: the move has to happen in the successor. - DCHECK(!last->IsIf() && !last->IsPackedSwitch()); - HInstruction* previous = last->GetPrevious(); - HParallelMove* move; - // This is a parallel move for connecting blocks. We need to differentiate - // it with moves for connecting siblings in a same block, and output moves. - size_t position = last->GetLifetimePosition(); - if (previous == nullptr || !previous->IsParallelMove() - || previous->AsParallelMove()->GetLifetimePosition() != position) { - move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(position); - block->InsertInstructionBefore(move, last); - } else { - move = previous->AsParallelMove(); - } - AddMove(move, source, destination, instruction, instruction->GetType()); -} - -void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, - HInstruction* instruction, - Location source, - Location destination) const { - DCHECK(IsValidDestination(destination)) << destination; - if (source.Equals(destination)) return; - - HInstruction* first = block->GetFirstInstruction(); - HParallelMove* move = first->AsParallelMove(); - size_t position = block->GetLifetimeStart(); - // This is a parallel move for connecting blocks. We need to differentiate - // it with moves for connecting siblings in a same block, and input moves. 
- if (move == nullptr || move->GetLifetimePosition() != position) { - move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(position); - block->InsertInstructionBefore(move, first); - } - AddMove(move, source, destination, instruction, instruction->GetType()); -} - -void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, - Location source, - Location destination) const { - DCHECK(IsValidDestination(destination)) << destination; - if (source.Equals(destination)) return; - - if (instruction->IsPhi()) { - InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination); - return; - } - - size_t position = instruction->GetLifetimePosition() + 1; - HParallelMove* move = instruction->GetNext()->AsParallelMove(); - // This is a parallel move for moving the output of an instruction. We need - // to differentiate with input moves, moves for connecting siblings in a - // and moves for connecting blocks. - if (move == nullptr || move->GetLifetimePosition() != position) { - move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(position); - instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); - } - AddMove(move, source, destination, instruction, instruction->GetType()); -} - -void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { - LiveInterval* current = interval; - if (current->HasSpillSlot() - && current->HasRegister() - // Currently, we spill unconditionnally the current method in the code generators. - && !interval->GetDefinedBy()->IsCurrentMethod()) { - // We spill eagerly, so move must be at definition. - InsertMoveAfter(interval->GetDefinedBy(), - interval->ToLocation(), - interval->NeedsTwoSpillSlots() - ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) - : Location::StackSlot(interval->GetParent()->GetSpillSlot())); - } - UsePosition* use = current->GetFirstUse(); - UsePosition* env_use = current->GetFirstEnvironmentUse(); - - // Walk over all siblings, updating locations of use positions, and - // connecting them when they are adjacent. - do { - Location source = current->ToLocation(); - - // Walk over all uses covered by this interval, and update the location - // information. - - LiveRange* range = current->GetFirstRange(); - while (range != nullptr) { - while (use != nullptr && use->GetPosition() < range->GetStart()) { - DCHECK(use->IsSynthesized()); - use = use->GetNext(); - } - while (use != nullptr && use->GetPosition() <= range->GetEnd()) { - DCHECK(!use->GetIsEnvironment()); - DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); - if (!use->IsSynthesized()) { - LocationSummary* locations = use->GetUser()->GetLocations(); - Location expected_location = locations->InAt(use->GetInputIndex()); - // The expected (actual) location may be invalid in case the input is unused. Currently - // this only happens for intrinsics. - if (expected_location.IsValid()) { - if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); - } - } else { - DCHECK(use->GetUser()->IsInvoke()); - DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); - } - } - use = use->GetNext(); - } - - // Walk over the environment uses, and update their locations. 
- while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) { - env_use = env_use->GetNext(); - } - - while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) { - DCHECK(current->CoversSlow(env_use->GetPosition()) - || (env_use->GetPosition() == range->GetEnd())); - HEnvironment* environment = env_use->GetEnvironment(); - environment->SetLocationAt(env_use->GetInputIndex(), source); - env_use = env_use->GetNext(); - } - - range = range->GetNext(); - } - - // If the next interval starts just after this one, and has a register, - // insert a move. - LiveInterval* next_sibling = current->GetNextSibling(); - if (next_sibling != nullptr - && next_sibling->HasRegister() - && current->GetEnd() == next_sibling->GetStart()) { - Location destination = next_sibling->ToLocation(); - InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination); - } - - for (SafepointPosition* safepoint_position = current->GetFirstSafepoint(); - safepoint_position != nullptr; - safepoint_position = safepoint_position->GetNext()) { - DCHECK(current->CoversSlow(safepoint_position->GetPosition())); - - LocationSummary* locations = safepoint_position->GetLocations(); - if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { - DCHECK(interval->GetDefinedBy()->IsActualObject()) - << interval->GetDefinedBy()->DebugName() - << "@" << safepoint_position->GetInstruction()->DebugName(); - locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); - } - - switch (source.GetKind()) { - case Location::kRegister: { - locations->AddLiveRegister(source); - if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) { - DCHECK_LE(locations->GetNumberOfLiveRegisters(), - maximum_number_of_live_core_registers_ + - maximum_number_of_live_fp_registers_); - } - if (current->GetType() == Primitive::kPrimNot) { - DCHECK(interval->GetDefinedBy()->IsActualObject()) - << interval->GetDefinedBy()->DebugName() - << "@" << safepoint_position->GetInstruction()->DebugName(); - locations->SetRegisterBit(source.reg()); - } - break; - } - case Location::kFpuRegister: { - locations->AddLiveRegister(source); - break; - } - - case Location::kRegisterPair: - case Location::kFpuRegisterPair: { - locations->AddLiveRegister(source.ToLow()); - locations->AddLiveRegister(source.ToHigh()); - break; - } - case Location::kStackSlot: // Fall-through - case Location::kDoubleStackSlot: // Fall-through - case Location::kConstant: { - // Nothing to do. - break; - } - default: { - LOG(FATAL) << "Unexpected location for object"; - } - } - } - current = next_sibling; - } while (current != nullptr); - - if (kIsDebugBuild) { - // Following uses can only be synthesized uses. - while (use != nullptr) { - DCHECK(use->IsSynthesized()); - use = use->GetNext(); - } - } -} - -static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop( - HInstruction* instruction) { - return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() && - (instruction->IsConstant() || instruction->IsCurrentMethod()); -} - -void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, - HBasicBlock* from, - HBasicBlock* to) const { - if (interval->GetNextSibling() == nullptr) { - // Nothing to connect. The whole range was allocated to the same location. - return; - } - - // Find the intervals that cover `from` and `to`. 
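Because critical edges were split before allocation, every edge that needs a fix-up move has either a `from` block with a single normal successor or a `to` block with a single predecessor, so the move can always be attributed to exactly one block. A compact worked example with invented blocks and locations:

    // Interval for v split across the edge B1 -> B3, destination has a register:
    //   sibling covering the end of B1   : in r2
    //   sibling covering the start of B3 : in r5
    //   B1 has one successor -> parallel move r2 -> r5 emitted at the exit of B1
    //   otherwise            -> B3 has one predecessor, move emitted at its entry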
- size_t destination_position = to->GetLifetimeStart(); - size_t source_position = from->GetLifetimeEnd() - 1; - LiveInterval* destination = interval->GetSiblingAt(destination_position); - LiveInterval* source = interval->GetSiblingAt(source_position); - - if (destination == source) { - // Interval was not split. - return; - } - - LiveInterval* parent = interval->GetParent(); - HInstruction* defined_by = parent->GetDefinedBy(); - if (codegen_->GetGraph()->HasIrreducibleLoops() && - (destination == nullptr || !destination->CoversSlow(destination_position))) { - // Our live_in fixed point calculation has found that the instruction is live - // in the `to` block because it will eventually enter an irreducible loop. Our - // live interval computation however does not compute a fixed point, and - // therefore will not have a location for that instruction for `to`. - // Because the instruction is a constant or the ArtMethod, we don't need to - // do anything: it will be materialized in the irreducible loop. - DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by)) - << defined_by->DebugName() << ":" << defined_by->GetId() - << " " << from->GetBlockId() << " -> " << to->GetBlockId(); - return; - } - - if (!destination->HasRegister()) { - // Values are eagerly spilled. Spill slot already contains appropriate value. - return; - } - - Location location_source; - // `GetSiblingAt` returns the interval whose start and end cover `position`, - // but does not check whether the interval is inactive at that position. - // The only situation where the interval is inactive at that position is in the - // presence of irreducible loops for constants and ArtMethod. - if (codegen_->GetGraph()->HasIrreducibleLoops() && - (source == nullptr || !source->CoversSlow(source_position))) { - DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by)); - if (defined_by->IsConstant()) { - location_source = defined_by->GetLocations()->Out(); - } else { - DCHECK(defined_by->IsCurrentMethod()); - location_source = parent->NeedsTwoSpillSlots() - ? Location::DoubleStackSlot(parent->GetSpillSlot()) - : Location::StackSlot(parent->GetSpillSlot()); - } - } else { - DCHECK(source != nullptr); - DCHECK(source->CoversSlow(source_position)); - DCHECK(destination->CoversSlow(destination_position)); - location_source = source->ToLocation(); - } - - // If `from` has only one successor, we can put the moves at the exit of it. Otherwise - // we need to put the moves at the entry of `to`. - if (from->GetNormalSuccessors().size() == 1) { - InsertParallelMoveAtExitOf(from, - defined_by, - location_source, - destination->ToLocation()); - } else { - DCHECK_EQ(to->GetPredecessors().size(), 1u); - InsertParallelMoveAtEntryOf(to, - defined_by, - location_source, - destination->ToLocation()); - } -} - -void RegisterAllocator::Resolve() { - codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(), - maximum_number_of_live_core_registers_, - maximum_number_of_live_fp_registers_, - reserved_out_slots_, - codegen_->GetGraph()->GetLinearOrder()); - - // Adjust the Out Location of instructions. - // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. 
- for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { - HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - LiveInterval* current = instruction->GetLiveInterval(); - LocationSummary* locations = instruction->GetLocations(); - Location location = locations->Out(); - if (instruction->IsParameterValue()) { - // Now that we know the frame size, adjust the parameter's location. - if (location.IsStackSlot()) { - location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); - current->SetSpillSlot(location.GetStackIndex()); - locations->UpdateOut(location); - } else if (location.IsDoubleStackSlot()) { - location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); - current->SetSpillSlot(location.GetStackIndex()); - locations->UpdateOut(location); - } else if (current->HasSpillSlot()) { - current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize()); - } - } else if (instruction->IsCurrentMethod()) { - // The current method is always at offset 0. - DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0)); - } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { - DCHECK(current->HasSpillSlot()); - size_t slot = current->GetSpillSlot() - + GetNumberOfSpillSlots() - + reserved_out_slots_ - - catch_phi_spill_slots_; - current->SetSpillSlot(slot * kVRegSize); - } else if (current->HasSpillSlot()) { - // Adjust the stack slot, now that we know the number of them for each type. - // The way this implementation lays out the stack is the following: - // [parameter slots ] - // [catch phi spill slots ] - // [double spill slots ] - // [long spill slots ] - // [float spill slots ] - // [int/ref values ] - // [maximum out values ] (number of arguments for calls) - // [art method ]. - size_t slot = current->GetSpillSlot(); - switch (current->GetType()) { - case Primitive::kPrimDouble: - slot += long_spill_slots_.size(); - FALLTHROUGH_INTENDED; - case Primitive::kPrimLong: - slot += float_spill_slots_.size(); - FALLTHROUGH_INTENDED; - case Primitive::kPrimFloat: - slot += int_spill_slots_.size(); - FALLTHROUGH_INTENDED; - case Primitive::kPrimNot: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimShort: - slot += reserved_out_slots_; - break; - case Primitive::kPrimVoid: - LOG(FATAL) << "Unexpected type for interval " << current->GetType(); - } - current->SetSpillSlot(slot * kVRegSize); - } - - Location source = current->ToLocation(); - - if (location.IsUnallocated()) { - if (location.GetPolicy() == Location::kSameAsFirstInput) { - if (locations->InAt(0).IsUnallocated()) { - locations->SetInAt(0, source); - } else { - DCHECK(locations->InAt(0).Equals(source)); - } - } - locations->UpdateOut(source); - } else { - DCHECK(source.Equals(location)); - } - } - - // Connect siblings. - for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { - HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - ConnectSiblings(instruction->GetLiveInterval()); - } - - // Resolve non-linear control flow across branches. Order does not matter. - for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - if (block->IsCatchBlock() || - (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { - // Instructions live at the top of catch blocks or irreducible loop header - // were forced to spill. 
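The switch over the interval type a few lines above turns a per-type slot index into a global one by skipping the categories that sit above it in the layout listed there, and the result is finally scaled by kVRegSize. A worked example with invented counts (2 double, 1 long, 3 float, 4 int/ref slots, reserved_out_slots_ = 3):

    // int slot    #2 -> 2 + 3             = 5   -> byte offset 5 * kVRegSize
    // float slot  #0 -> 0 + 4 + 3         = 7
    // long slot   #0 -> 0 + 3 + 4 + 3     = 10
    // double slot #1 -> 1 + 1 + 3 + 4 + 3 = 12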
- if (kIsDebugBuild) { - BitVector* live = liveness_.GetLiveInSet(*block); - for (uint32_t idx : live->Indexes()) { - LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval(); - LiveInterval* sibling = interval->GetSiblingAt(block->GetLifetimeStart()); - // `GetSiblingAt` returns the sibling that contains a position, but there could be - // a lifetime hole in it. `CoversSlow` returns whether the interval is live at that - // position. - if ((sibling != nullptr) && sibling->CoversSlow(block->GetLifetimeStart())) { - DCHECK(!sibling->HasRegister()); - } - } - } - } else { - BitVector* live = liveness_.GetLiveInSet(*block); - for (uint32_t idx : live->Indexes()) { - LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval(); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - ConnectSplitSiblings(interval, predecessor, block); - } - } - } - } - - // Resolve phi inputs. Order does not matter. - for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); - if (current->IsCatchBlock()) { - // Catch phi values are set at runtime by the exception delivery mechanism. - } else { - for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) { - HInstruction* phi = inst_it.Current(); - for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) { - HBasicBlock* predecessor = current->GetPredecessors()[i]; - DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u); - HInstruction* input = phi->InputAt(i); - Location source = input->GetLiveInterval()->GetLocationAt( - predecessor->GetLifetimeEnd() - 1); - Location destination = phi->GetLiveInterval()->ToLocation(); - InsertParallelMoveAtExitOf(predecessor, phi, source, destination); - } - } - } - } - - // Assign temp locations. - for (LiveInterval* temp : temp_intervals_) { - if (temp->IsHighInterval()) { - // High intervals can be skipped, they are already handled by the low interval. - continue; - } - HInstruction* at = liveness_.GetTempUser(temp); - size_t temp_index = liveness_.GetTempIndex(temp); - LocationSummary* locations = at->GetLocations(); - switch (temp->GetType()) { - case Primitive::kPrimInt: - locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister())); - break; - - case Primitive::kPrimDouble: - if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { - Location location = Location::FpuRegisterPairLocation( - temp->GetRegister(), temp->GetHighInterval()->GetRegister()); - locations->SetTempAt(temp_index, location); - } else { - locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister())); - } - break; - - default: - LOG(FATAL) << "Unexpected type for temporary location " - << temp->GetType(); - } - } -} - } // namespace art diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 58600b789b..729eede66e 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 The Android Open Source Project + * Copyright (C) 2016 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
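The spill-slot layout switch removed above (its logic appears to move to the new RegisterAllocationResolver added by this change) turns a type-local slot index into a frame offset by adding the sizes of the pools laid out below it. A minimal standalone sketch of that accumulation, with hypothetical pool sizes and assuming kVRegSize is 4 bytes:

#include <cstddef>
#include <cstdio>

// Hypothetical pool sizes, mirroring reserved_out_slots_ and the typed
// int_/float_/long_/double_spill_slots_ vectors.
struct SpillPools {
  std::size_t reserved_out;  // out-argument slots + ArtMethod*
  std::size_t ints;
  std::size_t floats;
  std::size_t longs;
  std::size_t doubles;
};

enum class Kind { kIntOrRef, kFloat, kLong, kDouble };

// Byte offset of `slot` within its typed pool, using the same bottom-up order
// as the layout comment: [out values][int/ref][float][long][double].
std::size_t SpillSlotOffset(const SpillPools& p, Kind kind, std::size_t slot,
                            std::size_t vreg_size = 4) {
  switch (kind) {
    case Kind::kDouble:   slot += p.longs;        [[fallthrough]];
    case Kind::kLong:     slot += p.floats;       [[fallthrough]];
    case Kind::kFloat:    slot += p.ints;         [[fallthrough]];
    case Kind::kIntOrRef: slot += p.reserved_out; break;
  }
  return slot * vreg_size;
}

int main() {
  SpillPools p{/*reserved_out=*/3, /*ints=*/2, /*floats=*/1, /*longs=*/2, /*doubles=*/0};
  // The first long slot sits above the out, int/ref and float pools:
  // (0 + 1 + 2 + 3) * 4 = 24 bytes.
  std::printf("%zu\n", SpillSlotOffset(p, Kind::kLong, 0));
  return 0;
}

Catch-phi slots are handled by a separate branch in the deleted code, since they sit above all typed pools and are allocated in reverse linear order.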
@@ -19,6 +19,7 @@ #include "arch/instruction_set.h" #include "base/arena_containers.h" +#include "base/arena_object.h" #include "base/macros.h" #include "primitive.h" @@ -29,36 +30,40 @@ class HBasicBlock; class HGraph; class HInstruction; class HParallelMove; -class HPhi; class LiveInterval; class Location; class SsaLivenessAnalysis; /** - * An implementation of a linear scan register allocator on an `HGraph` with SSA form. + * Base class for any register allocator. */ -class RegisterAllocator { +class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> { public: - RegisterAllocator(ArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& analysis); + enum Strategy { + kRegisterAllocatorLinearScan + }; + + static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan; + + static RegisterAllocator* Create(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& analysis, + Strategy strategy = kRegisterAllocatorDefault); + + virtual ~RegisterAllocator() = default; // Main entry point for the register allocator. Given the liveness analysis, // allocates registers to live intervals. - void AllocateRegisters(); + virtual void AllocateRegisters() = 0; // Validate that the register allocator did not allocate the same register to - // intervals that intersect each other. Returns false if it did not. - bool Validate(bool log_fatal_on_failure) { - processing_core_registers_ = true; - if (!ValidateInternal(log_fatal_on_failure)) { - return false; - } - processing_core_registers_ = false; - return ValidateInternal(log_fatal_on_failure); - } - - // Helper method for validation. Used by unit testing. + // intervals that intersect each other. Returns false if it failed. + virtual bool Validate(bool log_fatal_on_failure) = 0; + + static bool CanAllocateRegistersFor(const HGraph& graph, + InstructionSet instruction_set); + + // Verifies that live intervals do not conflict. Used by unit testing. static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals, size_t number_of_spill_slots, size_t number_of_out_slots, @@ -67,178 +72,25 @@ class RegisterAllocator { bool processing_core_registers, bool log_fatal_on_failure); - static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); - - size_t GetNumberOfSpillSlots() const { - return int_spill_slots_.size() - + long_spill_slots_.size() - + float_spill_slots_.size() - + double_spill_slots_.size() - + catch_phi_spill_slots_; - } - static constexpr const char* kRegisterAllocatorPassName = "register"; - private: - // Main methods of the allocator. - void LinearScan(); - bool TryAllocateFreeReg(LiveInterval* interval); - bool AllocateBlockedReg(LiveInterval* interval); - void Resolve(); - - // Add `interval` in the given sorted list. - static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval); + protected: + RegisterAllocator(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& analysis); // Split `interval` at the position `position`. The new interval starts at `position`. - LiveInterval* Split(LiveInterval* interval, size_t position); + // If `position` is at the start of `interval`, returns `interval` with its + // register location(s) cleared. + static LiveInterval* Split(LiveInterval* interval, size_t position); // Split `interval` at a position between `from` and `to`. The method will try // to find an optimal split position. 
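The Split contract documented above (the original interval keeps the head, the returned piece starts at `position`) is easiest to see on a toy model. A much-simplified sketch with a hypothetical Range type; the real LiveInterval also tracks use positions, high/low halves and register assignments:

#include <cassert>
#include <cstddef>

// Hypothetical single-range interval: live on [start, end).
struct Range {
  std::size_t start;
  std::size_t end;
  Range* next_sibling;  // Pieces created by later splits.
};

// Splits `r` at `position`: `r` keeps [start, position) and `storage` becomes
// the sibling covering [position, end), linked after `r`. (The real allocator
// allocates the sibling in an arena instead of taking caller-provided storage.)
Range* SplitAt(Range* r, std::size_t position, Range* storage) {
  assert(r->start < position && position < r->end);
  *storage = Range{position, r->end, r->next_sibling};
  r->end = position;
  r->next_sibling = storage;
  return storage;
}

int main() {
  Range pieces[2];
  pieces[0] = Range{10, 40, nullptr};
  Range* second = SplitAt(&pieces[0], 22, &pieces[1]);
  assert(pieces[0].end == 22 && second->start == 22 && second->end == 40);
  return 0;
}

SplitBetween then only layers a policy for picking a good `position` within the given range on top of this primitive.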
LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to); - // Returns whether `reg` is blocked by the code generator. - bool IsBlocked(int reg) const; - - // Update the interval for the register in `location` to cover [start, end). - void BlockRegister(Location location, size_t start, size_t end); - void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); - - // Allocate a spill slot for the given interval. Should be called in linear - // order of interval starting positions. - void AllocateSpillSlotFor(LiveInterval* interval); - - // Allocate a spill slot for the given catch phi. Will allocate the same slot - // for phis which share the same vreg. Must be called in reverse linear order - // of lifetime positions and ascending vreg numbers for correctness. - void AllocateSpillSlotForCatchPhi(HPhi* phi); - - // Connect adjacent siblings within blocks. - void ConnectSiblings(LiveInterval* interval); - - // Connect siblings between block entries and exits. - void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const; - - // Helper methods to insert parallel moves in the graph. - void InsertParallelMoveAtExitOf(HBasicBlock* block, - HInstruction* instruction, - Location source, - Location destination) const; - void InsertParallelMoveAtEntryOf(HBasicBlock* block, - HInstruction* instruction, - Location source, - Location destination) const; - void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const; - void AddInputMoveFor(HInstruction* input, - HInstruction* user, - Location source, - Location destination) const; - void InsertParallelMoveAt(size_t position, - HInstruction* instruction, - Location source, - Location destination) const; - - void AddMove(HParallelMove* move, - Location source, - Location destination, - HInstruction* instruction, - Primitive::Type type) const; - - // Helper methods. - void AllocateRegistersInternal(); - void ProcessInstruction(HInstruction* instruction); - bool ValidateInternal(bool log_fatal_on_failure) const; - void DumpInterval(std::ostream& stream, LiveInterval* interval) const; - void DumpAllIntervals(std::ostream& stream) const; - int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const; - int FindAvailableRegister(size_t* next_use, LiveInterval* current) const; - bool IsCallerSaveRegister(int reg) const; - - // Try splitting an active non-pair or unaligned pair interval at the given `position`. - // Returns whether it was successful at finding such an interval. - bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, - size_t first_register_use, - size_t* next_use); - ArenaAllocator* const allocator_; CodeGenerator* const codegen_; const SsaLivenessAnalysis& liveness_; - - // List of intervals for core registers that must be processed, ordered by start - // position. Last entry is the interval that has the lowest start position. - // This list is initially populated before doing the linear scan. - ArenaVector<LiveInterval*> unhandled_core_intervals_; - - // List of intervals for floating-point registers. Same comments as above. - ArenaVector<LiveInterval*> unhandled_fp_intervals_; - - // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_` - // or `unhandled_fp_intervals_`. - ArenaVector<LiveInterval*>* unhandled_; - - // List of intervals that have been processed. - ArenaVector<LiveInterval*> handled_; - - // List of intervals that are currently active when processing a new live interval. 
- // That is, they have a live range that spans the start of the new interval. - ArenaVector<LiveInterval*> active_; - - // List of intervals that are currently inactive when processing a new live interval. - // That is, they have a lifetime hole that spans the start of the new interval. - ArenaVector<LiveInterval*> inactive_; - - // Fixed intervals for physical registers. Such intervals cover the positions - // where an instruction requires a specific register. - ArenaVector<LiveInterval*> physical_core_register_intervals_; - ArenaVector<LiveInterval*> physical_fp_register_intervals_; - - // Intervals for temporaries. Such intervals cover the positions - // where an instruction requires a temporary. - ArenaVector<LiveInterval*> temp_intervals_; - - // The spill slots allocated for live intervals. We ensure spill slots - // are typed to avoid (1) doing moves and swaps between two different kinds - // of registers, and (2) swapping between a single stack slot and a double - // stack slot. This simplifies the parallel move resolver. - ArenaVector<size_t> int_spill_slots_; - ArenaVector<size_t> long_spill_slots_; - ArenaVector<size_t> float_spill_slots_; - ArenaVector<size_t> double_spill_slots_; - - // Spill slots allocated to catch phis. This category is special-cased because - // (1) slots are allocated prior to linear scan and in reverse linear order, - // (2) equivalent phis need to share slots despite having different types. - size_t catch_phi_spill_slots_; - - // Instructions that need a safepoint. - ArenaVector<HInstruction*> safepoints_; - - // True if processing core registers. False if processing floating - // point registers. - bool processing_core_registers_; - - // Number of registers for the current register kind (core or floating point). - size_t number_of_registers_; - - // Temporary array, allocated ahead of time for simplicity. - size_t* registers_array_; - - // Blocked registers, as decided by the code generator. - bool* const blocked_core_registers_; - bool* const blocked_fp_registers_; - - // Slots reserved for out arguments. - size_t reserved_out_slots_; - - // The maximum live core registers at safepoints. - size_t maximum_number_of_live_core_registers_; - - // The maximum live FP registers at safepoints. - size_t maximum_number_of_live_fp_registers_; - - ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil); - ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); - - DISALLOW_COPY_AND_ASSIGN(RegisterAllocator); }; } // namespace art diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc new file mode 100644 index 0000000000..a9151ba3c9 --- /dev/null +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -0,0 +1,1224 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "register_allocator_linear_scan.h" + +#include <iostream> +#include <sstream> + +#include "base/bit_vector-inl.h" +#include "code_generator.h" +#include "register_allocation_resolver.h" +#include "ssa_liveness_analysis.h" + +namespace art { + +static constexpr size_t kMaxLifetimePosition = -1; +static constexpr size_t kDefaultNumberOfSpillSlots = 4; + +// For simplicity, we implement register pairs as (reg, reg + 1). +// Note that this is a requirement for double registers on ARM, since we +// allocate SRegister. +static int GetHighForLowRegister(int reg) { return reg + 1; } +static bool IsLowRegister(int reg) { return (reg & 1) == 0; } +static bool IsLowOfUnalignedPairInterval(LiveInterval* low) { + return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister(); +} + +RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& liveness) + : RegisterAllocator(allocator, codegen, liveness), + unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + unhandled_(nullptr), + handled_(allocator->Adapter(kArenaAllocRegisterAllocator)), + active_(allocator->Adapter(kArenaAllocRegisterAllocator)), + inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)), + physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + catch_phi_spill_slots_(0), + safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), + processing_core_registers_(false), + number_of_registers_(-1), + registers_array_(nullptr), + blocked_core_registers_(codegen->GetBlockedCoreRegisters()), + blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()), + reserved_out_slots_(0), + maximum_number_of_live_core_registers_(0), + maximum_number_of_live_fp_registers_(0) { + temp_intervals_.reserve(4); + int_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + long_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + float_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + double_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + + codegen->SetupBlockedRegisters(); + physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr); + physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr); + // Always reserve for the current method and the graph's max out registers. + // TODO: compute it instead. + // ArtMethod* takes 2 vregs for 64 bits. 
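As a concrete example of the reservation computed next (assuming kVRegSize is 4 bytes, i.e. one slot per dex register): on a 64-bit target the ArtMethod* pointer needs 8 / 4 = 2 slots, so a graph whose calls pass at most 3 outgoing vregs reserves 2 + 3 = 5 out slots.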
+ reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize + + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); +} + +static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) { + if (interval == nullptr) return false; + bool is_core_register = (interval->GetType() != Primitive::kPrimDouble) + && (interval->GetType() != Primitive::kPrimFloat); + return processing_core_registers == is_core_register; +} + +void RegisterAllocatorLinearScan::AllocateRegisters() { + AllocateRegistersInternal(); + RegisterAllocationResolver(allocator_, codegen_, liveness_) + .Resolve(maximum_number_of_live_core_registers_, + maximum_number_of_live_fp_registers_, + reserved_out_slots_, + int_spill_slots_.size(), + long_spill_slots_.size(), + float_spill_slots_.size(), + double_spill_slots_.size(), + catch_phi_spill_slots_, + temp_intervals_); + + if (kIsDebugBuild) { + processing_core_registers_ = true; + ValidateInternal(true); + processing_core_registers_ = false; + ValidateInternal(true); + // Check that the linear order is still correct with regards to lifetime positions. + // Since only parallel moves have been inserted during the register allocation, + // these checks are mostly for making sure these moves have been added correctly. + size_t current_liveness = 0; + for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + HInstruction* instruction = inst_it.Current(); + DCHECK_LE(current_liveness, instruction->GetLifetimePosition()); + current_liveness = instruction->GetLifetimePosition(); + } + for (HInstructionIterator inst_it(block->GetInstructions()); + !inst_it.Done(); + inst_it.Advance()) { + HInstruction* instruction = inst_it.Current(); + DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName(); + current_liveness = instruction->GetLifetimePosition(); + } + } + } +} + +void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, size_t end) { + int reg = location.reg(); + DCHECK(location.IsRegister() || location.IsFpuRegister()); + LiveInterval* interval = location.IsRegister() + ? physical_core_register_intervals_[reg] + : physical_fp_register_intervals_[reg]; + Primitive::Type type = location.IsRegister() + ? Primitive::kPrimInt + : Primitive::kPrimFloat; + if (interval == nullptr) { + interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); + if (location.IsRegister()) { + physical_core_register_intervals_[reg] = interval; + } else { + physical_fp_register_intervals_[reg] = interval; + } + } + DCHECK(interval->GetRegister() == reg); + interval->AddRange(start, end); +} + +void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool caller_save_only) { + for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { + if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) { + BlockRegister(Location::RegisterLocation(i), start, end); + } + } + for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { + if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) { + BlockRegister(Location::FpuRegisterLocation(i), start, end); + } + } +} + +void RegisterAllocatorLinearScan::AllocateRegistersInternal() { + // Iterate post-order, to ensure the list is sorted, and the last added interval + // is the one with the lowest start position. 
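To see why the scan can always take the next interval from the back of the worklist, here is a toy version of the invariant described in the comment above (illustrative only, not ART code):

#include <cassert>
#include <vector>

int main() {
  // Lifetime starts of the intervals, in linear order.
  std::vector<int> starts = {2, 6, 10};
  // Walking the graph backwards (linear post-order, instructions in reverse)
  // pushes intervals with decreasing start positions...
  std::vector<int> unhandled;
  for (auto it = starts.rbegin(); it != starts.rend(); ++it) {
    unhandled.push_back(*it);
  }
  // ...so the vector is sorted by decreasing start and the interval with the
  // lowest start is always at the back, ready to be popped in O(1).
  assert(unhandled.back() == 2);
  return 0;
}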
+ for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done(); + back_it.Advance()) { + ProcessInstruction(back_it.Current()); + } + for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + ProcessInstruction(inst_it.Current()); + } + + if (block->IsCatchBlock() || + (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { + // By blocking all registers at the top of each catch block or irreducible loop, we force + // intervals belonging to the live-in set of the catch/header block to be spilled. + // TODO(ngeoffray): Phis in this block could be allocated in register. + size_t position = block->GetLifetimeStart(); + BlockRegisters(position, position + 1); + } + } + + number_of_registers_ = codegen_->GetNumberOfCoreRegisters(); + registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, + kArenaAllocRegisterAllocator); + processing_core_registers_ = true; + unhandled_ = &unhandled_core_intervals_; + for (LiveInterval* fixed : physical_core_register_intervals_) { + if (fixed != nullptr) { + // Fixed interval is added to inactive_ instead of unhandled_. + // It's also the only type of inactive interval whose start position + // can be after the current interval during linear scan. + // Fixed interval is never split and never moves to unhandled_. + inactive_.push_back(fixed); + } + } + LinearScan(); + + inactive_.clear(); + active_.clear(); + handled_.clear(); + + number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters(); + registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, + kArenaAllocRegisterAllocator); + processing_core_registers_ = false; + unhandled_ = &unhandled_fp_intervals_; + for (LiveInterval* fixed : physical_fp_register_intervals_) { + if (fixed != nullptr) { + // Fixed interval is added to inactive_ instead of unhandled_. + // It's also the only type of inactive interval whose start position + // can be after the current interval during linear scan. + // Fixed interval is never split and never moves to unhandled_. + inactive_.push_back(fixed); + } + } + LinearScan(); +} + +void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) { + LocationSummary* locations = instruction->GetLocations(); + size_t position = instruction->GetLifetimePosition(); + + if (locations == nullptr) return; + + // Create synthesized intervals for temporaries. + for (size_t i = 0; i < locations->GetTempCount(); ++i) { + Location temp = locations->GetTemp(i); + if (temp.IsRegister() || temp.IsFpuRegister()) { + BlockRegister(temp, position, position + 1); + // Ensure that an explicit temporary register is marked as being allocated. 
+ codegen_->AddAllocatedRegister(temp); + } else { + DCHECK(temp.IsUnallocated()); + switch (temp.GetPolicy()) { + case Location::kRequiresRegister: { + LiveInterval* interval = + LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); + temp_intervals_.push_back(interval); + interval->AddTempUse(instruction, i); + unhandled_core_intervals_.push_back(interval); + break; + } + + case Location::kRequiresFpuRegister: { + LiveInterval* interval = + LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); + temp_intervals_.push_back(interval); + interval->AddTempUse(instruction, i); + if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + interval->AddHighInterval(/* is_temp */ true); + LiveInterval* high = interval->GetHighInterval(); + temp_intervals_.push_back(high); + unhandled_fp_intervals_.push_back(high); + } + unhandled_fp_intervals_.push_back(interval); + break; + } + + default: + LOG(FATAL) << "Unexpected policy for temporary location " + << temp.GetPolicy(); + } + } + } + + bool core_register = (instruction->GetType() != Primitive::kPrimDouble) + && (instruction->GetType() != Primitive::kPrimFloat); + + if (locations->NeedsSafepoint()) { + if (codegen_->IsLeafMethod()) { + // TODO: We do this here because we do not want the suspend check to artificially + // create live registers. We should find another place, but this is currently the + // simplest. + DCHECK(instruction->IsSuspendCheckEntry()); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + safepoints_.push_back(instruction); + if (locations->OnlyCallsOnSlowPath()) { + // We add a synthesized range at this position to record the live registers + // at this position. Ideally, we could just update the safepoints when locations + // are updated, but we currently need to know the full stack size before updating + // locations (because of parameters and the fact that we don't have a frame pointer). + // And knowing the full stack size requires to know the maximum number of live + // registers at calls in slow paths. + // By adding the following interval in the algorithm, we can compute this + // maximum before updating locations. + LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction); + interval->AddRange(position, position + 1); + AddSorted(&unhandled_core_intervals_, interval); + AddSorted(&unhandled_fp_intervals_, interval); + } + } + + if (locations->WillCall()) { + BlockRegisters(position, position + 1, /* caller_save_only */ true); + } + + for (size_t i = 0; i < locations->GetInputCount(); ++i) { + Location input = locations->InAt(i); + if (input.IsRegister() || input.IsFpuRegister()) { + BlockRegister(input, position, position + 1); + } else if (input.IsPair()) { + BlockRegister(input.ToLow(), position, position + 1); + BlockRegister(input.ToHigh(), position, position + 1); + } + } + + LiveInterval* current = instruction->GetLiveInterval(); + if (current == nullptr) return; + + ArenaVector<LiveInterval*>& unhandled = core_register + ? unhandled_core_intervals_ + : unhandled_fp_intervals_; + + DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back())); + + if (codegen_->NeedsTwoRegisters(current->GetType())) { + current->AddHighInterval(); + } + + for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { + HInstruction* safepoint = safepoints_[safepoint_index - 1u]; + size_t safepoint_position = safepoint->GetLifetimePosition(); + + // Test that safepoints are ordered in the optimal way. 
+ DCHECK(safepoint_index == safepoints_.size() || + safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); + + if (safepoint_position == current->GetStart()) { + // The safepoint is for this instruction, so the location of the instruction + // does not need to be saved. + DCHECK_EQ(safepoint_index, safepoints_.size()); + DCHECK_EQ(safepoint, instruction); + continue; + } else if (current->IsDeadAt(safepoint_position)) { + break; + } else if (!current->Covers(safepoint_position)) { + // Hole in the interval. + continue; + } + current->AddSafepoint(safepoint); + } + current->ResetSearchCache(); + + // Some instructions define their output in fixed register/stack slot. We need + // to ensure we know these locations before doing register allocation. For a + // given register, we create an interval that covers these locations. The register + // will be unavailable at these locations when trying to allocate one for an + // interval. + // + // The backwards walking ensures the ranges are ordered on increasing start positions. + Location output = locations->Out(); + if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) { + Location first = locations->InAt(0); + if (first.IsRegister() || first.IsFpuRegister()) { + current->SetFrom(position + 1); + current->SetRegister(first.reg()); + } else if (first.IsPair()) { + current->SetFrom(position + 1); + current->SetRegister(first.low()); + LiveInterval* high = current->GetHighInterval(); + high->SetRegister(first.high()); + high->SetFrom(position + 1); + } + } else if (output.IsRegister() || output.IsFpuRegister()) { + // Shift the interval's start by one to account for the blocked register. + current->SetFrom(position + 1); + current->SetRegister(output.reg()); + BlockRegister(output, position, position + 1); + } else if (output.IsPair()) { + current->SetFrom(position + 1); + current->SetRegister(output.low()); + LiveInterval* high = current->GetHighInterval(); + high->SetRegister(output.high()); + high->SetFrom(position + 1); + BlockRegister(output.ToLow(), position, position + 1); + BlockRegister(output.ToHigh(), position, position + 1); + } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) { + current->SetSpillSlot(output.GetStackIndex()); + } else { + DCHECK(output.IsUnallocated() || output.IsConstant()); + } + + if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { + AllocateSpillSlotForCatchPhi(instruction->AsPhi()); + } + + // If needed, add interval to the list of unhandled intervals. + if (current->HasSpillSlot() || instruction->IsConstant()) { + // Split just before first register use. + size_t first_register_use = current->FirstRegisterUse(); + if (first_register_use != kNoLifetime) { + LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1); + // Don't add directly to `unhandled`, it needs to be sorted and the start + // of this new interval might be after intervals already in the list. + AddSorted(&unhandled, split); + } else { + // Nothing to do, we won't allocate a register for this value. + } + } else { + // Don't add directly to `unhandled`, temp or safepoint intervals + // for this instruction may have been added, and those can be + // processed first. 
+ AddSorted(&unhandled, current); + } +} + +class AllRangesIterator : public ValueObject { + public: + explicit AllRangesIterator(LiveInterval* interval) + : current_interval_(interval), + current_range_(interval->GetFirstRange()) {} + + bool Done() const { return current_interval_ == nullptr; } + LiveRange* CurrentRange() const { return current_range_; } + LiveInterval* CurrentInterval() const { return current_interval_; } + + void Advance() { + current_range_ = current_range_->GetNext(); + if (current_range_ == nullptr) { + current_interval_ = current_interval_->GetNextSibling(); + if (current_interval_ != nullptr) { + current_range_ = current_interval_->GetFirstRange(); + } + } + } + + private: + LiveInterval* current_interval_; + LiveRange* current_range_; + + DISALLOW_COPY_AND_ASSIGN(AllRangesIterator); +}; + +bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const { + // To simplify unit testing, we eagerly create the array of intervals, and + // call the helper method. + ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate)); + for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) { + intervals.push_back(instruction->GetLiveInterval()); + } + } + + const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_ + ? &physical_core_register_intervals_ + : &physical_fp_register_intervals_; + for (LiveInterval* fixed : *physical_register_intervals) { + if (fixed != nullptr) { + intervals.push_back(fixed); + } + } + + for (LiveInterval* temp : temp_intervals_) { + if (ShouldProcess(processing_core_registers_, temp)) { + intervals.push_back(temp); + } + } + + return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_, + allocator_, processing_core_registers_, log_fatal_on_failure); +} + +void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterval* interval) const { + interval->Dump(stream); + stream << ": "; + if (interval->HasRegister()) { + if (interval->IsFloatingPoint()) { + codegen_->DumpFloatingPointRegister(stream, interval->GetRegister()); + } else { + codegen_->DumpCoreRegister(stream, interval->GetRegister()); + } + } else { + stream << "spilled"; + } + stream << std::endl; +} + +void RegisterAllocatorLinearScan::DumpAllIntervals(std::ostream& stream) const { + stream << "inactive: " << std::endl; + for (LiveInterval* inactive_interval : inactive_) { + DumpInterval(stream, inactive_interval); + } + stream << "active: " << std::endl; + for (LiveInterval* active_interval : active_) { + DumpInterval(stream, active_interval); + } + stream << "unhandled: " << std::endl; + auto unhandled = (unhandled_ != nullptr) ? + unhandled_ : &unhandled_core_intervals_; + for (LiveInterval* unhandled_interval : *unhandled) { + DumpInterval(stream, unhandled_interval); + } + stream << "handled: " << std::endl; + for (LiveInterval* handled_interval : handled_) { + DumpInterval(stream, handled_interval); + } +} + +// By the book implementation of a linear scan register allocator. +void RegisterAllocatorLinearScan::LinearScan() { + while (!unhandled_->empty()) { + // (1) Remove interval with the lowest start position from unhandled. + LiveInterval* current = unhandled_->back(); + unhandled_->pop_back(); + + // Make sure the interval is an expected state. 
+ DCHECK(!current->IsFixed() && !current->HasSpillSlot());
+ // Make sure we are going in the right order.
+ DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
+ // Make sure a low interval is always with a high.
+ DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
+ // Make sure a high interval is always with a low.
+ DCHECK(current->IsLowInterval() ||
+ unhandled_->empty() ||
+ !unhandled_->back()->IsHighInterval());
+
+ size_t position = current->GetStart();
+
+ // Remember the inactive_ size here since the ones moved to inactive_ from
+ // active_ below shouldn't need to be re-checked.
+ size_t inactive_intervals_to_handle = inactive_.size();
+
+ // (2) Remove currently active intervals that are dead at this position.
+ // Move active intervals that have a lifetime hole at this position
+ // to inactive.
+ auto active_kept_end = std::remove_if(
+ active_.begin(),
+ active_.end(),
+ [this, position](LiveInterval* interval) {
+ if (interval->IsDeadAt(position)) {
+ handled_.push_back(interval);
+ return true;
+ } else if (!interval->Covers(position)) {
+ inactive_.push_back(interval);
+ return true;
+ } else {
+ return false; // Keep this interval.
+ }
+ });
+ active_.erase(active_kept_end, active_.end());
+
+ // (3) Remove currently inactive intervals that are dead at this position.
+ // Move inactive intervals that cover this position to active.
+ auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
+ auto inactive_kept_end = std::remove_if(
+ inactive_.begin(),
+ inactive_to_handle_end,
+ [this, position](LiveInterval* interval) {
+ DCHECK(interval->GetStart() < position || interval->IsFixed());
+ if (interval->IsDeadAt(position)) {
+ handled_.push_back(interval);
+ return true;
+ } else if (interval->Covers(position)) {
+ active_.push_back(interval);
+ return true;
+ } else {
+ return false; // Keep this interval.
+ }
+ });
+ inactive_.erase(inactive_kept_end, inactive_to_handle_end);
+
+ if (current->IsSlowPathSafepoint()) {
+ // Synthesized interval to record the maximum number of live registers
+ // at safepoints. No need to allocate a register for it.
+ if (processing_core_registers_) {
+ maximum_number_of_live_core_registers_ =
+ std::max(maximum_number_of_live_core_registers_, active_.size());
+ } else {
+ maximum_number_of_live_fp_registers_ =
+ std::max(maximum_number_of_live_fp_registers_, active_.size());
+ }
+ DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
+ continue;
+ }
+
+ if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
+ DCHECK(!current->HasRegister());
+ // Allocating the low part was unsuccessful. The split interval for the high part
+ // will be handled next (it is in the `unhandled_` list).
+ continue;
+ }
+
+ // (4) Try to find an available register.
+ bool success = TryAllocateFreeReg(current);
+
+ // (5) If no register could be found, we need to spill.
+ if (!success) {
+ success = AllocateBlockedReg(current);
+ }
+
+ // (6) If the interval had a register allocated, add it to the list of active
+ // intervals.
+ if (success) {
+ codegen_->AddAllocatedRegister(processing_core_registers_
+ ?
Location::RegisterLocation(current->GetRegister()) + : Location::FpuRegisterLocation(current->GetRegister())); + active_.push_back(current); + if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) { + current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister())); + } + } + } +} + +static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) { + DCHECK(!interval->IsHighInterval()); + // Note that the same instruction may occur multiple times in the input list, + // so `free_until` may have changed already. + // Since `position` is not the current scan position, we need to use CoversSlow. + if (interval->IsDeadAt(position)) { + // Set the register to be free. Note that inactive intervals might later + // update this. + free_until[interval->GetRegister()] = kMaxLifetimePosition; + if (interval->HasHighInterval()) { + DCHECK(interval->GetHighInterval()->IsDeadAt(position)); + free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition; + } + } else if (!interval->CoversSlow(position)) { + // The interval becomes inactive at `defined_by`. We make its register + // available only until the next use strictly after `defined_by`. + free_until[interval->GetRegister()] = interval->FirstUseAfter(position); + if (interval->HasHighInterval()) { + DCHECK(!interval->GetHighInterval()->CoversSlow(position)); + free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()]; + } + } +} + +// Find a free register. If multiple are found, pick the register that +// is free the longest. +bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) { + size_t* free_until = registers_array_; + + // First set all registers to be free. + for (size_t i = 0; i < number_of_registers_; ++i) { + free_until[i] = kMaxLifetimePosition; + } + + // For each active interval, set its register to not free. + for (LiveInterval* interval : active_) { + DCHECK(interval->HasRegister()); + free_until[interval->GetRegister()] = 0; + } + + // An interval that starts an instruction (that is, it is not split), may + // re-use the registers used by the inputs of that instruciton, based on the + // location summary. + HInstruction* defined_by = current->GetDefinedBy(); + if (defined_by != nullptr && !current->IsSplit()) { + LocationSummary* locations = defined_by->GetLocations(); + if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) { + HInputsRef inputs = defined_by->GetInputs(); + for (size_t i = 0; i < inputs.size(); ++i) { + // Take the last interval of the input. It is the location of that interval + // that will be used at `defined_by`. + LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling(); + // Note that interval may have not been processed yet. + // TODO: Handle non-split intervals last in the work list. + if (locations->InAt(i).IsValid() + && interval->HasRegister() + && interval->SameRegisterKind(*current)) { + // The input must be live until the end of `defined_by`, to comply to + // the linear scan algorithm. So we use `defined_by`'s end lifetime + // position to check whether the input is dead or is inactive after + // `defined_by`. + DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition())); + size_t position = defined_by->GetLifetimePosition() + 1; + FreeIfNotCoverAt(interval, position, free_until); + } + } + } + } + + // For each inactive interval, set its register to be free until + // the next intersection with `current`. 
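A small worked example of the resulting free_until table (hypothetical numbers, not ART code): registers held by active intervals stay at 0, registers held by inactive intervals are free until their next intersection with the current interval, and untouched registers are free forever:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <limits>

int main() {
  constexpr std::size_t kMax = std::numeric_limits<std::size_t>::max();  // kMaxLifetimePosition
  // Allocating an interval that starts at 20 and ends at 50: r0 is held by an
  // active interval, r1 by an inactive interval whose next intersection with
  // the current interval is at 30, and r2 is not used at all.
  std::size_t free_until[3] = {0, 30, kMax};
  // With no hint, no pair requirement, and ignoring the caller-save preference,
  // the allocator keeps the register that stays free the longest.
  std::size_t reg = std::max_element(free_until, free_until + 3) - free_until;
  assert(reg == 2);
  // If r2 were also blocked until 30, r1 would still be good enough, but the
  // current interval would be split before position 30 and its tail re-queued.
  return 0;
}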
+ for (LiveInterval* inactive : inactive_) { + // Temp/Slow-path-safepoint interval has no holes. + DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); + if (!current->IsSplit() && !inactive->IsFixed()) { + // Neither current nor inactive are fixed. + // Thanks to SSA, a non-split interval starting in a hole of an + // inactive interval should never intersect with that inactive interval. + // Only if it's not fixed though, because fixed intervals don't come from SSA. + DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); + continue; + } + + DCHECK(inactive->HasRegister()); + if (free_until[inactive->GetRegister()] == 0) { + // Already used by some active interval. No need to intersect. + continue; + } + size_t next_intersection = inactive->FirstIntersectionWith(current); + if (next_intersection != kNoLifetime) { + free_until[inactive->GetRegister()] = + std::min(free_until[inactive->GetRegister()], next_intersection); + } + } + + int reg = kNoRegister; + if (current->HasRegister()) { + // Some instructions have a fixed register output. + reg = current->GetRegister(); + if (free_until[reg] == 0) { + DCHECK(current->IsHighInterval()); + // AllocateBlockedReg will spill the holder of the register. + return false; + } + } else { + DCHECK(!current->IsHighInterval()); + int hint = current->FindFirstRegisterHint(free_until, liveness_); + if ((hint != kNoRegister) + // For simplicity, if the hint we are getting for a pair cannot be used, + // we are just going to allocate a new pair. + && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) { + DCHECK(!IsBlocked(hint)); + reg = hint; + } else if (current->IsLowInterval()) { + reg = FindAvailableRegisterPair(free_until, current->GetStart()); + } else { + reg = FindAvailableRegister(free_until, current); + } + } + + DCHECK_NE(reg, kNoRegister); + // If we could not find a register, we need to spill. + if (free_until[reg] == 0) { + return false; + } + + if (current->IsLowInterval()) { + // If the high register of this interval is not available, we need to spill. + int high_reg = current->GetHighInterval()->GetRegister(); + if (high_reg == kNoRegister) { + high_reg = GetHighForLowRegister(reg); + } + if (free_until[high_reg] == 0) { + return false; + } + } + + current->SetRegister(reg); + if (!current->IsDeadAt(free_until[reg])) { + // If the register is only available for a subset of live ranges + // covered by `current`, split `current` before the position where + // the register is not available anymore. + LiveInterval* split = SplitBetween(current, current->GetStart(), free_until[reg]); + DCHECK(split != nullptr); + AddSorted(unhandled_, split); + } + return true; +} + +bool RegisterAllocatorLinearScan::IsBlocked(int reg) const { + return processing_core_registers_ + ? blocked_core_registers_[reg] + : blocked_fp_registers_[reg]; +} + +int RegisterAllocatorLinearScan::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const { + int reg = kNoRegister; + // Pick the register pair that is used the last. 
+ for (size_t i = 0; i < number_of_registers_; ++i) { + if (IsBlocked(i)) continue; + if (!IsLowRegister(i)) continue; + int high_register = GetHighForLowRegister(i); + if (IsBlocked(high_register)) continue; + int existing_high_register = GetHighForLowRegister(reg); + if ((reg == kNoRegister) || (next_use[i] >= next_use[reg] + && next_use[high_register] >= next_use[existing_high_register])) { + reg = i; + if (next_use[i] == kMaxLifetimePosition + && next_use[high_register] == kMaxLifetimePosition) { + break; + } + } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) { + // If one of the current register is known to be unavailable, just unconditionally + // try a new one. + reg = i; + } + } + return reg; +} + +bool RegisterAllocatorLinearScan::IsCallerSaveRegister(int reg) const { + return processing_core_registers_ + ? !codegen_->IsCoreCalleeSaveRegister(reg) + : !codegen_->IsFloatingPointCalleeSaveRegister(reg); +} + +int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInterval* current) const { + // We special case intervals that do not span a safepoint to try to find a caller-save + // register if one is available. We iterate from 0 to the number of registers, + // so if there are caller-save registers available at the end, we continue the iteration. + bool prefers_caller_save = !current->HasWillCallSafepoint(); + int reg = kNoRegister; + for (size_t i = 0; i < number_of_registers_; ++i) { + if (IsBlocked(i)) { + // Register cannot be used. Continue. + continue; + } + + // Best case: we found a register fully available. + if (next_use[i] == kMaxLifetimePosition) { + if (prefers_caller_save && !IsCallerSaveRegister(i)) { + // We can get shorter encodings on some platforms by using + // small register numbers. So only update the candidate if the previous + // one was not available for the whole method. + if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) { + reg = i; + } + // Continue the iteration in the hope of finding a caller save register. + continue; + } else { + reg = i; + // We know the register is good enough. Return it. + break; + } + } + + // If we had no register before, take this one as a reference. + if (reg == kNoRegister) { + reg = i; + continue; + } + + // Pick the register that is used the last. + if (next_use[i] > next_use[reg]) { + reg = i; + continue; + } + } + return reg; +} + +// Remove interval and its other half if any. Return iterator to the following element. 
+static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf( + ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) { + DCHECK(intervals->begin() <= pos && pos < intervals->end()); + LiveInterval* interval = *pos; + if (interval->IsLowInterval()) { + DCHECK(pos + 1 < intervals->end()); + DCHECK_EQ(*(pos + 1), interval->GetHighInterval()); + return intervals->erase(pos, pos + 2); + } else if (interval->IsHighInterval()) { + DCHECK(intervals->begin() < pos); + DCHECK_EQ(*(pos - 1), interval->GetLowInterval()); + return intervals->erase(pos - 1, pos + 1); + } else { + return intervals->erase(pos); + } +} + +bool RegisterAllocatorLinearScan::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use) { + for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { + LiveInterval* active = *it; + DCHECK(active->HasRegister()); + if (active->IsFixed()) continue; + if (active->IsHighInterval()) continue; + if (first_register_use > next_use[active->GetRegister()]) continue; + + // Split the first interval found that is either: + // 1) A non-pair interval. + // 2) A pair interval whose high is not low + 1. + // 3) A pair interval whose low is not even. + if (!active->IsLowInterval() || + IsLowOfUnalignedPairInterval(active) || + !IsLowRegister(active->GetRegister())) { + LiveInterval* split = Split(active, position); + if (split != active) { + handled_.push_back(active); + } + RemoveIntervalAndPotentialOtherHalf(&active_, it); + AddSorted(unhandled_, split); + return true; + } + } + return false; +} + +// Find the register that is used the last, and spill the interval +// that holds it. If the first use of `current` is after that register +// we spill `current` instead. +bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) { + size_t first_register_use = current->FirstRegisterUse(); + if (current->HasRegister()) { + DCHECK(current->IsHighInterval()); + // The low interval has allocated the register for the high interval. In + // case the low interval had to split both intervals, we may end up in a + // situation where the high interval does not have a register use anymore. + // We must still proceed in order to split currently active and inactive + // uses of the high interval's register, and put the high interval in the + // active set. + DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr)); + } else if (first_register_use == kNoLifetime) { + AllocateSpillSlotFor(current); + return false; + } + + // First set all registers as not being used. + size_t* next_use = registers_array_; + for (size_t i = 0; i < number_of_registers_; ++i) { + next_use[i] = kMaxLifetimePosition; + } + + // For each active interval, find the next use of its register after the + // start of current. + for (LiveInterval* active : active_) { + DCHECK(active->HasRegister()); + if (active->IsFixed()) { + next_use[active->GetRegister()] = current->GetStart(); + } else { + size_t use = active->FirstRegisterUseAfter(current->GetStart()); + if (use != kNoLifetime) { + next_use[active->GetRegister()] = use; + } + } + } + + // For each inactive interval, find the next use of its register after the + // start of current. + for (LiveInterval* inactive : inactive_) { + // Temp/Slow-path-safepoint interval has no holes. 
+ DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+ if (!current->IsSplit() && !inactive->IsFixed()) {
+ // Neither current nor inactive are fixed.
+ // Thanks to SSA, a non-split interval starting in a hole of an
+ // inactive interval should never intersect with that inactive interval.
+ // Only if it's not fixed though, because fixed intervals don't come from SSA.
+ DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
+ continue;
+ }
+ DCHECK(inactive->HasRegister());
+ size_t next_intersection = inactive->FirstIntersectionWith(current);
+ if (next_intersection != kNoLifetime) {
+ if (inactive->IsFixed()) {
+ next_use[inactive->GetRegister()] =
+ std::min(next_intersection, next_use[inactive->GetRegister()]);
+ } else {
+ size_t use = inactive->FirstUseAfter(current->GetStart());
+ if (use != kNoLifetime) {
+ next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
+ }
+ }
+ }
+ }
+
+ int reg = kNoRegister;
+ bool should_spill = false;
+ if (current->HasRegister()) {
+ DCHECK(current->IsHighInterval());
+ reg = current->GetRegister();
+ // When allocating the low part, we made sure the high register was available.
+ DCHECK_LT(first_register_use, next_use[reg]);
+ } else if (current->IsLowInterval()) {
+ reg = FindAvailableRegisterPair(next_use, first_register_use);
+ // We should spill if both registers are not available.
+ should_spill = (first_register_use >= next_use[reg])
+ || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
+ } else {
+ DCHECK(!current->IsHighInterval());
+ reg = FindAvailableRegister(next_use, current);
+ should_spill = (first_register_use >= next_use[reg]);
+ }
+
+ DCHECK_NE(reg, kNoRegister);
+ if (should_spill) {
+ DCHECK(!current->IsHighInterval());
+ bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
+ if (is_allocation_at_use_site) {
+ if (!current->IsLowInterval()) {
+ DumpInterval(std::cerr, current);
+ DumpAllIntervals(std::cerr);
+ // This situation has the potential to infinite loop, so we make it a non-debug CHECK.
+ HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
+ CHECK(false) << "There are not enough registers available for "
+ << current->GetParent()->GetDefinedBy()->DebugName() << " "
+ << current->GetParent()->GetDefinedBy()->GetId()
+ << " at " << first_register_use - 1 << " "
+ << (at == nullptr ? "" : at->DebugName());
+ }
+
+ // If we're allocating a register for `current` because the instruction at
+ // that position requires it, but we think we should spill, then there are
+ // non-pair intervals or unaligned pair intervals blocking the allocation.
+ // We split the first interval found, and put ourselves first in the
+ // `unhandled_` list.
+ bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
+ first_register_use,
+ next_use);
+ DCHECK(success);
+ LiveInterval* existing = unhandled_->back();
+ DCHECK(existing->IsHighInterval());
+ DCHECK_EQ(existing->GetLowInterval(), current);
+ unhandled_->push_back(current);
+ } else {
+ // If the first use of that instruction is after the last use of the found
+ // register, we split this interval just before its first register use.
+ AllocateSpillSlotFor(current);
+ LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
+ DCHECK(current != split);
+ AddSorted(unhandled_, split);
+ }
+ return false;
+ } else {
+ // Use this register and spill the active and inactive intervals that
+ // have that register.
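The spill-versus-steal decision computed a few lines up reduces to one comparison; a small numeric sketch (hypothetical positions, not ART code):

#include <cassert>
#include <cstddef>

int main() {
  // Inside AllocateBlockedReg: the current interval first needs a register at
  // position 40, and the best candidate register is next needed by the
  // interval holding it at position 52.
  std::size_t first_register_use = 40;
  std::size_t next_use_of_candidate = 52;
  // 40 < 52, so the holder is the better victim: take the register and split
  // the active/inactive intervals that own it (the branch that follows).
  // Had the comparison gone the other way, `current` itself would be spilled
  // and split just before position 40 instead.
  bool should_spill = first_register_use >= next_use_of_candidate;
  assert(!should_spill);
  return 0;
}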
+ current->SetRegister(reg); + + for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { + LiveInterval* active = *it; + if (active->GetRegister() == reg) { + DCHECK(!active->IsFixed()); + LiveInterval* split = Split(active, current->GetStart()); + if (split != active) { + handled_.push_back(active); + } + RemoveIntervalAndPotentialOtherHalf(&active_, it); + AddSorted(unhandled_, split); + break; + } + } + + // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body. + for (auto it = inactive_.begin(); it != inactive_.end(); ) { + LiveInterval* inactive = *it; + bool erased = false; + if (inactive->GetRegister() == reg) { + if (!current->IsSplit() && !inactive->IsFixed()) { + // Neither current nor inactive are fixed. + // Thanks to SSA, a non-split interval starting in a hole of an + // inactive interval should never intersect with that inactive interval. + // Only if it's not fixed though, because fixed intervals don't come from SSA. + DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); + } else { + size_t next_intersection = inactive->FirstIntersectionWith(current); + if (next_intersection != kNoLifetime) { + if (inactive->IsFixed()) { + LiveInterval* split = Split(current, next_intersection); + DCHECK_NE(split, current); + AddSorted(unhandled_, split); + } else { + // Split at the start of `current`, which will lead to splitting + // at the end of the lifetime hole of `inactive`. + LiveInterval* split = Split(inactive, current->GetStart()); + // If it's inactive, it must start before the current interval. + DCHECK_NE(split, inactive); + it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it); + erased = true; + handled_.push_back(inactive); + AddSorted(unhandled_, split); + } + } + } + } + // If we have erased the element, `it` already points to the next element. + // Otherwise we need to move to the next element. + if (!erased) { + ++it; + } + } + + return true; + } +} + +void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) { + DCHECK(!interval->IsFixed() && !interval->HasSpillSlot()); + size_t insert_at = 0; + for (size_t i = array->size(); i > 0; --i) { + LiveInterval* current = (*array)[i - 1u]; + // High intervals must be processed right after their low equivalent. + if (current->StartsAfter(interval) && !current->IsHighInterval()) { + insert_at = i; + break; + } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { + // Ensure the slow path interval is the last to be processed at its location: we want the + // interval to know all live registers at this location. + DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current)); + insert_at = i; + break; + } + } + + // Insert the high interval before the low, to ensure the low is processed before. + auto insert_pos = array->begin() + insert_at; + if (interval->HasHighInterval()) { + array->insert(insert_pos, { interval->GetHighInterval(), interval }); + } else if (interval->HasLowInterval()) { + array->insert(insert_pos, { interval, interval->GetLowInterval() }); + } else { + array->insert(insert_pos, interval); + } +} + +void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) { + if (interval->IsHighInterval()) { + // The low interval already took care of allocating the spill slot. 
+ DCHECK(!interval->GetLowInterval()->HasRegister()); + DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot()); + return; + } + + LiveInterval* parent = interval->GetParent(); + + // An instruction gets a spill slot for its entire lifetime. If the parent + // of this interval already has a spill slot, there is nothing to do. + if (parent->HasSpillSlot()) { + return; + } + + HInstruction* defined_by = parent->GetDefinedBy(); + DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi()); + + if (defined_by->IsParameterValue()) { + // Parameters have their own stack slot. + parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); + return; + } + + if (defined_by->IsCurrentMethod()) { + parent->SetSpillSlot(0); + return; + } + + if (defined_by->IsConstant()) { + // Constants don't need a spill slot. + return; + } + + ArenaVector<size_t>* spill_slots = nullptr; + switch (interval->GetType()) { + case Primitive::kPrimDouble: + spill_slots = &double_spill_slots_; + break; + case Primitive::kPrimLong: + spill_slots = &long_spill_slots_; + break; + case Primitive::kPrimFloat: + spill_slots = &float_spill_slots_; + break; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimShort: + spill_slots = &int_spill_slots_; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); + } + + // Find an available spill slot. + size_t slot = 0; + for (size_t e = spill_slots->size(); slot < e; ++slot) { + if ((*spill_slots)[slot] <= parent->GetStart()) { + if (!parent->NeedsTwoSpillSlots()) { + // One spill slot is sufficient. + break; + } + if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) { + // Two spill slots are available. + break; + } + } + } + + size_t end = interval->GetLastSibling()->GetEnd(); + if (parent->NeedsTwoSpillSlots()) { + if (slot + 2u > spill_slots->size()) { + // We need a new spill slot. + spill_slots->resize(slot + 2u, end); + } + (*spill_slots)[slot] = end; + (*spill_slots)[slot + 1] = end; + } else { + if (slot == spill_slots->size()) { + // We need a new spill slot. + spill_slots->push_back(end); + } else { + (*spill_slots)[slot] = end; + } + } + + // Note that the exact spill slot location will be computed when we resolve, + // that is when we know the number of spill slots for each type. + parent->SetSpillSlot(slot); +} + +void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) { + LiveInterval* interval = phi->GetLiveInterval(); + + HInstruction* previous_phi = phi->GetPrevious(); + DCHECK(previous_phi == nullptr || + previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) + << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent."; + + if (phi->IsVRegEquivalentOf(previous_phi)) { + // This is an equivalent of the previous phi. We need to assign the same + // catch phi slot. + DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot()); + interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); + } else { + // Allocate a new spill slot for this catch phi. + // TODO: Reuse spill slots when intervals of phis from different catch + // blocks do not overlap. + interval->SetSpillSlot(catch_phi_spill_slots_); + catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 
2 : 1; + } +} + +} // namespace art diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h new file mode 100644 index 0000000000..b6e4f92e42 --- /dev/null +++ b/compiler/optimizing/register_allocator_linear_scan.h @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_ +#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_ + +#include "arch/instruction_set.h" +#include "base/arena_containers.h" +#include "base/macros.h" +#include "primitive.h" +#include "register_allocator.h" + +namespace art { + +class CodeGenerator; +class HBasicBlock; +class HGraph; +class HInstruction; +class HParallelMove; +class HPhi; +class LiveInterval; +class Location; +class SsaLivenessAnalysis; + +/** + * An implementation of a linear scan register allocator on an `HGraph` with SSA form. + */ +class RegisterAllocatorLinearScan : public RegisterAllocator { + public: + RegisterAllocatorLinearScan(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& analysis); + + void AllocateRegisters() OVERRIDE; + + bool Validate(bool log_fatal_on_failure) OVERRIDE { + processing_core_registers_ = true; + if (!ValidateInternal(log_fatal_on_failure)) { + return false; + } + processing_core_registers_ = false; + return ValidateInternal(log_fatal_on_failure); + } + + size_t GetNumberOfSpillSlots() const { + return int_spill_slots_.size() + + long_spill_slots_.size() + + float_spill_slots_.size() + + double_spill_slots_.size() + + catch_phi_spill_slots_; + } + + private: + // Main methods of the allocator. + void LinearScan(); + bool TryAllocateFreeReg(LiveInterval* interval); + bool AllocateBlockedReg(LiveInterval* interval); + + // Add `interval` in the given sorted list. + static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval); + + // Returns whether `reg` is blocked by the code generator. + bool IsBlocked(int reg) const; + + // Update the interval for the register in `location` to cover [start, end). + void BlockRegister(Location location, size_t start, size_t end); + void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); + + // Allocate a spill slot for the given interval. Should be called in linear + // order of interval starting positions. + void AllocateSpillSlotFor(LiveInterval* interval); + + // Allocate a spill slot for the given catch phi. Will allocate the same slot + // for phis which share the same vreg. Must be called in reverse linear order + // of lifetime positions and ascending vreg numbers for correctness. + void AllocateSpillSlotForCatchPhi(HPhi* phi); + + // Helper methods. 
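The main methods declared above (LinearScan, TryAllocateFreeReg, AllocateBlockedReg) follow the classic linear-scan shape. For orientation only, a minimal textbook version with none of the refinements in this pass: no inactive list, no interval splitting, no register pairs, no fixed intervals, and a naive "leave the current interval spilled" policy. The Interval type and all names are illustrative, not ART's.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct Interval {
  std::string name;
  int start;
  int end;       // exclusive
  int reg = -1;  // -1 means spilled
};

// Classic linear scan: walk intervals by increasing start, free registers whose
// holder has ended, and spill when all registers are taken.
void LinearScan(std::vector<Interval>& intervals, int num_regs) {
  std::sort(intervals.begin(), intervals.end(),
            [](const Interval& a, const Interval& b) { return a.start < b.start; });
  std::vector<Interval*> active;  // kept sorted by increasing end position
  std::vector<bool> reg_free(num_regs, true);
  for (Interval& current : intervals) {
    // Expire intervals that ended before `current` starts.
    while (!active.empty() && active.front()->end <= current.start) {
      reg_free[active.front()->reg] = true;
      active.erase(active.begin());
    }
    // Try to grab a free register; otherwise leave `current` spilled.
    for (int r = 0; r < num_regs; ++r) {
      if (reg_free[r]) {
        reg_free[r] = false;
        current.reg = r;
        auto pos = std::find_if(active.begin(), active.end(),
                                [&](Interval* i) { return i->end > current.end; });
        active.insert(pos, &current);
        break;
      }
    }
  }
}

int main() {
  std::vector<Interval> intervals = {
      {"a", 0, 10}, {"b", 2, 4}, {"c", 3, 12}, {"d", 5, 8}};
  LinearScan(intervals, /*num_regs=*/ 2);
  for (const Interval& i : intervals) {
    std::cout << i.name << " -> "
              << (i.reg >= 0 ? "r" + std::to_string(i.reg) : "spill") << "\n";
  }
  return 0;
}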
+ void AllocateRegistersInternal(); + void ProcessInstruction(HInstruction* instruction); + bool ValidateInternal(bool log_fatal_on_failure) const; + void DumpInterval(std::ostream& stream, LiveInterval* interval) const; + void DumpAllIntervals(std::ostream& stream) const; + int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const; + int FindAvailableRegister(size_t* next_use, LiveInterval* current) const; + bool IsCallerSaveRegister(int reg) const; + + // Try splitting an active non-pair or unaligned pair interval at the given `position`. + // Returns whether it was successful at finding such an interval. + bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use); + + // List of intervals for core registers that must be processed, ordered by start + // position. Last entry is the interval that has the lowest start position. + // This list is initially populated before doing the linear scan. + ArenaVector<LiveInterval*> unhandled_core_intervals_; + + // List of intervals for floating-point registers. Same comments as above. + ArenaVector<LiveInterval*> unhandled_fp_intervals_; + + // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_` + // or `unhandled_fp_intervals_`. + ArenaVector<LiveInterval*>* unhandled_; + + // List of intervals that have been processed. + ArenaVector<LiveInterval*> handled_; + + // List of intervals that are currently active when processing a new live interval. + // That is, they have a live range that spans the start of the new interval. + ArenaVector<LiveInterval*> active_; + + // List of intervals that are currently inactive when processing a new live interval. + // That is, they have a lifetime hole that spans the start of the new interval. + ArenaVector<LiveInterval*> inactive_; + + // Fixed intervals for physical registers. Such intervals cover the positions + // where an instruction requires a specific register. + ArenaVector<LiveInterval*> physical_core_register_intervals_; + ArenaVector<LiveInterval*> physical_fp_register_intervals_; + + // Intervals for temporaries. Such intervals cover the positions + // where an instruction requires a temporary. + ArenaVector<LiveInterval*> temp_intervals_; + + // The spill slots allocated for live intervals. We ensure spill slots + // are typed to avoid (1) doing moves and swaps between two different kinds + // of registers, and (2) swapping between a single stack slot and a double + // stack slot. This simplifies the parallel move resolver. + ArenaVector<size_t> int_spill_slots_; + ArenaVector<size_t> long_spill_slots_; + ArenaVector<size_t> float_spill_slots_; + ArenaVector<size_t> double_spill_slots_; + + // Spill slots allocated to catch phis. This category is special-cased because + // (1) slots are allocated prior to linear scan and in reverse linear order, + // (2) equivalent phis need to share slots despite having different types. + size_t catch_phi_spill_slots_; + + // Instructions that need a safepoint. + ArenaVector<HInstruction*> safepoints_; + + // True if processing core registers. False if processing floating + // point registers. + bool processing_core_registers_; + + // Number of registers for the current register kind (core or floating point). + size_t number_of_registers_; + + // Temporary array, allocated ahead of time for simplicity. + size_t* registers_array_; + + // Blocked registers, as decided by the code generator. 
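The typed spill-slot pools above (int_spill_slots_, long_spill_slots_, float_spill_slots_, double_spill_slots_) each record, per slot, the position at which the slot becomes free again, and AllocateSpillSlotFor reuses a slot once its previous occupant has ended. A minimal single-pool sketch of that search; the AllocateSlot name and the positions in main are made up, and the two-adjacent-slots case for wide values is omitted.

#include <cstddef>
#include <iostream>
#include <vector>

// Each element records the position at which the slot becomes free again.
using SpillSlots = std::vector<size_t>;

// Returns the index of a slot that is free at `start`, reusing one whose
// previous occupant ended at or before `start`, or growing the pool.
// `end` is the last position at which the new occupant is live.
size_t AllocateSlot(SpillSlots* slots, size_t start, size_t end) {
  size_t slot = 0;
  for (size_t e = slots->size(); slot < e; ++slot) {
    if ((*slots)[slot] <= start) {
      break;  // This slot's previous occupant is dead; reuse it.
    }
  }
  if (slot == slots->size()) {
    slots->push_back(end);  // No free slot: allocate a new one.
  } else {
    (*slots)[slot] = end;   // Mark the reused slot busy until `end`.
  }
  return slot;
}

int main() {
  SpillSlots int_slots;
  size_t s0 = AllocateSlot(&int_slots, /*start=*/ 2, /*end=*/ 10);   // new slot 0
  size_t s1 = AllocateSlot(&int_slots, /*start=*/ 4, /*end=*/ 20);   // slot 0 busy -> new slot 1
  size_t s2 = AllocateSlot(&int_slots, /*start=*/ 12, /*end=*/ 30);  // slot 0 free again -> reused
  std::cout << s0 << " " << s1 << " " << s2 << "\n";  // prints: 0 1 0
  return 0;
}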
+ bool* const blocked_core_registers_; + bool* const blocked_fp_registers_; + + // Slots reserved for out arguments. + size_t reserved_out_slots_; + + // The maximum live core registers at safepoints. + size_t maximum_number_of_live_core_registers_; + + // The maximum live FP registers at safepoints. + size_t maximum_number_of_live_fp_registers_; + + ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil); + ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); + + DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorLinearScan); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_ diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index a9de7c3e59..cbb7b2f1c5 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -25,6 +25,7 @@ #include "nodes.h" #include "optimizing_unit_test.h" #include "register_allocator.h" +#include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" @@ -44,9 +45,9 @@ static bool Check(const uint16_t* data) { x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); - return register_allocator.Validate(false); + RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); + return register_allocator->Validate(false); } /** @@ -295,9 +296,9 @@ TEST_F(RegisterAllocatorTest, Loop3) { x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); - ASSERT_TRUE(register_allocator.Validate(false)); + RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); + ASSERT_TRUE(register_allocator->Validate(false)); HBasicBlock* loop_header = graph->GetBlocks()[2]; HPhi* phi = loop_header->GetFirstPhi()->AsPhi(); @@ -384,9 +385,9 @@ TEST_F(RegisterAllocatorTest, DeadPhi) { x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); - ASSERT_TRUE(register_allocator.Validate(false)); + RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); + ASSERT_TRUE(register_allocator->Validate(false)); } /** @@ -408,7 +409,7 @@ TEST_F(RegisterAllocatorTest, FreeUntil) { x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); + RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness); // Add an artifical range to cover the temps that will be put in the unhandled list. 
LiveInterval* unhandled = graph->GetEntryBlock()->GetFirstInstruction()->GetLiveInterval(); @@ -541,8 +542,9 @@ TEST_F(RegisterAllocatorTest, PhiHint) { liveness.Analyze(); // Check that the register allocator is deterministic. - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0); ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 0); @@ -560,8 +562,9 @@ TEST_F(RegisterAllocatorTest, PhiHint) { // Set the phi to a specific register, and check that the inputs get allocated // the same register. phi->GetLocations()->UpdateOut(Location::RegisterLocation(2)); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2); ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2); @@ -579,8 +582,9 @@ TEST_F(RegisterAllocatorTest, PhiHint) { // Set input1 to a specific register, and check that the phi and other input get allocated // the same register. input1->GetLocations()->UpdateOut(Location::RegisterLocation(2)); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2); ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2); @@ -598,8 +602,9 @@ TEST_F(RegisterAllocatorTest, PhiHint) { // Set input2 to a specific register, and check that the phi and other input get allocated // the same register. input2->GetLocations()->UpdateOut(Location::RegisterLocation(2)); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2); ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2); @@ -658,8 +663,9 @@ TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) { SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); // Sanity check that in normal conditions, the register should be hinted to 0 (EAX). ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 0); @@ -677,8 +683,9 @@ TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) { // Don't use SetInAt because we are overriding an already allocated location. 
ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2); } @@ -726,8 +733,9 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) { SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); // Sanity check that in normal conditions, the registers are the same. ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 1); @@ -748,8 +756,9 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) { ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2); ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 2); @@ -795,8 +804,9 @@ TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.AllocateRegisters(); + RegisterAllocator* register_allocator = + RegisterAllocator::Create(&allocator, &codegen, liveness); + register_allocator->AllocateRegisters(); // div on x86 requires its first input in eax and the output be the same as the first input. ASSERT_EQ(div->GetLiveInterval()->GetRegister(), 0); @@ -892,7 +902,7 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { liveness.instructions_from_lifetime_position_.push_back(user); } - RegisterAllocator register_allocator(&allocator, &codegen, liveness); + RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness); register_allocator.unhandled_core_intervals_.push_back(fourth); register_allocator.unhandled_core_intervals_.push_back(third); register_allocator.unhandled_core_intervals_.push_back(second); diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 8747dad5e5..353c729249 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -2456,6 +2456,9 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, } else if (!byte) { encoding |= B22; } + if (load && is_signed && (byte || half)) { + encoding |= B24; + } Emit32(encoding); } else { // 16 bit register offset. 
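The register allocator tests above now obtain the allocator through RegisterAllocator::Create instead of constructing RegisterAllocator directly, so the linear-scan pass sits behind a common interface. A small sketch of that factory-plus-interface split: unlike the Create shown in the tests, which takes an allocator, the code generator and the liveness analysis and returns a raw pointer, this toy version takes nothing and returns a unique_ptr, and all class names here are invented.

#include <iostream>
#include <memory>

// Common interface the rest of the compiler codes against.
class RegisterAllocatorBase {
 public:
  virtual ~RegisterAllocatorBase() = default;
  virtual void AllocateRegisters() = 0;
  virtual bool Validate(bool log_fatal_on_failure) = 0;

  // Factory: callers do not need to know which strategy they get.
  static std::unique_ptr<RegisterAllocatorBase> Create();
};

class LinearScanAllocator : public RegisterAllocatorBase {
 public:
  void AllocateRegisters() override { std::cout << "linear scan\n"; }
  bool Validate(bool /*log_fatal_on_failure*/) override { return true; }
};

std::unique_ptr<RegisterAllocatorBase> RegisterAllocatorBase::Create() {
  // A different strategy (e.g. graph coloring) could be selected here instead.
  return std::make_unique<LinearScanAllocator>();
}

int main() {
  std::unique_ptr<RegisterAllocatorBase> allocator = RegisterAllocatorBase::Create();
  allocator->AllocateRegisters();
  return allocator->Validate(false) ? 0 : 1;
}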
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index f3fa72ccc6..abb09f726f 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -1450,4 +1450,23 @@ TEST_F(AssemblerThumb2Test, vpaddl) { DriverStr(expected, "vpaddl"); } +TEST_F(AssemblerThumb2Test, LoadFromShiftedRegOffset) { + arm::Address mem_address(arm::R0, arm::R1, arm::Shift::LSL, 2); + + __ ldrsb(arm::R2, mem_address); + __ ldrb(arm::R2, mem_address); + __ ldrsh(arm::R2, mem_address); + __ ldrh(arm::R2, mem_address); + __ ldr(arm::R2, mem_address); + + std::string expected = + "ldrsb r2, [r0, r1, LSL #2]\n" + "ldrb r2, [r0, r1, LSL #2]\n" + "ldrsh r2, [r0, r1, LSL #2]\n" + "ldrh r2, [r0, r1, LSL #2]\n" + "ldr r2, [r0, r1, LSL #2]\n"; + + DriverStr(expected, "LoadFromShiftedRegOffset"); +} + } // namespace art diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc index 93351e9f28..a8f15d06a8 100644 --- a/dex2oat/dex2oat_test.cc +++ b/dex2oat/dex2oat_test.cc @@ -14,9 +14,10 @@ * limitations under the License. */ +#include <regex> +#include <sstream> #include <string> #include <vector> -#include <sstream> #include "common_runtime_test.h" @@ -207,7 +208,7 @@ class Dex2oatSwapTest : public Dex2oatTest { std::string dex_location = GetScratchDir() + "/Dex2OatSwapTest.jar"; std::string odex_location = GetOdexDir() + "/Dex2OatSwapTest.odex"; - Copy(GetDexSrc1(), dex_location); + Copy(GetTestDexFileName(), dex_location); std::vector<std::string> copy(extra_args); @@ -226,7 +227,11 @@ class Dex2oatSwapTest : public Dex2oatTest { CheckResult(expect_use); } - void CheckResult(bool expect_use) { + virtual std::string GetTestDexFileName() { + return GetDexSrc1(); + } + + virtual void CheckResult(bool expect_use) { if (kIsTargetBuild) { CheckTargetResult(expect_use); } else { @@ -234,13 +239,13 @@ class Dex2oatSwapTest : public Dex2oatTest { } } - void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) { + virtual void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) { // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do // something for variants with file descriptor where we can control the lifetime of // the swap file and thus take a look at it. } - void CheckHostResult(bool expect_use) { + virtual void CheckHostResult(bool expect_use) { if (!kIsTargetBuild) { if (expect_use) { EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos) @@ -253,7 +258,7 @@ class Dex2oatSwapTest : public Dex2oatTest { } // Check whether the dex2oat run was really successful. - void CheckValidity() { + virtual void CheckValidity() { if (kIsTargetBuild) { CheckTargetValidity(); } else { @@ -261,14 +266,14 @@ class Dex2oatSwapTest : public Dex2oatTest { } } - void CheckTargetValidity() { + virtual void CheckTargetValidity() { // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do // something for variants with file descriptor where we can control the lifetime of // the swap file and thus take a look at it. } // On the host, we can get the dex2oat output. Here, look for "dex2oat took." 
- void CheckHostValidity() { + virtual void CheckHostValidity() { EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_; } }; @@ -297,6 +302,96 @@ TEST_F(Dex2oatSwapTest, DoUseSwapSingleSmall) { { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" }); } +class Dex2oatSwapUseTest : public Dex2oatSwapTest { + protected: + void CheckHostResult(bool expect_use) OVERRIDE { + if (!kIsTargetBuild) { + if (expect_use) { + EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos) + << output_; + } else { + EXPECT_EQ(output_.find("Large app, accepted running with swap."), std::string::npos) + << output_; + } + } + } + + std::string GetTestDexFileName() OVERRIDE { + // Use Statics as it has a handful of functions. + return CommonRuntimeTest::GetTestDexFileName("Statics"); + } + + size_t ParseNativeAlloc() { + std::regex native_alloc_regex("dex2oat took.*native alloc=[^ ]+ \\(([0-9]+)B\\)"); + std::smatch native_alloc_match; + bool found = std::regex_search(output_, native_alloc_match, native_alloc_regex); + if (!found) { + EXPECT_TRUE(found); + return 0; + } + if (native_alloc_match.size() != 2U) { + EXPECT_EQ(native_alloc_match.size(), 2U); + return 0; + } + + std::istringstream stream(native_alloc_match[1].str()); + size_t value; + stream >> value; + + return value; + } + + size_t ParseSwap(bool expected) { + std::regex swap_regex("dex2oat took[^\\n]+swap=[^ ]+ \\(([0-9]+)B\\)"); + std::smatch swap_match; + bool found = std::regex_search(output_, swap_match, swap_regex); + if (found != expected) { + EXPECT_EQ(expected, found); + return 0; + } + + if (!found) { + return 0; + } + + if (swap_match.size() != 2U) { + EXPECT_EQ(swap_match.size(), 2U); + return 0; + } + + std::istringstream stream(swap_match[1].str()); + size_t value; + stream >> value; + + return value; + } +}; + +TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) { + RunTest(false /* use_fd */, + false /* expect_use */); + size_t native_without = ParseNativeAlloc(); + size_t swap_without = ParseSwap(false /* expected */); + std::string output_without = output_; + + output_ = ""; + + RunTest(false /* use_fd */, + true /* expect_use */, + { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" }); + size_t native_with = ParseNativeAlloc(); + size_t swap_with = ParseSwap(true /* expected */); + std::string output_with = output_; + + if (native_with >= native_without || swap_without >= swap_with) { + EXPECT_LT(native_with, native_without); + EXPECT_LT(swap_without, swap_with); + + LOG(ERROR) << output_without; + LOG(ERROR) << output_with; + } +} + class Dex2oatVeryLargeTest : public Dex2oatTest { protected: void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED, diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc index 214222dd84..f5669d77df 100644 --- a/imgdiag/imgdiag.cc +++ b/imgdiag/imgdiag.cc @@ -729,7 +729,7 @@ class ImgDiagDumper { os << " " << reinterpret_cast<void*>(obj) << " "; os << " entryPointFromJni: " << reinterpret_cast<const void*>( - art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", "; + art_method->GetDataPtrSize(pointer_size)) << ", "; os << " entryPointFromQuickCompiledCode: " << reinterpret_cast<const void*>( art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)) @@ -810,7 +810,7 @@ class ImgDiagDumper { os << " " << reinterpret_cast<void*>(obj) << " "; os << " entryPointFromJni: " << reinterpret_cast<const void*>( - art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", "; + art_method->GetDataPtrSize(pointer_size)) << ", "; 
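Dex2oatSwapUseTest above extracts byte counts from dex2oat's timing line with std::regex and a single capture group. A standalone sketch of the same parsing pattern; the sample log line in main is an approximation for illustration, not actual dex2oat output.

#include <iostream>
#include <regex>
#include <sstream>
#include <string>

// Extract the byte count from a line such as
// "dex2oat took ... native alloc=4MB (4194304B) ...".
// Returns 0 if the pattern is not found.
size_t ParseNativeAllocBytes(const std::string& output) {
  std::regex native_alloc_regex("dex2oat took.*native alloc=[^ ]+ \\(([0-9]+)B\\)");
  std::smatch match;
  if (!std::regex_search(output, match, native_alloc_regex) || match.size() != 2u) {
    return 0;
  }
  std::istringstream stream(match[1].str());
  size_t value = 0;
  stream >> value;
  return value;
}

int main() {
  std::string log = "dex2oat took 123ms (threads: 4) native alloc=4MB (4194304B) swap=0B (0B)";
  std::cout << ParseNativeAllocBytes(log) << "\n";  // prints: 4194304
  return 0;
}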
os << " entryPointFromQuickCompiledCode: " << reinterpret_cast<const void*>( art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)) diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc index 5bb61bb829..569c5e94e6 100644 --- a/patchoat/patchoat.cc +++ b/patchoat/patchoat.cc @@ -748,8 +748,8 @@ void PatchOat::FixupMethod(ArtMethod* object, ArtMethod* copy) { copy->SetEntryPointFromQuickCompiledCodePtrSize(RelocatedAddressOfPointer( object->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)), pointer_size); // No special handling for IMT conflict table since all pointers are moved by the same offset. - copy->SetEntryPointFromJniPtrSize(RelocatedAddressOfPointer( - object->GetEntryPointFromJniPtrSize(pointer_size)), pointer_size); + copy->SetDataPtrSize(RelocatedAddressOfPointer( + object->GetDataPtrSize(pointer_size)), pointer_size); } bool PatchOat::Patch(File* input_oat, off_t delta, File* output_oat, TimingLogger* timings, diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index 8f18ff3204..966587d772 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -27,13 +27,15 @@ namespace art { // Cast entrypoints. -extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass, - const mirror::Class* ref_class); +extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass, + const mirror::Class* ref_class); // Read barrier entrypoints. // art_quick_read_barrier_mark_regX uses an non-standard calling // convention: it expects its input in register X and returns its -// result in that same register. +// result in that same register, and saves and restores all +// caller-save registers. +extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*); @@ -119,7 +121,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - qpoints->pReadBarrierMarkReg00 = artReadBarrierMark; + qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00; qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01; qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02; qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03; diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 82d64b90d7..34d3158c62 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -672,6 +672,12 @@ END art_quick_check_cast .endif .endm +// Save rReg's value to [sp, #offset]. +.macro PUSH_REG rReg, offset + str \rReg, [sp, #\offset] @ save rReg + .cfi_rel_offset \rReg, \offset +.endm + /* * Macro to insert read barrier, only used in art_quick_aput_obj. * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET. @@ -1042,6 +1048,18 @@ ENTRY art_quick_alloc_object_rosalloc #endif POISON_HEAP_REF r2 str r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET] + // Fence. This is "ish" not "ishst" so + // that it also ensures ordering of + // the class status load with respect + // to later accesses to the class + // object. Alternatively we could use + // "ishst" if we use load-acquire for + // the class status load.) 
+ // Needs to be done before pushing on + // allocation since Heap::VisitObjects + // relies on seeing the class pointer. + // b/28790624 + dmb ish // Push the new object onto the thread // local allocation stack and // increment the thread local @@ -1056,14 +1074,7 @@ ENTRY art_quick_alloc_object_rosalloc // and the list head store above using // strd. str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)] - // Fence. This is "ish" not "ishst" so - // that the code after this allocation - // site will see the right values in - // the fields of the class. - // Alternatively we could use "ishst" - // if we use load-acquire for the - // class status load.) - dmb ish + mov r0, r3 // Set the return value and return. bx lr @@ -1747,30 +1758,83 @@ END art_quick_l2f /* * Create a function `name` calling the ReadBarrier::Mark routine, * getting its argument and returning its result through register - * `reg`, thus following a non-standard runtime calling convention: - * - `reg` is used to pass the (sole) argument of this function + * `reg`, saving and restoring all caller-save registers. + * + * If `reg` is different from `r0`, the generated function follows a + * non-standard runtime calling convention: + * - register `reg` is used to pass the (sole) argument of this + * function (instead of R0); + * - register `reg` is used to return the result of this function * (instead of R0); - * - `reg` is used to return the result of this function (instead of R0); * - R0 is treated like a normal (non-argument) caller-save register; * - everything else is the same as in the standard runtime calling - * convention (e.g. same callee-save registers). + * convention (e.g. standard callee-save registers are preserved). */ .macro READ_BARRIER_MARK_REG name, reg ENTRY \name - push {lr} @ save return address - .cfi_adjust_cfa_offset 4 - .cfi_rel_offset lr, 0 - sub sp, #4 @ push padding (native calling convention 8-byte alignment) - .cfi_adjust_cfa_offset 4 - mov r0, \reg @ pass arg1 - obj from `reg` - bl artReadBarrierMark @ artReadBarrierMark(obj) - mov \reg, r0 @ return result into `reg` - add sp, #4 @ pop padding - .cfi_adjust_cfa_offset -4 - pop {pc} @ return + push {r0-r4, r9, r12, lr} @ save return address and core caller-save registers + .cfi_adjust_cfa_offset 32 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r1, 4 + .cfi_rel_offset r2, 8 + .cfi_rel_offset r3, 12 + .cfi_rel_offset r4, 16 + .cfi_rel_offset r9, 20 + .cfi_rel_offset r12, 24 + .cfi_rel_offset lr, 28 + vpush {s0-s15} @ save floating-point caller-save registers + .cfi_adjust_cfa_offset 64 + + .ifnc \reg, r0 + mov r0, \reg @ pass arg1 - obj from `reg` + .endif + bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj) + + vpop {s0-s15} @ restore floating-point registers + .cfi_adjust_cfa_offset -64 + @ If `reg` is a caller-save register, save the result to its + @ corresponding stack slot; it will be restored by the "pop" + @ instruction below. Otherwise, move result into `reg`. + @ + @ (Note that saving `reg` to its stack slot will overwrite the value + @ previously stored by the "push" instruction above. That is + @ alright, as in that case we know that `reg` is not a live + @ register, as it is used to pass the argument and return the result + @ of this function.) 
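The relocated dmb ish in the rosalloc fast path above enforces the usual initialize-then-publish ordering: the class pointer must be written before the object becomes visible to anything walking the thread-local allocation stack (Heap::VisitObjects). As an illustration only, the same ordering requirement expressed with C++ atomics; a release store is weaker than dmb ish, and the Object struct and names below are invented.

#include <atomic>
#include <cassert>
#include <thread>

struct Object {
  int klass = 0;  // stand-in for the class pointer / header word
};

std::atomic<Object*> published{nullptr};

void Allocator() {
  Object* obj = new Object();
  obj->klass = 42;  // initialize the header first
  // Publish with release semantics: everything written above becomes visible
  // to any thread that observes `obj` through an acquire load below.
  published.store(obj, std::memory_order_release);
}

void Visitor() {
  Object* obj = published.load(std::memory_order_acquire);
  if (obj != nullptr) {
    // Having seen the published pointer, we are guaranteed to see the header.
    assert(obj->klass == 42);
  }
}

int main() {
  std::thread t1(Allocator);
  std::thread t2(Visitor);
  t1.join();
  t2.join();
  delete published.load();
  return 0;
}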
+ .ifc \reg, r0 + PUSH_REG r0, 0 @ copy result to r0's stack location + .else + .ifc \reg, r1 + PUSH_REG r0, 4 @ copy result to r1's stack location + .else + .ifc \reg, r2 + PUSH_REG r0, 8 @ copy result to r2's stack location + .else + .ifc \reg, r3 + PUSH_REG r0, 12 @ copy result to r3's stack location + .else + .ifc \reg, r4 + PUSH_REG r0, 16 @ copy result to r4's stack location + .else + .ifc \reg, r9 + PUSH_REG r0, 20 @ copy result to r9's stack location + .else + .ifc \reg, r12 + PUSH_REG r0, 24 @ copy result to r12's stack location + .else + mov \reg, r0 @ return result into `reg` + .endif + .endif + .endif + .endif + .endif + .endif + .endif + pop {r0-r4, r9, r12, pc} @ restore caller-save registers and return END \name .endm +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3 diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index c3188b6aad..2e5f5ad89f 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -27,13 +27,15 @@ namespace art { // Cast entrypoints. -extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass, - const mirror::Class* ref_class); +extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass, + const mirror::Class* ref_class); // Read barrier entrypoints. // art_quick_read_barrier_mark_regX uses an non-standard calling // convention: it expects its input in register X and returns its -// result in that same register. +// result in that same register, and saves and restores all +// caller-save registers. +extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*); @@ -122,7 +124,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - qpoints->pReadBarrierMarkReg00 = artReadBarrierMark; + qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00; qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01; qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02; qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index e9ad1f4080..6173ae71e1 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1253,6 +1253,22 @@ END art_quick_check_cast .endif .endm +// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude. +// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude. +.macro POP_REGS_NE xReg1, xReg2, offset, xExclude + .ifc \xReg1, \xExclude + ldr \xReg2, [sp, #(\offset + 8)] // restore xReg2 + .else + .ifc \xReg2, \xExclude + ldr \xReg1, [sp, #\offset] // restore xReg1 + .else + ldp \xReg1, \xReg2, [sp, #\offset] // restore xReg1 and xReg2 + .endif + .endif + .cfi_restore \xReg1 + .cfi_restore \xReg2 +.endm + /* * Macro to insert read barrier, only used in art_quick_aput_obj. 
* xDest, wDest and xObj are registers, offset is a defined literal such as @@ -1633,6 +1649,18 @@ ENTRY art_quick_alloc_object_rosalloc #endif POISON_HEAP_REF w2 str w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET] + // Fence. This is "ish" not "ishst" so + // that it also ensures ordering of + // the class status load with respect + // to later accesses to the class + // object. Alternatively we could use + // "ishst" if we use load-acquire for + // the class status load.) + // Needs to be done before pushing on + // allocation since Heap::VisitObjects + // relies on seeing the class pointer. + // b/28790624 + dmb ish // Push the new object onto the thread // local allocation stack and // increment the thread local @@ -1647,14 +1675,7 @@ ENTRY art_quick_alloc_object_rosalloc // and the list head store above using // strd. str w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)] - // Fence. This is "ish" not "ishst" so - // that the code after this allocation - // site will see the right values in - // the fields of the class. - // Alternatively we could use "ishst" - // if we use load-acquire for the - // class status load.) - dmb ish + mov x0, x3 // Set the return value and return. ret .Lart_quick_alloc_object_rosalloc_slow_path: @@ -2217,56 +2238,148 @@ END art_quick_indexof /* * Create a function `name` calling the ReadBarrier::Mark routine, - * getting its argument and returning its result through register - * `reg`, thus following a non-standard runtime calling convention: - * - `reg` is used to pass the (sole) argument of this function + * getting its argument and returning its result through W register + * `wreg` (corresponding to X register `xreg`), saving and restoring + * all caller-save registers. + * + * If `wreg` is different from `w0`, the generated function follows a + * non-standard runtime calling convention: + * - register `wreg` is used to pass the (sole) argument of this + * function (instead of W0); + * - register `wreg` is used to return the result of this function * (instead of W0); - * - `reg` is used to return the result of this function (instead of W0); * - W0 is treated like a normal (non-argument) caller-save register; * - everything else is the same as in the standard runtime calling - * convention (e.g. same callee-save registers). + * convention (e.g. standard callee-save registers are preserved). */ -.macro READ_BARRIER_MARK_REG name, reg +.macro READ_BARRIER_MARK_REG name, wreg, xreg ENTRY \name - str xLR, [sp, #-16]! // Save return address and add padding (16B align stack). - .cfi_adjust_cfa_offset 16 - .cfi_rel_offset x30, 0 - mov w0, \reg // Pass arg1 - obj from `reg` + /* + * Allocate 46 stack slots * 8 = 368 bytes: + * - 20 slots for core registers X0-X19 + * - 24 slots for floating-point registers D0-D7 and D16-D31 + * - 1 slot for return address register XLR + * - 1 padding slot for 16-byte stack alignment + */ + // Save all potentially live caller-save core registers. + stp x0, x1, [sp, #-368]! 
+ .cfi_adjust_cfa_offset 368 + .cfi_rel_offset x0, 0 + .cfi_rel_offset x1, 8 + stp x2, x3, [sp, #16] + .cfi_rel_offset x2, 16 + .cfi_rel_offset x3, 24 + stp x4, x5, [sp, #32] + .cfi_rel_offset x4, 32 + .cfi_rel_offset x5, 40 + stp x6, x7, [sp, #48] + .cfi_rel_offset x6, 48 + .cfi_rel_offset x7, 56 + stp x8, x9, [sp, #64] + .cfi_rel_offset x8, 64 + .cfi_rel_offset x9, 72 + stp x10, x11, [sp, #80] + .cfi_rel_offset x10, 80 + .cfi_rel_offset x11, 88 + stp x12, x13, [sp, #96] + .cfi_rel_offset x12, 96 + .cfi_rel_offset x13, 104 + stp x14, x15, [sp, #112] + .cfi_rel_offset x14, 112 + .cfi_rel_offset x15, 120 + stp x16, x17, [sp, #128] + .cfi_rel_offset x16, 128 + .cfi_rel_offset x17, 136 + stp x18, x19, [sp, #144] + .cfi_rel_offset x18, 144 + .cfi_rel_offset x19, 152 + // Save all potentially live caller-save floating-point registers. + stp d0, d1, [sp, #160] + stp d2, d3, [sp, #176] + stp d4, d5, [sp, #192] + stp d6, d7, [sp, #208] + stp d16, d17, [sp, #224] + stp d18, d19, [sp, #240] + stp d20, d21, [sp, #256] + stp d22, d23, [sp, #272] + stp d24, d25, [sp, #288] + stp d26, d27, [sp, #304] + stp d28, d29, [sp, #320] + stp d30, d31, [sp, #336] + // Save return address. + str xLR, [sp, #352] + .cfi_rel_offset x30, 352 + // (sp + #360 is a padding slot) + + .ifnc \wreg, w0 + mov w0, \wreg // Pass arg1 - obj from `wreg` + .endif bl artReadBarrierMark // artReadBarrierMark(obj) - mov \reg, w0 // Return result into `reg` - ldr xLR, [sp], #16 // Restore return address and remove padding. + .ifnc \wreg, w0 + mov \wreg, w0 // Return result into `wreg` + .endif + + // Restore core regs, except `xreg`, as `wreg` is used to return the + // result of this function (simply remove it from the stack instead). + POP_REGS_NE x0, x1, 0, \xreg + POP_REGS_NE x2, x3, 16, \xreg + POP_REGS_NE x4, x5, 32, \xreg + POP_REGS_NE x6, x7, 48, \xreg + POP_REGS_NE x8, x9, 64, \xreg + POP_REGS_NE x10, x11, 80, \xreg + POP_REGS_NE x12, x13, 96, \xreg + POP_REGS_NE x14, x15, 112, \xreg + POP_REGS_NE x16, x17, 128, \xreg + POP_REGS_NE x18, x19, 144, \xreg + // Restore floating-point registers. + ldp d0, d1, [sp, #160] + ldp d2, d3, [sp, #176] + ldp d4, d5, [sp, #192] + ldp d6, d7, [sp, #208] + ldp d16, d17, [sp, #224] + ldp d18, d19, [sp, #240] + ldp d20, d21, [sp, #256] + ldp d22, d23, [sp, #272] + ldp d24, d25, [sp, #288] + ldp d26, d27, [sp, #304] + ldp d28, d29, [sp, #320] + ldp d30, d31, [sp, #336] + // Restore return address and remove padding. 
+ ldr xLR, [sp, #352] .cfi_restore x30 - .cfi_adjust_cfa_offset -16 + add sp, sp, #368 + .cfi_adjust_cfa_offset -368 ret END \name .endm -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0, x0 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1, x1 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2, x2 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3, x3 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4, x4 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5, x5 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6, x6 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7, x7 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8, x8 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9, x9 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23 +READ_BARRIER_MARK_REG 
art_quick_read_barrier_mark_reg24, w24, x24 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29 diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index e3cc0e0b67..22efd199cf 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -28,8 +28,8 @@ namespace art { // Cast entrypoints. -extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass, - const mirror::Class* ref_class); +extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass, + const mirror::Class* ref_class); // Math entrypoints. extern int32_t CmpgDouble(double a, double b); diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index b19aa01712..b02edb6aba 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -28,8 +28,8 @@ namespace art { // Cast entrypoints. -extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass, - const mirror::Class* ref_class); +extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass, + const mirror::Class* ref_class); // Math entrypoints. extern int32_t CmpgDouble(double a, double b); extern int32_t CmplDouble(double a, double b); diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index 69c939e4cb..4e9756c54e 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -25,13 +25,14 @@ namespace art { // Cast entrypoints. -extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass, - const mirror::Class* ref_class); +extern "C" size_t art_quick_is_assignable(const mirror::Class* klass, + const mirror::Class* ref_class); // Read barrier entrypoints. // art_quick_read_barrier_mark_regX uses an non-standard calling // convention: it expects its input in register X and returns its -// result in that same register. +// result in that same register, and saves and restores all +// caller-save registers. extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*); diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index e75fecba4b..77e04e7981 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1908,41 +1908,73 @@ DEFINE_FUNCTION art_nested_signal_return UNREACHABLE END_FUNCTION art_nested_signal_return -// Call the ReadBarrierMark entry point, getting input and returning -// result through EAX (register 0), following the standard runtime -// calling convention. 
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg00 - subl LITERAL(8), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(8) - PUSH eax // pass arg1 - obj - call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - addl LITERAL(12), %esp // pop argument and remove padding - CFI_ADJUST_CFA_OFFSET(-12) - ret -END_FUNCTION art_quick_read_barrier_mark_reg00 - // Create a function `name` calling the ReadBarrier::Mark routine, // getting its argument and returning its result through register -// `reg`, thus following a non-standard runtime calling convention: -// - `reg` is used to pass the (sole) argument of this function +// `reg`, saving and restoring all caller-save registers. +// +// If `reg` is different from `eax`, the generated function follows a +// non-standard runtime calling convention: +// - register `reg` is used to pass the (sole) argument of this function +// (instead of EAX); +// - register `reg` is used to return the result of this function // (instead of EAX); -// - `reg` is used to return the result of this function (instead of EAX); // - EAX is treated like a normal (non-argument) caller-save register; // - everything else is the same as in the standard runtime calling -// convention (e.g. same callee-save registers). +// convention (e.g. standard callee-save registers are preserved). MACRO2(READ_BARRIER_MARK_REG, name, reg) DEFINE_FUNCTION VAR(name) - subl LITERAL(8), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(8) + // Save all potentially live caller-save core registers. + PUSH eax + PUSH ecx + PUSH edx + PUSH ebx + // 8-byte align the stack to improve (8-byte) XMM register saving and restoring. + // and create space for caller-save floating-point registers. + subl MACRO_LITERAL(4 + 8 * 8), %esp + CFI_ADJUST_CFA_OFFSET(4 + 8 * 8) + // Save all potentially live caller-save floating-point registers. + movsd %xmm0, 0(%esp) + movsd %xmm1, 8(%esp) + movsd %xmm2, 16(%esp) + movsd %xmm3, 24(%esp) + movsd %xmm4, 32(%esp) + movsd %xmm5, 40(%esp) + movsd %xmm6, 48(%esp) + movsd %xmm7, 56(%esp) + + subl LITERAL(4), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(4) PUSH RAW_VAR(reg) // pass arg1 - obj from `reg` call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - movl %eax, REG_VAR(reg) // return result into `reg` - addl LITERAL(12), %esp // pop argument and remove padding - CFI_ADJUST_CFA_OFFSET(-12) + .ifnc RAW_VAR(reg), eax + movl %eax, REG_VAR(reg) // return result into `reg` + .endif + addl LITERAL(8), %esp // pop argument and remove padding + CFI_ADJUST_CFA_OFFSET(-8) + + // Restore floating-point registers. + movsd 0(%esp), %xmm0 + movsd 8(%esp), %xmm1 + movsd 16(%esp), %xmm2 + movsd 24(%esp), %xmm3 + movsd 32(%esp), %xmm4 + movsd 40(%esp), %xmm5 + movsd 48(%esp), %xmm6 + movsd 56(%esp), %xmm7 + // Remove floating-point registers and padding. + addl MACRO_LITERAL(8 * 8 + 4), %esp + CFI_ADJUST_CFA_OFFSET(-(8 * 8 + 4)) + // Restore core regs, except `reg`, as it is used to return the + // result of this function (simply remove it from the stack instead). 
+ POP_REG_NE ebx, RAW_VAR(reg) + POP_REG_NE edx, RAW_VAR(reg) + POP_REG_NE ecx, RAW_VAR(reg) + POP_REG_NE eax, RAW_VAR(reg) ret END_FUNCTION VAR(name) END_MACRO +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, eax READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, ecx READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, edx READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, ebx diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S index cf0039c84e..c4e723c483 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.S +++ b/runtime/arch/x86_64/asm_support_x86_64.S @@ -52,7 +52,7 @@ #define LITERAL(value) $value #if defined(__APPLE__) - #define MACRO_LITERAL(value) $$(value) + #define MACRO_LITERAL(value) $(value) #else #define MACRO_LITERAL(value) $value #endif diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index 2bea3dbf61..c2e3023b87 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -28,13 +28,14 @@ namespace art { // Cast entrypoints. -extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass, - const mirror::Class* ref_class); +extern "C" size_t art_quick_assignable_from_code(const mirror::Class* klass, + const mirror::Class* ref_class); // Read barrier entrypoints. // art_quick_read_barrier_mark_regX uses an non-standard calling // convention: it expects its input in register X and returns its -// result in that same register. +// result in that same register, and saves and restores all +// caller-save registers. extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*); diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 496e6a8b4a..784ec394a8 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1815,73 +1815,93 @@ DEFINE_FUNCTION art_nested_signal_return UNREACHABLE END_FUNCTION art_nested_signal_return -// Call the ReadBarrier::Mark routine, getting argument and returning -// result through RAX (register 0), thus following a non-standard -// runtime calling convention: -// - RAX is used to pass the (sole) argument of this function (instead -// of RDI); -// - RDI is treated like a normal (non-argument) caller-save register; -// - everything else is the same as in the standard runtime calling -// convention; in particular, RAX is still used to return the result -// of this function. -DEFINE_FUNCTION art_quick_read_barrier_mark_reg00 - SETUP_FP_CALLEE_SAVE_FRAME - subq LITERAL(8), %rsp // Alignment padding. - CFI_ADJUST_CFA_OFFSET(8) - movq %rax, %rdi // Pass arg1 - obj from RAX. - call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - addq LITERAL(8), %rsp // Remove padding. 
- CFI_ADJUST_CFA_OFFSET(-8) - RESTORE_FP_CALLEE_SAVE_FRAME - ret -END_FUNCTION art_quick_read_barrier_mark_reg00 - -// Call the ReadBarrier::Mark routine, getting argument and returning -// result through RDI (register 7), thus following a non-standard -// runtime calling convention: -// - RDI is used to return the result of this function (instead of RAX); -// - RAX is treated like a normal (non-result) caller-save register; -// - everything else is the same as in the standard runtime calling -// convention; in particular, RDI is still used to pass the (sole) -// argument of this function. -DEFINE_FUNCTION art_quick_read_barrier_mark_reg07 - SETUP_FP_CALLEE_SAVE_FRAME - subq LITERAL(8), %rsp // Alignment padding. - CFI_ADJUST_CFA_OFFSET(8) - call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - movq %rax, %rdi // Return result into RDI. - addq LITERAL(8), %rsp // Remove padding. - CFI_ADJUST_CFA_OFFSET(-8) - RESTORE_FP_CALLEE_SAVE_FRAME - ret -END_FUNCTION art_quick_read_barrier_mark_reg07 - // Create a function `name` calling the ReadBarrier::Mark routine, // getting its argument and returning its result through register -// `reg`, thus following a non-standard runtime calling convention: -// - `reg` is used to pass the (sole) argument of this function (instead -// of RDI); -// - `reg` is used to return the result of this function (instead of RAX); -// - RDI is treated like a normal (non-argument) caller-save register; -// - RAX is treated like a normal (non-result) caller-save register; +// `reg`, saving and restoring all caller-save registers. +// +// The generated function follows a non-standard runtime calling +// convention: +// - register `reg` (which may be different from RDI) is used to pass +// the (sole) argument of this function; +// - register `reg` (which may be different from RAX) is used to return +// the result of this function (instead of RAX); +// - if `reg` is different from `rdi`, RDI is treated like a normal +// (non-argument) caller-save register; +// - if `reg` is different from `rax`, RAX is treated like a normal +// (non-result) caller-save register; // - everything else is the same as in the standard runtime calling -// convention (e.g. same callee-save registers). +// convention (e.g. standard callee-save registers are preserved). MACRO2(READ_BARRIER_MARK_REG, name, reg) DEFINE_FUNCTION VAR(name) + // Save all potentially live caller-save core registers. + PUSH rax + PUSH rcx + PUSH rdx + PUSH rsi + PUSH rdi + PUSH r8 + PUSH r9 + PUSH r10 + PUSH r11 + // Create space for caller-save floating-point registers. + subq MACRO_LITERAL(12 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(12 * 8) + // Save all potentially live caller-save floating-point registers. + movq %xmm0, 0(%rsp) + movq %xmm1, 8(%rsp) + movq %xmm2, 16(%rsp) + movq %xmm3, 24(%rsp) + movq %xmm4, 32(%rsp) + movq %xmm5, 40(%rsp) + movq %xmm6, 48(%rsp) + movq %xmm7, 56(%rsp) + movq %xmm8, 64(%rsp) + movq %xmm9, 72(%rsp) + movq %xmm10, 80(%rsp) + movq %xmm11, 88(%rsp) SETUP_FP_CALLEE_SAVE_FRAME - subq LITERAL(8), %rsp // Alignment padding. - CFI_ADJUST_CFA_OFFSET(8) - movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`. + + .ifnc RAW_VAR(reg), rdi + movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`. + .endif call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - movq %rax, REG_VAR(reg) // Return result into `reg`. - addq LITERAL(8), %rsp // Remove padding. - CFI_ADJUST_CFA_OFFSET(-8) + .ifnc RAW_VAR(reg), rax + movq %rax, REG_VAR(reg) // Return result into `reg`. 
+ .endif + RESTORE_FP_CALLEE_SAVE_FRAME + // Restore floating-point registers. + movq 0(%rsp), %xmm0 + movq 8(%rsp), %xmm1 + movq 16(%rsp), %xmm2 + movq 24(%rsp), %xmm3 + movq 32(%rsp), %xmm4 + movq 40(%rsp), %xmm5 + movq 48(%rsp), %xmm6 + movq 56(%rsp), %xmm7 + movq 64(%rsp), %xmm8 + movq 72(%rsp), %xmm9 + movq 80(%rsp), %xmm10 + movq 88(%rsp), %xmm11 + // Remove floating-point registers. + addq MACRO_LITERAL(12 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(12 * 8)) + // Restore core regs, except `reg`, as it is used to return the + // result of this function (simply remove it from the stack instead). + POP_REG_NE r11, RAW_VAR(reg) + POP_REG_NE r10, RAW_VAR(reg) + POP_REG_NE r9, RAW_VAR(reg) + POP_REG_NE r8, RAW_VAR(reg) + POP_REG_NE rdi, RAW_VAR(reg) + POP_REG_NE rsi, RAW_VAR(reg) + POP_REG_NE rdx, RAW_VAR(reg) + POP_REG_NE rcx, RAW_VAR(reg) + POP_REG_NE rax, RAW_VAR(reg) ret END_FUNCTION VAR(name) END_MACRO -// Note: art_quick_read_barrier_mark_reg00 is implemented above. +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx @@ -1889,7 +1909,7 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx // cannot be used to pass arguments. READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi -// Note: art_quick_read_barrier_mark_reg07 is implemented above. +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10 diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h index 26450c41c7..32425d89a0 100644 --- a/runtime/art_method-inl.h +++ b/runtime/art_method-inl.h @@ -120,6 +120,10 @@ inline uint32_t ArtMethod::GetDexMethodIndex() { return dex_method_index_; } +inline uint32_t ArtMethod::GetImtIndex() { + return GetDexMethodIndex() % ImTable::kSize; +} + inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(size_t pointer_size) { return GetNativePointer<ArtMethod**>(DexCacheResolvedMethodsOffset(pointer_size), pointer_size); @@ -503,7 +507,7 @@ inline void ArtMethod::UpdateEntrypoints(const Visitor& visitor, size_t pointer_ SetEntryPointFromJniPtrSize(new_native_code, pointer_size); } } else { - DCHECK(GetEntryPointFromJniPtrSize(pointer_size) == nullptr); + DCHECK(GetDataPtrSize(pointer_size) == nullptr); } const void* old_code = GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); const void* new_code = visitor(old_code); diff --git a/runtime/art_method.cc b/runtime/art_method.cc index f86cb13512..113827ac69 100644 --- a/runtime/art_method.cc +++ b/runtime/art_method.cc @@ -16,6 +16,8 @@ #include "art_method.h" +#include <cstddef> + #include "arch/context.h" #include "art_field-inl.h" #include "art_method-inl.h" @@ -497,4 +499,24 @@ void ArtMethod::CopyFrom(ArtMethod* src, size_t image_pointer_size) { hotness_count_ = 0; } +bool ArtMethod::IsImagePointerSize(size_t pointer_size) { + // Hijack this function to get access to PtrSizedFieldsOffset. + // + // Ensure that PrtSizedFieldsOffset is correct. We rely here on usually having both 32-bit and + // 64-bit builds. 
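The static_asserts that follow pin the hand-computed PtrSizedFieldsOffset against offsetof at compile time. The same technique on a toy struct, assuming a C++14 compiler; ToyMethod and its fields are invented and only mimic the fixed-size-fields-then-pointer-sized-tail layout idea.

#include <cstddef>
#include <cstdint>
#include <type_traits>

struct ToyMethod {
  uint32_t access_flags;
  uint16_t method_index;
  uint16_t hotness_count;
  struct PtrSizedFields {
    void* data;
    void* entry_point;
  } ptr_sized_fields;

  // Hand-computed offset of the pointer-sized tail, rounded up to pointer size.
  static constexpr size_t PtrSizedFieldsOffset(size_t pointer_size) {
    size_t unrounded = offsetof(ToyMethod, hotness_count) + sizeof(uint16_t);
    return (unrounded + pointer_size - 1) / pointer_size * pointer_size;
  }
};

// offsetof is only guaranteed for standard-layout types, so check that first.
static_assert(std::is_standard_layout<ToyMethod>::value, "ToyMethod is not standard layout");
// The hand-computed offset must agree with what the compiler actually laid out.
static_assert(ToyMethod::PtrSizedFieldsOffset(sizeof(void*)) ==
                  offsetof(ToyMethod, ptr_sized_fields),
              "Unexpected ToyMethod layout");

int main() { return 0; }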
+ static_assert(std::is_standard_layout<ArtMethod>::value, "ArtMethod is not standard layout."); + static_assert((sizeof(void*) != 4) || + (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(4)), + "Unexpected 32-bit class layout."); + static_assert((sizeof(void*) != 8) || + (offsetof(ArtMethod, ptr_sized_fields_) == PtrSizedFieldsOffset(8)), + "Unexpected 64-bit class layout."); + + Runtime* runtime = Runtime::Current(); + if (runtime == nullptr) { + return true; + } + return runtime->GetClassLinker()->GetImagePointerSize() == pointer_size; +} + } // namespace art diff --git a/runtime/art_method.h b/runtime/art_method.h index 90b2406a1d..1d14203b81 100644 --- a/runtime/art_method.h +++ b/runtime/art_method.h @@ -17,6 +17,8 @@ #ifndef ART_RUNTIME_ART_METHOD_H_ #define ART_RUNTIME_ART_METHOD_H_ +#include <cstddef> + #include "base/bit_utils.h" #include "base/casts.h" #include "dex_file.h" @@ -219,7 +221,7 @@ class ImtConflictTable { class ArtMethod FINAL { public: ArtMethod() : access_flags_(0), dex_code_item_offset_(0), dex_method_index_(0), - method_index_(0) { } + method_index_(0), hotness_count_(0) { } ArtMethod(ArtMethod* src, size_t image_pointer_size) { CopyFrom(src, image_pointer_size); @@ -419,6 +421,8 @@ class ArtMethod FINAL { ALWAYS_INLINE uint32_t GetDexMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_); + ALWAYS_INLINE uint32_t GetImtIndex() SHARED_REQUIRES(Locks::mutator_lock_); + void SetDexMethodIndex(uint32_t new_idx) { // Not called within a transaction. dex_method_index_ = new_idx; @@ -506,9 +510,13 @@ class ArtMethod FINAL { PtrSizedFields, dex_cache_resolved_types_) / sizeof(void*) * pointer_size); } - static MemberOffset EntryPointFromJniOffset(size_t pointer_size) { + static MemberOffset DataOffset(size_t pointer_size) { return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER( - PtrSizedFields, entry_point_from_jni_) / sizeof(void*) * pointer_size); + PtrSizedFields, data_) / sizeof(void*) * pointer_size); + } + + static MemberOffset EntryPointFromJniOffset(size_t pointer_size) { + return DataOffset(pointer_size); } static MemberOffset EntryPointFromQuickCompiledCodeOffset(size_t pointer_size) { @@ -516,37 +524,40 @@ class ArtMethod FINAL { PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size); } - ProfilingInfo* GetProfilingInfo(size_t pointer_size) { - return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJniPtrSize(pointer_size)); - } - ImtConflictTable* GetImtConflictTable(size_t pointer_size) { DCHECK(IsRuntimeMethod()); - return reinterpret_cast<ImtConflictTable*>(GetEntryPointFromJniPtrSize(pointer_size)); + return reinterpret_cast<ImtConflictTable*>(GetDataPtrSize(pointer_size)); } ALWAYS_INLINE void SetImtConflictTable(ImtConflictTable* table, size_t pointer_size) { - SetEntryPointFromJniPtrSize(table, pointer_size); + DCHECK(IsRuntimeMethod()); + SetDataPtrSize(table, pointer_size); + } + + ProfilingInfo* GetProfilingInfo(size_t pointer_size) { + return reinterpret_cast<ProfilingInfo*>(GetDataPtrSize(pointer_size)); } ALWAYS_INLINE void SetProfilingInfo(ProfilingInfo* info) { - SetEntryPointFromJniPtrSize(info, sizeof(void*)); + SetDataPtrSize(info, sizeof(void*)); } ALWAYS_INLINE void SetProfilingInfoPtrSize(ProfilingInfo* info, size_t pointer_size) { - SetEntryPointFromJniPtrSize(info, pointer_size); + SetDataPtrSize(info, pointer_size); } static MemberOffset ProfilingInfoOffset() { - return EntryPointFromJniOffset(sizeof(void*)); + DCHECK(IsImagePointerSize(sizeof(void*))); + return 
DataOffset(sizeof(void*)); } void* GetEntryPointFromJni() { + DCHECK(IsNative()); return GetEntryPointFromJniPtrSize(sizeof(void*)); } ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(size_t pointer_size) { - return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size); + return GetDataPtrSize(pointer_size); } void SetEntryPointFromJni(const void* entrypoint) { @@ -555,7 +566,17 @@ class ArtMethod FINAL { } ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, size_t pointer_size) { - SetNativePointer(EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size); + SetDataPtrSize(entrypoint, pointer_size); + } + + ALWAYS_INLINE void* GetDataPtrSize(size_t pointer_size) { + DCHECK(IsImagePointerSize(pointer_size)); + return GetNativePointer<void*>(DataOffset(pointer_size), pointer_size); + } + + ALWAYS_INLINE void SetDataPtrSize(const void* data, size_t pointer_size) { + DCHECK(IsImagePointerSize(pointer_size)); + SetNativePointer(DataOffset(pointer_size), data, pointer_size); } // Is this a CalleSaveMethod or ResolutionMethod and therefore doesn't adhere to normal @@ -640,7 +661,7 @@ class ArtMethod FINAL { // Size of an instance of this native class. static size_t Size(size_t pointer_size) { - return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size) + + return PtrSizedFieldsOffset(pointer_size) + (sizeof(PtrSizedFields) / sizeof(void*)) * pointer_size; } @@ -727,9 +748,7 @@ class ArtMethod FINAL { // Fake padding field gets inserted here. // Must be the last fields in the method. - // PACKED(4) is necessary for the correctness of - // RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size). - struct PACKED(4) PtrSizedFields { + struct PtrSizedFields { // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access. ArtMethod** dex_cache_resolved_methods_; @@ -738,7 +757,7 @@ class ArtMethod FINAL { // Pointer to JNI function registered to this method, or a function to resolve the JNI function, // or the profiling data for non-native methods, or an ImtConflictTable. - void* entry_point_from_jni_; + void* data_; // Method dispatch from quick compiled code invokes this pointer which may cause bridging into // the interpreter. @@ -746,11 +765,14 @@ class ArtMethod FINAL { } ptr_sized_fields_; private: - static size_t PtrSizedFieldsOffset(size_t pointer_size) { - // Round up to pointer size for padding field. - return RoundUp(OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_), pointer_size); + static constexpr size_t PtrSizedFieldsOffset(size_t pointer_size) { + // Round up to pointer size for padding field. Tested in art_method.cc. + return RoundUp(offsetof(ArtMethod, hotness_count_) + sizeof(hotness_count_), pointer_size); } + // Compare given pointer size to the image pointer size. 
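The data_ field that replaces entry_point_from_jni_ is deliberately overloaded: depending on the kind of method it holds the JNI entrypoint, the ProfilingInfo, or an ImtConflictTable, which is why the accessors above now check IsNative()/IsRuntimeMethod() before touching it. A loose illustration of that single-slot pattern (Method, Kind, and the payload types here are hypothetical stand-ins, not ART's classes):

#include <cassert>
#include <cstdint>

struct ProfilingInfo { uint32_t counter = 0; };
struct ConflictTable { uint32_t entries = 0; };

// A method-like object with one pointer-sized slot whose meaning depends on the method kind.
class Method {
 public:
  enum class Kind { kNative, kRuntime, kRegular };
  explicit Method(Kind kind) : kind_(kind) {}

  // Native methods: the slot is the JNI entrypoint.
  void SetJniEntrypoint(const void* fn) { assert(kind_ == Kind::kNative); data_ = const_cast<void*>(fn); }
  const void* GetJniEntrypoint() const { assert(kind_ == Kind::kNative); return data_; }

  // Runtime methods: the slot is the conflict table.
  void SetConflictTable(ConflictTable* t) { assert(kind_ == Kind::kRuntime); data_ = t; }
  ConflictTable* GetConflictTable() const { assert(kind_ == Kind::kRuntime); return static_cast<ConflictTable*>(data_); }

  // Regular methods: the slot is the profiling data.
  void SetProfilingInfo(ProfilingInfo* p) { assert(kind_ == Kind::kRegular); data_ = p; }
  ProfilingInfo* GetProfilingInfo() const { assert(kind_ == Kind::kRegular); return static_cast<ProfilingInfo*>(data_); }

 private:
  Kind kind_;
  void* data_ = nullptr;  // One slot, interpreted according to kind_.
};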
+ static bool IsImagePointerSize(size_t pointer_size); + template<typename T> ALWAYS_INLINE T GetNativePointer(MemberOffset offset, size_t pointer_size) const { static_assert(std::is_pointer<T>::value, "T must be a pointer type"); diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index cb97faab12..d0dad6494e 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -6159,11 +6159,6 @@ void ClassLinker::FillIMTAndConflictTables(mirror::Class* klass) { } } -static inline uint32_t GetIMTIndex(ArtMethod* interface_method) - SHARED_REQUIRES(Locks::mutator_lock_) { - return interface_method->GetDexMethodIndex() % ImTable::kSize; -} - ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count, LinearAlloc* linear_alloc, size_t image_pointer_size) { @@ -6215,7 +6210,7 @@ void ClassLinker::FillIMTFromIfTable(mirror::IfTable* if_table, // or interface methods in the IMT here they will not create extra conflicts since we compare // names and signatures in SetIMTRef. ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_); - const uint32_t imt_index = GetIMTIndex(interface_method); + const uint32_t imt_index = interface_method->GetImtIndex(); // There is only any conflicts if all of the interface methods for an IMT slot don't have // the same implementation method, keep track of this to avoid creating a conflict table in @@ -6269,7 +6264,7 @@ void ClassLinker::FillIMTFromIfTable(mirror::IfTable* if_table, } DCHECK(implementation_method != nullptr); ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_); - const uint32_t imt_index = GetIMTIndex(interface_method); + const uint32_t imt_index = interface_method->GetImtIndex(); if (!imt[imt_index]->IsRuntimeMethod() || imt[imt_index] == unimplemented_method || imt[imt_index] == imt_conflict_method) { @@ -6675,7 +6670,7 @@ bool ClassLinker::LinkInterfaceMethods( auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_); MethodNameAndSignatureComparator interface_name_comparator( interface_method->GetInterfaceMethodIfProxy(image_pointer_size_)); - uint32_t imt_index = GetIMTIndex(interface_method); + uint32_t imt_index = interface_method->GetImtIndex(); ArtMethod** imt_ptr = &out_imt[imt_index]; // For each method listed in the interface's method list, find the // matching method in our class's method list. 
We want to favor the @@ -7700,7 +7695,7 @@ ArtField* ClassLinker::ResolveField(const DexFile& dex_file, } if (is_static) { - resolved = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx); + resolved = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx); } else { resolved = klass->FindInstanceField(dex_cache.Get(), field_idx); } diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index ab14655ab7..7ecd59527b 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -19,7 +19,7 @@ #include "entrypoint_utils.h" -#include "art_method.h" +#include "art_method-inl.h" #include "class_linker-inl.h" #include "common_throws.h" #include "dex_file.h" @@ -600,7 +600,7 @@ inline ArtMethod* FindMethodFromCode(uint32_t method_idx, mirror::Object** this_ } } case kInterface: { - uint32_t imt_index = resolved_method->GetDexMethodIndex() % ImTable::kSize; + uint32_t imt_index = resolved_method->GetImtIndex(); size_t pointer_size = class_linker->GetImagePointerSize(); ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)-> Get(imt_index, pointer_size); diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc index 968ac534b3..8db69a376f 100644 --- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc @@ -20,7 +20,7 @@ namespace art { // Assignable test for code, won't throw. Null and equality tests already performed -extern "C" uint32_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class) +extern "C" size_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class) SHARED_REQUIRES(Locks::mutator_lock_) { DCHECK(klass != nullptr); DCHECK(ref_class != nullptr); diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h index d0dad34789..86fb8818ec 100644 --- a/runtime/entrypoints/quick/quick_default_externs.h +++ b/runtime/entrypoints/quick/quick_default_externs.h @@ -50,16 +50,16 @@ extern "C" int art_quick_set64_instance(uint32_t, void*, int64_t); extern "C" int art_quick_set64_static(uint32_t, int64_t); extern "C" int art_quick_set_obj_instance(uint32_t, void*, void*); extern "C" int art_quick_set_obj_static(uint32_t, void*); -extern "C" int8_t art_quick_get_byte_instance(uint32_t, void*); -extern "C" uint8_t art_quick_get_boolean_instance(uint32_t, void*); -extern "C" int8_t art_quick_get_byte_static(uint32_t); -extern "C" uint8_t art_quick_get_boolean_static(uint32_t); -extern "C" int16_t art_quick_get_short_instance(uint32_t, void*); -extern "C" uint16_t art_quick_get_char_instance(uint32_t, void*); -extern "C" int16_t art_quick_get_short_static(uint32_t); -extern "C" uint16_t art_quick_get_char_static(uint32_t); -extern "C" int32_t art_quick_get32_instance(uint32_t, void*); -extern "C" int32_t art_quick_get32_static(uint32_t); +extern "C" ssize_t art_quick_get_byte_instance(uint32_t, void*); +extern "C" size_t art_quick_get_boolean_instance(uint32_t, void*); +extern "C" ssize_t art_quick_get_byte_static(uint32_t); +extern "C" size_t art_quick_get_boolean_static(uint32_t); +extern "C" ssize_t art_quick_get_short_instance(uint32_t, void*); +extern "C" size_t art_quick_get_char_instance(uint32_t, void*); +extern "C" ssize_t art_quick_get_short_static(uint32_t); +extern "C" size_t art_quick_get_char_static(uint32_t); +extern "C" ssize_t 
art_quick_get32_instance(uint32_t, void*); +extern "C" ssize_t art_quick_get32_static(uint32_t); extern "C" int64_t art_quick_get64_instance(uint32_t, void*); extern "C" int64_t art_quick_get64_static(uint32_t); extern "C" void* art_quick_get_obj_instance(uint32_t, void*); diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index d6b7d9ef2e..e0ec68ee87 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -33,7 +33,7 @@ V(AllocStringFromChars, void*, int32_t, int32_t, void*) \ V(AllocStringFromString, void*, void*) \ \ - V(InstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*) \ + V(InstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*) \ V(CheckCast, void, const mirror::Class*, const mirror::Class*) \ \ V(InitializeStaticStorage, void*, uint32_t) \ @@ -51,16 +51,16 @@ V(Set64Static, int, uint32_t, int64_t) \ V(SetObjInstance, int, uint32_t, void*, void*) \ V(SetObjStatic, int, uint32_t, void*) \ - V(GetByteInstance, int8_t, uint32_t, void*) \ - V(GetBooleanInstance, uint8_t, uint32_t, void*) \ - V(GetByteStatic, int8_t, uint32_t) \ - V(GetBooleanStatic, uint8_t, uint32_t) \ - V(GetShortInstance, int16_t, uint32_t, void*) \ - V(GetCharInstance, uint16_t, uint32_t, void*) \ - V(GetShortStatic, int16_t, uint32_t) \ - V(GetCharStatic, uint16_t, uint32_t) \ - V(Get32Instance, int32_t, uint32_t, void*) \ - V(Get32Static, int32_t, uint32_t) \ + V(GetByteInstance, ssize_t, uint32_t, void*) \ + V(GetBooleanInstance, size_t, uint32_t, void*) \ + V(GetByteStatic, ssize_t, uint32_t) \ + V(GetBooleanStatic, size_t, uint32_t) \ + V(GetShortInstance, ssize_t, uint32_t, void*) \ + V(GetCharInstance, size_t, uint32_t, void*) \ + V(GetShortStatic, ssize_t, uint32_t) \ + V(GetCharStatic, size_t, uint32_t) \ + V(Get32Instance, ssize_t, uint32_t, void*) \ + V(Get32Static, ssize_t, uint32_t) \ V(Get64Instance, int64_t, uint32_t, void*) \ V(Get64Static, int64_t, uint32_t) \ V(GetObjInstance, void*, uint32_t, void*) \ diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc index a245f18d3f..1a12bd45de 100644 --- a/runtime/entrypoints/quick/quick_field_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc @@ -55,9 +55,7 @@ ALWAYS_INLINE static inline ArtField* FindInstanceField(uint32_t field_idx, return field; } -extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx, - ArtMethod* referrer, - Thread* self) +extern "C" ssize_t artGetByteStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t)); @@ -71,9 +69,7 @@ extern "C" int8_t artGetByteStaticFromCode(uint32_t field_idx, return 0; // Will throw exception by checking with Thread::Current. } -extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx, - ArtMethod* referrer, - Thread* self) +extern "C" size_t artGetBooleanStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t)); @@ -87,9 +83,7 @@ extern "C" uint8_t artGetBooleanStaticFromCode(uint32_t field_idx, return 0; // Will throw exception by checking with Thread::Current. 
} -extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx, - ArtMethod* referrer, - Thread* self) +extern "C" ssize_t artGetShortStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t)); @@ -103,9 +97,7 @@ extern "C" int16_t artGetShortStaticFromCode(uint32_t field_idx, return 0; // Will throw exception by checking with Thread::Current. } -extern "C" uint16_t artGetCharStaticFromCode(uint32_t field_idx, - ArtMethod* referrer, - Thread* self) +extern "C" size_t artGetCharStaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t)); @@ -119,9 +111,7 @@ extern "C" uint16_t artGetCharStaticFromCode(uint32_t field_idx, return 0; // Will throw exception by checking with Thread::Current. } -extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx, - ArtMethod* referrer, - Thread* self) +extern "C" size_t artGet32StaticFromCode(uint32_t field_idx, ArtMethod* referrer, Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int32_t)); @@ -173,10 +163,10 @@ extern "C" mirror::Object* artGetObjStaticFromCode(uint32_t field_idx, return nullptr; // Will throw exception by checking with Thread::Current. } -extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx, - mirror::Object* obj, - ArtMethod* referrer, - Thread* self) +extern "C" ssize_t artGetByteInstanceFromCode(uint32_t field_idx, + mirror::Object* obj, + ArtMethod* referrer, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t)); @@ -194,10 +184,10 @@ extern "C" int8_t artGetByteInstanceFromCode(uint32_t field_idx, return 0; // Will throw exception by checking with Thread::Current. } -extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx, - mirror::Object* obj, - ArtMethod* referrer, - Thread* self) +extern "C" size_t artGetBooleanInstanceFromCode(uint32_t field_idx, + mirror::Object* obj, + ArtMethod* referrer, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t)); @@ -214,7 +204,7 @@ extern "C" uint8_t artGetBooleanInstanceFromCode(uint32_t field_idx, } return 0; // Will throw exception by checking with Thread::Current. } -extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx, +extern "C" ssize_t artGetShortInstanceFromCode(uint32_t field_idx, mirror::Object* obj, ArtMethod* referrer, Thread* self) @@ -235,10 +225,10 @@ extern "C" int16_t artGetShortInstanceFromCode(uint32_t field_idx, return 0; // Will throw exception by checking with Thread::Current. 
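A note on the signature changes in this block: these entrypoints are invoked from hand-written assembly stubs that read the result out of a full general-purpose register, so widening the C-visible return types to size_t/ssize_t makes the zero- or sign-extension explicit at the C++ boundary instead of relying on whatever the compiler leaves in the upper bits. A small illustrative sketch (GetByteField/GetBooleanField are hypothetical helpers, not the real entrypoints):

#include <cstdint>
#include <cstdio>
#include <sys/types.h>  // ssize_t

// Widening at the boundary: sign-extend signed narrow values, zero-extend unsigned ones,
// so the full register handed back to the stub has a well-defined value.
ssize_t GetByteField(int8_t raw) { return static_cast<ssize_t>(raw); }     // sign-extended
size_t GetBooleanField(uint8_t raw) { return static_cast<size_t>(raw); }   // zero-extended

int main() {
  std::printf("%zd %zu\n", GetByteField(-1), GetBooleanField(1));  // prints "-1 1"
  return 0;
}

The same reasoning would apply to the Mterp helpers further down, whose bool returns become size_t.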
} -extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx, - mirror::Object* obj, - ArtMethod* referrer, - Thread* self) +extern "C" size_t artGetCharInstanceFromCode(uint32_t field_idx, + mirror::Object* obj, + ArtMethod* referrer, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t)); @@ -256,10 +246,10 @@ extern "C" uint16_t artGetCharInstanceFromCode(uint32_t field_idx, return 0; // Will throw exception by checking with Thread::Current. } -extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx, - mirror::Object* obj, - ArtMethod* referrer, - Thread* self) +extern "C" size_t artGet32InstanceFromCode(uint32_t field_idx, + mirror::Object* obj, + ArtMethod* referrer, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int32_t)); diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 1152b940d8..49043f6741 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -2170,8 +2170,7 @@ extern "C" TwoWordReturn artInvokeInterfaceTrampoline(uint32_t deadbeef ATTRIBUT if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) { // If the dex cache already resolved the interface method, look whether we have // a match in the ImtConflictTable. - uint32_t imt_index = interface_method->GetDexMethodIndex(); - ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*)); + ArtMethod* conflict_method = imt->Get(interface_method->GetImtIndex(), sizeof(void*)); if (LIKELY(conflict_method->IsRuntimeMethod())) { ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*)); DCHECK(current_table != nullptr); @@ -2222,8 +2221,8 @@ extern "C" TwoWordReturn artInvokeInterfaceTrampoline(uint32_t deadbeef ATTRIBUT // We arrive here if we have found an implementation, and it is not in the ImtConflictTable. // We create a new table with the new pair { interface_method, method }. - uint32_t imt_index = interface_method->GetDexMethodIndex(); - ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*)); + uint32_t imt_index = interface_method->GetImtIndex(); + ArtMethod* conflict_method = imt->Get(imt_index, sizeof(void*)); if (conflict_method->IsRuntimeMethod()) { ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable( cls.Get(), @@ -2234,7 +2233,7 @@ extern "C" TwoWordReturn artInvokeInterfaceTrampoline(uint32_t deadbeef ATTRIBUT if (new_conflict_method != conflict_method) { // Update the IMT if we create a new conflict method. No fence needed here, as the // data is consistent. 
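For reference, the IMT slot used in this trampoline (and in the class_linker.cc hunks above) now comes from the new ArtMethod::GetImtIndex() helper instead of open-coded modulo arithmetic. A minimal sketch of the lookup it centralizes, with kImtSize and the Method/ImTable types as stand-ins rather than ART's definitions:

#include <array>
#include <cstdint>

constexpr size_t kImtSize = 64;  // Stand-in for ImTable::kSize.

struct Method {
  uint32_t dex_method_index;
  // Mirrors the shape of ArtMethod::GetImtIndex(): every interface method maps
  // to a fixed IMT slot derived from its dex method index.
  uint32_t GetImtIndex() const { return static_cast<uint32_t>(dex_method_index % kImtSize); }
};

using ImTable = std::array<Method*, kImtSize>;

// Dispatch: look up whatever implementation is registered in the slot for this interface method.
Method* Lookup(const ImTable& imt, const Method& interface_method) {
  return imt[interface_method.GetImtIndex()];
}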
- imt->Set(imt_index % ImTable::kSize, + imt->Set(imt_index, new_conflict_method, sizeof(void*)); } diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h index 301111251a..4019a5b536 100644 --- a/runtime/gc/collector/concurrent_copying-inl.h +++ b/runtime/gc/collector/concurrent_copying-inl.h @@ -153,6 +153,14 @@ inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { } } +inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) { + // TODO: Consider removing this check when we are done investigating slow paths. b/30162165 + if (UNLIKELY(mark_from_read_barrier_measurements_)) { + return MarkFromReadBarrierWithMeasurements(from_ref); + } + return Mark(from_ref); +} + inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) { DCHECK(region_space_->IsInFromSpace(from_ref)); LockWord lw = from_ref->GetLockWord(false); diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index b7b5aa0059..d2d2f234ab 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -17,7 +17,9 @@ #include "concurrent_copying.h" #include "art_field-inl.h" +#include "base/histogram-inl.h" #include "base/stl_util.h" +#include "base/systrace.h" #include "debugger.h" #include "gc/accounting/heap_bitmap-inl.h" #include "gc/accounting/space_bitmap-inl.h" @@ -39,7 +41,9 @@ namespace collector { static constexpr size_t kDefaultGcMarkStackSize = 2 * MB; -ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix) +ConcurrentCopying::ConcurrentCopying(Heap* heap, + const std::string& name_prefix, + bool measure_read_barrier_slow_path) : GarbageCollector(heap, name_prefix + (name_prefix.empty() ? "" : " ") + "concurrent copying + mark sweep"), @@ -54,6 +58,14 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix) heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff), weak_ref_access_enabled_(true), skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock), + measure_read_barrier_slow_path_(measure_read_barrier_slow_path), + rb_slow_path_ns_(0), + rb_slow_path_count_(0), + rb_slow_path_count_gc_(0), + rb_slow_path_histogram_lock_("Read barrier histogram lock"), + rb_slow_path_time_histogram_("Mutator time in read barrier slow path", 500, 32), + rb_slow_path_count_total_(0), + rb_slow_path_count_gc_total_(0), rb_table_(heap_->GetReadBarrierTable()), force_evacuate_all_(false), immune_gray_stack_lock_("concurrent copying immune gray stack lock", @@ -162,6 +174,14 @@ void ConcurrentCopying::InitializePhase() { MutexLock mu(Thread::Current(), mark_stack_lock_); CHECK(false_gray_stack_.empty()); } + + mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_; + if (measure_read_barrier_slow_path_) { + rb_slow_path_ns_.StoreRelaxed(0); + rb_slow_path_count_.StoreRelaxed(0); + rb_slow_path_count_gc_.StoreRelaxed(0); + } + immune_spaces_.Reset(); bytes_moved_.StoreRelaxed(0); objects_moved_.StoreRelaxed(0); @@ -194,7 +214,7 @@ void ConcurrentCopying::InitializePhase() { } // Used to switch the thread roots of a thread from from-space refs to to-space refs. 
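The measurement plumbing added to ConcurrentCopying in these hunks gates the read-barrier slow path behind a flag and, when enabled, accumulates per-call time and counts into relaxed atomics that are later folded into a histogram. A simplified sketch of that pattern, using plain std::atomic and a placeholder Mark() rather than the collector's actual code:

#include <atomic>
#include <chrono>
#include <cstdint>

std::atomic<uint64_t> g_slow_path_ns{0};
std::atomic<uint64_t> g_slow_path_count{0};

int* Mark(int* ref) { return ref; }  // Placeholder for the real marking routine.

// Only the instrumented entry point pays for the clock reads and counter updates.
int* MarkWithMeasurements(int* ref) {
  g_slow_path_count.fetch_add(1, std::memory_order_relaxed);
  auto start = std::chrono::steady_clock::now();
  int* result = Mark(ref);
  auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
      std::chrono::steady_clock::now() - start).count();
  g_slow_path_ns.fetch_add(static_cast<uint64_t>(ns), std::memory_order_relaxed);
  return result;
}

// When measurement is disabled, callers go straight to Mark().
int* MarkFromReadBarrier(int* ref, bool measure) {
  return measure ? MarkWithMeasurements(ref) : Mark(ref);
}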
-class ConcurrentCopying::ThreadFlipVisitor : public Closure { +class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor { public: ThreadFlipVisitor(ConcurrentCopying* concurrent_copying, bool use_tlab) : concurrent_copying_(concurrent_copying), use_tlab_(use_tlab) { @@ -221,10 +241,44 @@ class ConcurrentCopying::ThreadFlipVisitor : public Closure { thread->RevokeThreadLocalAllocationStack(); } ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); - thread->VisitRoots(concurrent_copying_); + // We can use the non-CAS VisitRoots functions below because we update thread-local GC roots + // only. + thread->VisitRoots(this); concurrent_copying_->GetBarrier().Pass(self); } + void VisitRoots(mirror::Object*** roots, + size_t count, + const RootInfo& info ATTRIBUTE_UNUSED) + SHARED_REQUIRES(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + mirror::Object** root = roots[i]; + mirror::Object* ref = *root; + if (ref != nullptr) { + mirror::Object* to_ref = concurrent_copying_->Mark(ref); + if (to_ref != ref) { + *root = to_ref; + } + } + } + } + + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, + size_t count, + const RootInfo& info ATTRIBUTE_UNUSED) + SHARED_REQUIRES(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + mirror::CompressedReference<mirror::Object>* const root = roots[i]; + if (!root->IsNull()) { + mirror::Object* ref = root->AsMirrorPtr(); + mirror::Object* to_ref = concurrent_copying_->Mark(ref); + if (to_ref != ref) { + root->Assign(to_ref); + } + } + } + } + private: ConcurrentCopying* const concurrent_copying_; const bool use_tlab_; @@ -1996,9 +2050,17 @@ void ConcurrentCopying::FinishPhase() { MutexLock mu(Thread::Current(), skipped_blocks_lock_); skipped_blocks_map_.clear(); } - ReaderMutexLock mu(self, *Locks::mutator_lock_); - WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_); - heap_->ClearMarkedObjects(); + { + ReaderMutexLock mu(self, *Locks::mutator_lock_); + WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_); + heap_->ClearMarkedObjects(); + } + if (measure_read_barrier_slow_path_) { + MutexLock mu(self, rb_slow_path_histogram_lock_); + rb_slow_path_time_histogram_.AdjustAndAddValue(rb_slow_path_ns_.LoadRelaxed()); + rb_slow_path_count_total_ += rb_slow_path_count_.LoadRelaxed(); + rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.LoadRelaxed(); + } } bool ConcurrentCopying::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) { @@ -2036,6 +2098,37 @@ void ConcurrentCopying::RevokeAllThreadLocalBuffers() { region_space_->RevokeAllThreadLocalBuffers(); } +mirror::Object* ConcurrentCopying::MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) { + if (Thread::Current() != thread_running_gc_) { + rb_slow_path_count_.FetchAndAddRelaxed(1u); + } else { + rb_slow_path_count_gc_.FetchAndAddRelaxed(1u); + } + ScopedTrace tr(__FUNCTION__); + const uint64_t start_time = measure_read_barrier_slow_path_ ? 
NanoTime() : 0u; + mirror::Object* ret = Mark(from_ref); + if (measure_read_barrier_slow_path_) { + rb_slow_path_ns_.FetchAndAddRelaxed(NanoTime() - start_time); + } + return ret; +} + +void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) { + GarbageCollector::DumpPerformanceInfo(os); + MutexLock mu(Thread::Current(), rb_slow_path_histogram_lock_); + if (rb_slow_path_time_histogram_.SampleSize() > 0) { + Histogram<uint64_t>::CumulativeData cumulative_data; + rb_slow_path_time_histogram_.CreateHistogram(&cumulative_data); + rb_slow_path_time_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data); + } + if (rb_slow_path_count_total_ > 0) { + os << "Slow path count " << rb_slow_path_count_total_ << "\n"; + } + if (rb_slow_path_count_gc_total_ > 0) { + os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n"; + } +} + } // namespace collector } // namespace gc } // namespace art diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index 166a1f0b2a..6a8d052cb8 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -58,17 +58,24 @@ class ConcurrentCopying : public GarbageCollector { // Enable verbose mode. static constexpr bool kVerboseMode = false; - ConcurrentCopying(Heap* heap, const std::string& name_prefix = ""); + ConcurrentCopying(Heap* heap, + const std::string& name_prefix = "", + bool measure_read_barrier_slow_path = false); ~ConcurrentCopying(); virtual void RunPhases() OVERRIDE - REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); + REQUIRES(!immune_gray_stack_lock_, + !mark_stack_lock_, + !rb_slow_path_histogram_lock_, + !skipped_blocks_lock_); void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_); void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); - void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + void FinishPhase() REQUIRES(!mark_stack_lock_, + !rb_slow_path_histogram_lock_, + !skipped_blocks_lock_); void BindBitmaps() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_); @@ -95,7 +102,11 @@ class ConcurrentCopying : public GarbageCollector { return IsMarked(ref) == ref; } template<bool kGrayImmuneObject = true> - ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_) + ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); + ALWAYS_INLINE mirror::Object* MarkFromReadBarrier(mirror::Object* from_ref) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); bool IsMarking() const { return is_marking_; @@ -203,6 +214,10 @@ class ConcurrentCopying : public GarbageCollector { REQUIRES(!mark_stack_lock_); void ScanImmuneObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + mirror::Object* MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); + void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_); space::RegionSpace* region_space_; 
// The underlying region space. std::unique_ptr<Barrier> gc_barrier_; @@ -251,6 +266,20 @@ class ConcurrentCopying : public GarbageCollector { Atomic<size_t> to_space_bytes_skipped_; Atomic<size_t> to_space_objects_skipped_; + // If measure_read_barrier_slow_path_ is true, we count how long is spent in MarkFromReadBarrier + // and also log. + bool measure_read_barrier_slow_path_; + // mark_from_read_barrier_measurements_ is true if systrace is enabled or + // measure_read_barrier_time_ is true. + bool mark_from_read_barrier_measurements_; + Atomic<uint64_t> rb_slow_path_ns_; + Atomic<uint64_t> rb_slow_path_count_; + Atomic<uint64_t> rb_slow_path_count_gc_; + mutable Mutex rb_slow_path_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + Histogram<uint64_t> rb_slow_path_time_histogram_ GUARDED_BY(rb_slow_path_histogram_lock_); + uint64_t rb_slow_path_count_total_ GUARDED_BY(rb_slow_path_histogram_lock_); + uint64_t rb_slow_path_count_gc_total_ GUARDED_BY(rb_slow_path_histogram_lock_); + accounting::ReadBarrierTable* rb_table_; bool force_evacuate_all_; // True if all regions are evacuated. Atomic<bool> updated_all_immune_objects_; diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index 580486aa68..e0b71a7e24 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -181,7 +181,7 @@ class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public Mark void RecordFree(const ObjectBytePair& freed); // Record a free of large objects. void RecordFreeLOS(const ObjectBytePair& freed); - void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_); + virtual void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_); // Helper functions for querying if objects are marked. These are used for processing references, // and will be used for reading system weaks while the GC is running. diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 8da1493db6..6f4767e391 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -121,6 +121,10 @@ static constexpr bool kDumpRosAllocStatsOnSigQuit = false; static constexpr size_t kNativeAllocationHistogramBuckets = 16; +// Extra added to the heap growth multiplier. Used to adjust the GC ergonomics for the read barrier +// config. +static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 
1.0 : 0.0; + static inline bool CareAboutPauseTimes() { return Runtime::Current()->InJankPerceptibleProcessState(); } @@ -153,6 +157,7 @@ Heap::Heap(size_t initial_size, bool verify_pre_sweeping_rosalloc, bool verify_post_gc_rosalloc, bool gc_stress_mode, + bool measure_gc_performance, bool use_homogeneous_space_compaction_for_oom, uint64_t min_interval_homogeneous_space_compaction_by_oom) : non_moving_space_(nullptr), @@ -220,7 +225,8 @@ Heap::Heap(size_t initial_size, min_free_(min_free), max_free_(max_free), target_utilization_(target_utilization), - foreground_heap_growth_multiplier_(foreground_heap_growth_multiplier), + foreground_heap_growth_multiplier_( + foreground_heap_growth_multiplier + kExtraHeapGrowthMultiplier), total_wait_time_(0), verify_object_mode_(kVerifyObjectModeDisabled), disable_moving_gc_count_(0), @@ -594,7 +600,9 @@ Heap::Heap(size_t initial_size, garbage_collectors_.push_back(semi_space_collector_); } if (MayUseCollector(kCollectorTypeCC)) { - concurrent_copying_collector_ = new collector::ConcurrentCopying(this); + concurrent_copying_collector_ = new collector::ConcurrentCopying(this, + "", + measure_gc_performance); garbage_collectors_.push_back(concurrent_copying_collector_); } if (MayUseCollector(kCollectorTypeMC)) { diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 6fb048a5d7..bb0d11a1d7 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -182,6 +182,7 @@ class Heap { bool verify_pre_sweeping_rosalloc, bool verify_post_gc_rosalloc, bool gc_stress_mode, + bool measure_gc_performance, bool use_homogeneous_space_compaction, uint64_t min_interval_homogeneous_space_compaction_by_oom); diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index 8c42b3abce..f1f7f42117 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -324,7 +324,7 @@ static inline JValue Execute( } else { while (true) { // Mterp does not support all instrumentation/debugging. - if (MterpShouldSwitchInterpreters()) { + if (MterpShouldSwitchInterpreters() != 0) { return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register, false); } diff --git a/runtime/interpreter/mterp/arm64/fbinop2addr.S b/runtime/interpreter/mterp/arm64/fbinop2addr.S index 0d57cbf2cf..04236adb81 100644 --- a/runtime/interpreter/mterp/arm64/fbinop2addr.S +++ b/runtime/interpreter/mterp/arm64/fbinop2addr.S @@ -7,8 +7,7 @@ */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 $instr // s2<- op diff --git a/runtime/interpreter/mterp/arm64/fcmp.S b/runtime/interpreter/mterp/arm64/fcmp.S index a45e789f68..cad63189af 100644 --- a/runtime/interpreter/mterp/arm64/fcmp.S +++ b/runtime/interpreter/mterp/arm64/fcmp.S @@ -1,4 +1,4 @@ -%default {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1","cond":"le"} +%default {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"} /* * Compare two floating-point values. Puts 0, 1, or -1 into the * destination register based on the results of the comparison. 
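The rewritten fcmp helper in this file's hunks builds the -1/0/+1 result with cset/cneg, choosing the condition so that an unordered (NaN) comparison lands on the required side: lt for the cmpl variants (NaN yields -1) and cc for the cmpg variants (NaN yields +1). In C++ terms, the value being computed is roughly:

#include <cmath>
#include <cstdio>

// cmpl semantics: NaN compares as "less", so the result is -1.
int CmplFloat(float a, float b) {
  if (a > b) return 1;
  if (a == b) return 0;
  return -1;  // a < b, or either operand is NaN.
}

// cmpg semantics: NaN compares as "greater", so the result is +1.
int CmpgFloat(float a, float b) {
  if (a < b) return -1;
  if (a == b) return 0;
  return 1;  // a > b, or either operand is NaN.
}

int main() {
  std::printf("%d %d\n", CmplFloat(NAN, 0.0f), CmpgFloat(NAN, 0.0f));  // prints "-1 1"
  return 0;
}

With the condition code alone distinguishing the ordered and unordered cases, the removed default_val/csel sequence is no longer needed.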
@@ -10,10 +10,9 @@ lsr w3, w0, #8 // w3<- CC GET_VREG$wide $r1, w2 GET_VREG$wide $r2, w3 - mov w0, #$default_val fcmp $r1, $r2 - csneg w0, w0, w0, $cond - csel w0, wzr, w0, eq + cset w0, ne + cneg w0, w0, $cond FETCH_ADVANCE_INST 2 // advance rPC, load rINST GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w4 // vAA<- w0 diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S index 2d3a11eafa..7628ed3c47 100644 --- a/runtime/interpreter/mterp/arm64/footer.S +++ b/runtime/interpreter/mterp/arm64/footer.S @@ -234,7 +234,7 @@ MterpOnStackReplacement: #if MTERP_LOGGING mov x0, xSELF add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 + sxtw x2, wINST bl MterpLogOSR #endif mov x0, #1 // Signal normal return diff --git a/runtime/interpreter/mterp/arm64/funopNarrow.S b/runtime/interpreter/mterp/arm64/funopNarrow.S index 9f5ad1e87a..aed830bc23 100644 --- a/runtime/interpreter/mterp/arm64/funopNarrow.S +++ b/runtime/interpreter/mterp/arm64/funopNarrow.S @@ -8,10 +8,9 @@ */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG $srcreg, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A $instr // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG $tgtreg, w4 // vA<- d0 diff --git a/runtime/interpreter/mterp/arm64/funopNarrower.S b/runtime/interpreter/mterp/arm64/funopNarrower.S index 411396b290..6fddfea979 100644 --- a/runtime/interpreter/mterp/arm64/funopNarrower.S +++ b/runtime/interpreter/mterp/arm64/funopNarrower.S @@ -7,10 +7,9 @@ */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE $srcreg, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A $instr // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG $tgtreg, w4 // vA<- d0 diff --git a/runtime/interpreter/mterp/arm64/funopWide.S b/runtime/interpreter/mterp/arm64/funopWide.S index d83b39c251..409e26b6ec 100644 --- a/runtime/interpreter/mterp/arm64/funopWide.S +++ b/runtime/interpreter/mterp/arm64/funopWide.S @@ -7,10 +7,9 @@ */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE $srcreg, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A $instr // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE $tgtreg, w4 // vA<- d0 diff --git a/runtime/interpreter/mterp/arm64/funopWider.S b/runtime/interpreter/mterp/arm64/funopWider.S index 50a73f1997..4c91ebcdc6 100644 --- a/runtime/interpreter/mterp/arm64/funopWider.S +++ b/runtime/interpreter/mterp/arm64/funopWider.S @@ -7,10 +7,9 @@ */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG $srcreg, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A $instr // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE $tgtreg, w4 // vA<- d0 diff --git a/runtime/interpreter/mterp/arm64/op_cmp_long.S b/runtime/interpreter/mterp/arm64/op_cmp_long.S index 982e5b161d..c4ad984084 100644 --- a/runtime/interpreter/mterp/arm64/op_cmp_long.S +++ b/runtime/interpreter/mterp/arm64/op_cmp_long.S @@ -5,8 +5,8 @@ GET_VREG_WIDE x1, w2 GET_VREG_WIDE x2, w3 cmp x1, x2 - csinc w0, wzr, wzr, eq - csneg w0, w0, w0, ge + cset w0, ne + cneg w0, w0, lt FETCH_ADVANCE_INST 2 // advance rPC, load wINST SET_VREG w0, w4 
GET_INST_OPCODE ip // extract opcode from wINST diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_double.S b/runtime/interpreter/mterp/arm64/op_cmpg_double.S index 14f9ff8a05..30cb7ebbe2 100644 --- a/runtime/interpreter/mterp/arm64/op_cmpg_double.S +++ b/runtime/interpreter/mterp/arm64/op_cmpg_double.S @@ -1 +1 @@ -%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"1", "cond":"pl"} +%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"cc"} diff --git a/runtime/interpreter/mterp/arm64/op_cmpg_float.S b/runtime/interpreter/mterp/arm64/op_cmpg_float.S index 3a20cbae1e..ba23f43462 100644 --- a/runtime/interpreter/mterp/arm64/op_cmpg_float.S +++ b/runtime/interpreter/mterp/arm64/op_cmpg_float.S @@ -1 +1 @@ -%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"1", "cond":"pl"} +%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"cc"} diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_double.S b/runtime/interpreter/mterp/arm64/op_cmpl_double.S index 06d59179e1..c73968588e 100644 --- a/runtime/interpreter/mterp/arm64/op_cmpl_double.S +++ b/runtime/interpreter/mterp/arm64/op_cmpl_double.S @@ -1 +1 @@ -%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "default_val":"-1", "cond":"le"} +%include "arm64/fcmp.S" {"wide":"_WIDE", "r1":"d1", "r2":"d2", "cond":"lt"} diff --git a/runtime/interpreter/mterp/arm64/op_cmpl_float.S b/runtime/interpreter/mterp/arm64/op_cmpl_float.S index d87d086259..32a931935b 100644 --- a/runtime/interpreter/mterp/arm64/op_cmpl_float.S +++ b/runtime/interpreter/mterp/arm64/op_cmpl_float.S @@ -1 +1 @@ -%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "default_val":"-1", "cond":"le"} +%include "arm64/fcmp.S" {"wide":"", "r1":"s1", "r2":"s2", "cond":"lt"} diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_16.S b/runtime/interpreter/mterp/arm64/op_const_wide_16.S index e43628bccd..553d481541 100644 --- a/runtime/interpreter/mterp/arm64/op_const_wide_16.S +++ b/runtime/interpreter/mterp/arm64/op_const_wide_16.S @@ -1,8 +1,7 @@ /* const-wide/16 vAA, #+BBBB */ - FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended + FETCH_S x0, 1 // x0<- ssssssssssssBBBB (sign-extended) lsr w3, wINST, #8 // w3<- AA FETCH_ADVANCE_INST 2 // advance rPC, load rINST - sbfm x0, x0, 0, 31 GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_32.S b/runtime/interpreter/mterp/arm64/op_const_wide_32.S index 527f7d8167..9dc4fc3867 100644 --- a/runtime/interpreter/mterp/arm64/op_const_wide_32.S +++ b/runtime/interpreter/mterp/arm64/op_const_wide_32.S @@ -1,10 +1,9 @@ /* const-wide/32 vAA, #+BBBBbbbb */ - FETCH w0, 1 // w0<- 0000bbbb (low) + FETCH w0, 1 // x0<- 000000000000bbbb (low) lsr w3, wINST, #8 // w3<- AA - FETCH_S w2, 2 // w2<- ssssBBBB (high) + FETCH_S x2, 2 // x2<- ssssssssssssBBBB (high) FETCH_ADVANCE_INST 3 // advance rPC, load wINST GET_INST_OPCODE ip // extract opcode from wINST - orr w0, w0, w2, lsl #16 // w0<- BBBBbbbb - sbfm x0, x0, 0, 31 + orr x0, x0, x2, lsl #16 // x0<- ssssssssBBBBbbbb SET_VREG_WIDE x0, w3 GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/op_fill_array_data.S b/runtime/interpreter/mterp/arm64/op_fill_array_data.S index f50d9e40ad..86fa6dbbd2 100644 --- a/runtime/interpreter/mterp/arm64/op_fill_array_data.S +++ b/runtime/interpreter/mterp/arm64/op_fill_array_data.S @@ -1,11 +1,11 @@ /* fill-array-data vAA, +BBBBBBBB */ 
EXPORT_PC - FETCH w0, 1 // w0<- bbbb (lo) - FETCH w1, 2 // w1<- BBBB (hi) + FETCH w0, 1 // x0<- 000000000000bbbb (lo) + FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi) lsr w3, wINST, #8 // w3<- AA - orr w1, w0, w1, lsl #16 // w1<- BBBBbbbb + orr x1, x0, x1, lsl #16 // x1<- ssssssssBBBBbbbb GET_VREG w0, w3 // w0<- vAA (array object) - add x1, xPC, w1, lsl #1 // w1<- PC + BBBBbbbb*2 (array data off.) + add x1, xPC, x1, lsl #1 // x1<- PC + ssssssssBBBBbbbb*2 (array data off.) bl MterpFillArrayData // (obj, payload) cbz w0, MterpPossibleException // exception? FETCH_ADVANCE_INST 3 // advance rPC, load rINST diff --git a/runtime/interpreter/mterp/arm64/op_iget_quick.S b/runtime/interpreter/mterp/arm64/op_iget_quick.S index 45c68a3a79..699b2c4229 100644 --- a/runtime/interpreter/mterp/arm64/op_iget_quick.S +++ b/runtime/interpreter/mterp/arm64/op_iget_quick.S @@ -5,8 +5,7 @@ FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null $load w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST $extend diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S index 2480d2d222..30b30c2d4d 100644 --- a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S +++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S @@ -3,7 +3,7 @@ FETCH w4, 1 // w4<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cbz w3, common_errNullObject // object was null + cbz w3, common_errNullObject // object was null add x4, x3, x4 // create direct pointer ldr x0, [x4] FETCH_ADVANCE_INST 2 // advance rPC, load wINST diff --git a/runtime/interpreter/mterp/arm64/op_instance_of.S b/runtime/interpreter/mterp/arm64/op_instance_of.S index 647bc75cfd..a56705a68b 100644 --- a/runtime/interpreter/mterp/arm64/op_instance_of.S +++ b/runtime/interpreter/mterp/arm64/op_instance_of.S @@ -13,8 +13,7 @@ mov x3, xSELF // w3<- self bl MterpInstanceOf // (index, &obj, method, self) ldr x1, [xSELF, #THREAD_EXCEPTION_OFFSET] - lsr w2, wINST, #8 // w2<- A+ - and w2, w2, #15 // w2<- A + ubfx w2, wINST, #8, #4 // w2<- A PREFETCH_INST 2 cbnz x1, MterpException ADVANCE 2 // advance rPC diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S index 13d2120392..35830f3881 100644 --- a/runtime/interpreter/mterp/arm64/op_int_to_long.S +++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S @@ -1 +1 @@ -%include "arm64/funopWider.S" {"instr":"sbfm x0, x0, 0, 31", "srcreg":"w0", "tgtreg":"x0"} +%include "arm64/funopWider.S" {"instr":"sxtw x0, w0", "srcreg":"w0", "tgtreg":"x0"} diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S index 27b5dc57b7..566e2bfdd4 100644 --- a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S +++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S @@ -3,8 +3,7 @@ FETCH w3, 1 // w3<- field byte offset GET_VREG w2, w2 // w2<- fp[B], the object pointer ubfx w0, wINST, #8, #4 // w0<- A - cmp w2, #0 // check object for null - beq common_errNullObject // object was null + cbz w2, common_errNullObject // object was null GET_VREG_WIDE x0, w0 // x0-< fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load wINST add x1, x2, x3 // create a direct pointer diff --git 
a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S index 1456f1a650..408e03069b 100644 --- a/runtime/interpreter/mterp/arm64/op_packed_switch.S +++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S @@ -9,12 +9,12 @@ * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ - FETCH w0, 1 // w0<- bbbb (lo) - FETCH w1, 2 // w1<- BBBB (hi) + FETCH w0, 1 // x0<- 000000000000bbbb (lo) + FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi) lsr w3, wINST, #8 // w3<- AA - orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb + orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb GET_VREG w1, w3 // w1<- vAA - add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2 + add x0, xPC, x0, lsl #1 // x0<- PC + ssssssssBBBBbbbb*2 bl $func // w0<- code-unit branch offset - sbfm xINST, x0, 0, 31 + sxtw xINST, w0 b MterpCommonTakenBranchNoFlags diff --git a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S index 0b918910c7..95f81c5a23 100644 --- a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S +++ b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S @@ -1,12 +1,10 @@ /* rem vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 bl fmodf - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A FETCH_ADVANCE_INST 1 // advance rPC, load rINST GET_INST_OPCODE ip // extract opcode from rINST SET_VREG s0, w9 diff --git a/runtime/interpreter/mterp/arm64/op_shl_int.S b/runtime/interpreter/mterp/arm64/op_shl_int.S index bd0f237cfe..3062a3fad8 100644 --- a/runtime/interpreter/mterp/arm64/op_shl_int.S +++ b/runtime/interpreter/mterp/arm64/op_shl_int.S @@ -1 +1 @@ -%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"} +%include "arm64/binop.S" {"instr":"lsl w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S index b4671d2f1c..9a7e09f016 100644 --- a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S +++ b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S @@ -1 +1 @@ -%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"} +%include "arm64/binop2addr.S" {"instr":"lsl w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S index 4dd32e08a2..17f57f9f5c 100644 --- a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S +++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S @@ -1 +1 @@ -%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"} +%include "arm64/binopLit8.S" {"instr":"lsl w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shr_int.S b/runtime/interpreter/mterp/arm64/op_shr_int.S index c214a18fd0..493b7407f7 100644 --- a/runtime/interpreter/mterp/arm64/op_shr_int.S +++ b/runtime/interpreter/mterp/arm64/op_shr_int.S @@ -1 +1 @@ -%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"} +%include "arm64/binop.S" {"instr":"asr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S index 3c1484b0c7..6efe8ee010 100644 --- a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S +++ b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S @@ -1 +1 @@ -%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"} +%include 
"arm64/binop2addr.S" {"instr":"asr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S index 26d5024a2c..274080c4b2 100644 --- a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S +++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S @@ -1 +1 @@ -%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"} +%include "arm64/binopLit8.S" {"instr":"asr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int.S b/runtime/interpreter/mterp/arm64/op_ushr_int.S index bb8382b925..005452b554 100644 --- a/runtime/interpreter/mterp/arm64/op_ushr_int.S +++ b/runtime/interpreter/mterp/arm64/op_ushr_int.S @@ -1 +1 @@ -%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"} +%include "arm64/binop.S" {"instr":"lsr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S index dbccb9952a..1cb8cb7442 100644 --- a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S +++ b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S @@ -1 +1 @@ -%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"} +%include "arm64/binop2addr.S" {"instr":"lsr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S index 35090c46d7..ff30e1f1a8 100644 --- a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S +++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S @@ -1 +1 @@ -%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"} +%include "arm64/binopLit8.S" {"instr":"lsr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/shiftWide.S b/runtime/interpreter/mterp/arm64/shiftWide.S index 6306fca5cb..dcb2fb701a 100644 --- a/runtime/interpreter/mterp/arm64/shiftWide.S +++ b/runtime/interpreter/mterp/arm64/shiftWide.S @@ -12,8 +12,7 @@ and w1, w0, #255 // w1<- BB GET_VREG_WIDE x1, w1 // x1<- vBB FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and x2, x2, #63 // Mask low 6 - $opcode x0, x1, x2 // Do the shift. + $opcode x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 // vAA<- x0 GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/shiftWide2addr.S b/runtime/interpreter/mterp/arm64/shiftWide2addr.S index 77d104a62b..b860dfddd3 100644 --- a/runtime/interpreter/mterp/arm64/shiftWide2addr.S +++ b/runtime/interpreter/mterp/arm64/shiftWide2addr.S @@ -8,8 +8,7 @@ GET_VREG w1, w1 // x1<- vB GET_VREG_WIDE x0, w2 // x0<- vA FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and x1, x1, #63 // Mask low 6 bits. - $opcode x0, x0, x1 + $opcode x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w2 // vAA<- result GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index 8aa87b1a8c..c25cd78309 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -57,7 +57,7 @@ void InitMterpTls(Thread* self) { * Returns 3 if we don't find a match (it's the size of the sparse-switch * instruction). 
*/ -extern "C" int32_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) { +extern "C" ssize_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) { const int kInstrLen = 3; uint16_t size; const int32_t* keys; @@ -109,7 +109,7 @@ extern "C" int32_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testV return kInstrLen; } -extern "C" int32_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) { +extern "C" ssize_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) { const int kInstrLen = 3; /* @@ -142,7 +142,7 @@ extern "C" int32_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testV return entries[index]; } -extern "C" bool MterpShouldSwitchInterpreters() +extern "C" size_t MterpShouldSwitchInterpreters() SHARED_REQUIRES(Locks::mutator_lock_) { const instrumentation::Instrumentation* const instrumentation = Runtime::Current()->GetInstrumentation(); @@ -150,8 +150,10 @@ extern "C" bool MterpShouldSwitchInterpreters() } -extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeVirtual(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -159,8 +161,10 @@ extern "C" bool MterpInvokeVirtual(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeSuper(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeSuper(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -168,8 +172,10 @@ extern "C" bool MterpInvokeSuper(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeInterface(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeInterface(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -177,8 +183,10 @@ extern "C" bool MterpInvokeInterface(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeDirect(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeDirect(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -186,8 +194,10 @@ extern "C" bool MterpInvokeDirect(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeStatic(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeStatic(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = 
shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -195,8 +205,10 @@ extern "C" bool MterpInvokeStatic(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeVirtualRange(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeVirtualRange(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -204,8 +216,10 @@ extern "C" bool MterpInvokeVirtualRange(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeSuperRange(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeSuperRange(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -213,8 +227,10 @@ extern "C" bool MterpInvokeSuperRange(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeInterfaceRange(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeInterfaceRange(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -222,8 +238,10 @@ extern "C" bool MterpInvokeInterfaceRange(Thread* self, ShadowFrame* shadow_fram self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeDirectRange(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeDirectRange(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -231,8 +249,10 @@ extern "C" bool MterpInvokeDirectRange(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeStaticRange(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeStaticRange(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -240,8 +260,10 @@ extern "C" bool MterpInvokeStaticRange(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeVirtualQuick(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeVirtualQuick(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -249,8 +271,10 @@ extern 
"C" bool MterpInvokeVirtualQuick(Thread* self, ShadowFrame* shadow_frame, self, *shadow_frame, inst, inst_data, result_register); } -extern "C" bool MterpInvokeVirtualQuickRange(Thread* self, ShadowFrame* shadow_frame, - uint16_t* dex_pc_ptr, uint16_t inst_data ) +extern "C" size_t MterpInvokeVirtualQuickRange(Thread* self, + ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint16_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { JValue* result_register = shadow_frame->GetResultRegister(); const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -262,8 +286,10 @@ extern "C" void MterpThreadFenceForConstructor() { QuasiAtomic::ThreadFenceForConstructor(); } -extern "C" bool MterpConstString(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame, - Thread* self) +extern "C" size_t MterpConstString(uint32_t index, + uint32_t tgt_vreg, + ShadowFrame* shadow_frame, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { String* s = ResolveString(self, *shadow_frame, index); if (UNLIKELY(s == nullptr)) { @@ -273,8 +299,10 @@ extern "C" bool MterpConstString(uint32_t index, uint32_t tgt_vreg, ShadowFrame* return false; } -extern "C" bool MterpConstClass(uint32_t index, uint32_t tgt_vreg, ShadowFrame* shadow_frame, - Thread* self) +extern "C" size_t MterpConstClass(uint32_t index, + uint32_t tgt_vreg, + ShadowFrame* shadow_frame, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false); if (UNLIKELY(c == nullptr)) { @@ -284,8 +312,10 @@ extern "C" bool MterpConstClass(uint32_t index, uint32_t tgt_vreg, ShadowFrame* return false; } -extern "C" bool MterpCheckCast(uint32_t index, StackReference<mirror::Object>* vreg_addr, - art::ArtMethod* method, Thread* self) +extern "C" size_t MterpCheckCast(uint32_t index, + StackReference<mirror::Object>* vreg_addr, + art::ArtMethod* method, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { Class* c = ResolveVerifyAndClinit(index, method, self, false, false); if (UNLIKELY(c == nullptr)) { @@ -300,8 +330,10 @@ extern "C" bool MterpCheckCast(uint32_t index, StackReference<mirror::Object>* v return false; } -extern "C" bool MterpInstanceOf(uint32_t index, StackReference<mirror::Object>* vreg_addr, - art::ArtMethod* method, Thread* self) +extern "C" size_t MterpInstanceOf(uint32_t index, + StackReference<mirror::Object>* vreg_addr, + art::ArtMethod* method, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { Class* c = ResolveVerifyAndClinit(index, method, self, false, false); if (UNLIKELY(c == nullptr)) { @@ -312,12 +344,12 @@ extern "C" bool MterpInstanceOf(uint32_t index, StackReference<mirror::Object>* return (obj != nullptr) && obj->InstanceOf(c); } -extern "C" bool MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload) +extern "C" size_t MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload) SHARED_REQUIRES(Locks::mutator_lock_) { return FillArrayData(obj, payload); } -extern "C" bool MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data) +extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); Object* obj = nullptr; @@ -342,7 +374,7 @@ extern "C" bool MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32 return true; } -extern "C" bool MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, +extern "C" size_t 
MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, uint32_t inst_data, Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { const Instruction* inst = Instruction::At(dex_pc_ptr); @@ -350,23 +382,27 @@ extern "C" bool MterpSputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, (self, *shadow_frame, inst, inst_data); } -extern "C" bool MterpIputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, - uint32_t inst_data, Thread* self) +extern "C" size_t MterpIputObject(ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint32_t inst_data, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { const Instruction* inst = Instruction::At(dex_pc_ptr); return DoFieldPut<InstanceObjectWrite, Primitive::kPrimNot, false, false> (self, *shadow_frame, inst, inst_data); } -extern "C" bool MterpIputObjectQuick(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, - uint32_t inst_data) +extern "C" size_t MterpIputObjectQuick(ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint32_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { const Instruction* inst = Instruction::At(dex_pc_ptr); return DoIPutQuick<Primitive::kPrimNot, false>(*shadow_frame, inst, inst_data); } -extern "C" bool MterpAputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, - uint32_t inst_data) +extern "C" size_t MterpAputObject(ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint32_t inst_data) SHARED_REQUIRES(Locks::mutator_lock_) { const Instruction* inst = Instruction::At(dex_pc_ptr); Object* a = shadow_frame->GetVRegReference(inst->VRegB_23x()); @@ -383,24 +419,27 @@ extern "C" bool MterpAputObject(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, return false; } -extern "C" bool MterpFilledNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, - Thread* self) +extern "C" size_t MterpFilledNewArray(ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { const Instruction* inst = Instruction::At(dex_pc_ptr); return DoFilledNewArray<false, false, false>(inst, *shadow_frame, self, shadow_frame->GetResultRegister()); } -extern "C" bool MterpFilledNewArrayRange(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, - Thread* self) +extern "C" size_t MterpFilledNewArrayRange(ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { const Instruction* inst = Instruction::At(dex_pc_ptr); return DoFilledNewArray<true, false, false>(inst, *shadow_frame, self, shadow_frame->GetResultRegister()); } -extern "C" bool MterpNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, - uint32_t inst_data, Thread* self) +extern "C" size_t MterpNewArray(ShadowFrame* shadow_frame, + uint16_t* dex_pc_ptr, + uint32_t inst_data, Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { const Instruction* inst = Instruction::At(dex_pc_ptr); int32_t length = shadow_frame->GetVReg(inst->VRegB_22c(inst_data)); @@ -414,7 +453,7 @@ extern "C" bool MterpNewArray(ShadowFrame* shadow_frame, uint16_t* dex_pc_ptr, return true; } -extern "C" bool MterpHandleException(Thread* self, ShadowFrame* shadow_frame) +extern "C" size_t MterpHandleException(Thread* self, ShadowFrame* shadow_frame) SHARED_REQUIRES(Locks::mutator_lock_) { DCHECK(self->IsExceptionPending()); const instrumentation::Instrumentation* const instrumentation = @@ -526,14 +565,16 @@ extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, } } -extern "C" bool MterpSuspendCheck(Thread* self) +extern "C" size_t MterpSuspendCheck(Thread* self) 
SHARED_REQUIRES(Locks::mutator_lock_) { self->AllowThreadSuspension(); return MterpShouldSwitchInterpreters(); } -extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer, - uint64_t* new_value, Thread* self) +extern "C" ssize_t artSet64IndirectStaticFromMterp(uint32_t field_idx, + ArtMethod* referrer, + uint64_t* new_value, + Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t)); @@ -551,8 +592,10 @@ extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* re return -1; // failure } -extern "C" int artSet8InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint8_t new_value, - ArtMethod* referrer) +extern "C" ssize_t artSet8InstanceFromMterp(uint32_t field_idx, + mirror::Object* obj, + uint8_t new_value, + ArtMethod* referrer) SHARED_REQUIRES(Locks::mutator_lock_) { ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t)); if (LIKELY(field != nullptr && obj != nullptr)) { @@ -568,8 +611,10 @@ extern "C" int artSet8InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, return -1; // failure } -extern "C" int artSet16InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, uint16_t new_value, - ArtMethod* referrer) +extern "C" ssize_t artSet16InstanceFromMterp(uint32_t field_idx, + mirror::Object* obj, + uint16_t new_value, + ArtMethod* referrer) SHARED_REQUIRES(Locks::mutator_lock_) { ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int16_t)); @@ -586,8 +631,10 @@ extern "C" int artSet16InstanceFromMterp(uint32_t field_idx, mirror::Object* obj return -1; // failure } -extern "C" int artSet32InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, - uint32_t new_value, ArtMethod* referrer) +extern "C" ssize_t artSet32InstanceFromMterp(uint32_t field_idx, + mirror::Object* obj, + uint32_t new_value, + ArtMethod* referrer) SHARED_REQUIRES(Locks::mutator_lock_) { ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int32_t)); @@ -598,8 +645,10 @@ extern "C" int artSet32InstanceFromMterp(uint32_t field_idx, mirror::Object* obj return -1; // failure } -extern "C" int artSet64InstanceFromMterp(uint32_t field_idx, mirror::Object* obj, - uint64_t* new_value, ArtMethod* referrer) +extern "C" ssize_t artSet64InstanceFromMterp(uint32_t field_idx, + mirror::Object* obj, + uint64_t* new_value, + ArtMethod* referrer) SHARED_REQUIRES(Locks::mutator_lock_) { ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int64_t)); @@ -610,8 +659,10 @@ extern "C" int artSet64InstanceFromMterp(uint32_t field_idx, mirror::Object* obj return -1; // failure } -extern "C" int artSetObjInstanceFromMterp(uint32_t field_idx, mirror::Object* obj, - mirror::Object* new_value, ArtMethod* referrer) +extern "C" ssize_t artSetObjInstanceFromMterp(uint32_t field_idx, + mirror::Object* obj, + mirror::Object* new_value, + ArtMethod* referrer) SHARED_REQUIRES(Locks::mutator_lock_) { ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite, sizeof(mirror::HeapReference<mirror::Object>)); @@ -651,7 +702,7 @@ extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t * to the full instrumentation via MterpAddHotnessBatch. Called once on entry to the method, * and regenerated following batch updates. 
*/ -extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame) +extern "C" ssize_t MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame) SHARED_REQUIRES(Locks::mutator_lock_) { uint16_t hotness_count = method->GetCounter(); int32_t countdown_value = jit::kJitHotnessDisabled; @@ -689,7 +740,7 @@ extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow * Report a batch of hotness events to the instrumentation and then return the new * countdown value to the next time we should report. */ -extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method, +extern "C" ssize_t MterpAddHotnessBatch(ArtMethod* method, ShadowFrame* shadow_frame, Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { @@ -702,7 +753,7 @@ extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method, } // TUNING: Unused by arm/arm64/x86/x86_64. Remove when mips/mips64 mterps support batch updates. -extern "C" bool MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset) +extern "C" size_t MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset) SHARED_REQUIRES(Locks::mutator_lock_) { ArtMethod* method = shadow_frame->GetMethod(); JValue* result = shadow_frame->GetResultRegister(); @@ -719,9 +770,9 @@ extern "C" bool MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int } } -extern "C" bool MterpMaybeDoOnStackReplacement(Thread* self, - ShadowFrame* shadow_frame, - int32_t offset) +extern "C" size_t MterpMaybeDoOnStackReplacement(Thread* self, + ShadowFrame* shadow_frame, + int32_t offset) SHARED_REQUIRES(Locks::mutator_lock_) { ArtMethod* method = shadow_frame->GetMethod(); JValue* result = shadow_frame->GetResultRegister(); diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h index 88e17bc33f..45ab98b9a6 100644 --- a/runtime/interpreter/mterp/mterp.h +++ b/runtime/interpreter/mterp/mterp.h @@ -30,7 +30,12 @@ namespace interpreter { void InitMterpTls(Thread* self); void CheckMterpAsmConstants(); -extern "C" bool MterpShouldSwitchInterpreters(); + +// The return type should be 'bool' but our assembly stubs expect 'bool' +// to be zero-extended to the whole register and that's broken on x86-64 +// as a 'bool' is returned in 'al' and the rest of 'rax' is garbage. +// TODO: Fix mterp and stubs and revert this workaround. http://b/30232671 +extern "C" size_t MterpShouldSwitchInterpreters(); // Poison value for TestExportPC. 
If we segfault with this value, it means that a mterp // handler for a recent opcode failed to export the Dalvik PC prior to a possible exit from diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S index df0b686d37..e318782c6d 100644 --- a/runtime/interpreter/mterp/out/mterp_arm64.S +++ b/runtime/interpreter/mterp/out/mterp_arm64.S @@ -747,10 +747,9 @@ artMterpAsmInstructionStart = .L_op_nop .L_op_const_wide_16: /* 0x16 */ /* File: arm64/op_const_wide_16.S */ /* const-wide/16 vAA, #+BBBB */ - FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended + FETCH_S x0, 1 // x0<- ssssssssssssBBBB (sign-extended) lsr w3, wINST, #8 // w3<- AA FETCH_ADVANCE_INST 2 // advance rPC, load rINST - sbfm x0, x0, 0, 31 GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 GOTO_OPCODE ip // jump to next instruction @@ -760,13 +759,12 @@ artMterpAsmInstructionStart = .L_op_nop .L_op_const_wide_32: /* 0x17 */ /* File: arm64/op_const_wide_32.S */ /* const-wide/32 vAA, #+BBBBbbbb */ - FETCH w0, 1 // w0<- 0000bbbb (low) + FETCH w0, 1 // x0<- 000000000000bbbb (low) lsr w3, wINST, #8 // w3<- AA - FETCH_S w2, 2 // w2<- ssssBBBB (high) + FETCH_S x2, 2 // x2<- ssssssssssssBBBB (high) FETCH_ADVANCE_INST 3 // advance rPC, load wINST GET_INST_OPCODE ip // extract opcode from wINST - orr w0, w0, w2, lsl #16 // w0<- BBBBbbbb - sbfm x0, x0, 0, 31 + orr x0, x0, x2, lsl #16 // x0<- ssssssssBBBBbbbb SET_VREG_WIDE x0, w3 GOTO_OPCODE ip // jump to next instruction @@ -934,8 +932,7 @@ artMterpAsmInstructionStart = .L_op_nop mov x3, xSELF // w3<- self bl MterpInstanceOf // (index, &obj, method, self) ldr x1, [xSELF, #THREAD_EXCEPTION_OFFSET] - lsr w2, wINST, #8 // w2<- A+ - and w2, w2, #15 // w2<- A + ubfx w2, wINST, #8, #4 // w2<- A PREFETCH_INST 2 cbnz x1, MterpException ADVANCE 2 // advance rPC @@ -1053,12 +1050,12 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_fill_array_data.S */ /* fill-array-data vAA, +BBBBBBBB */ EXPORT_PC - FETCH w0, 1 // w0<- bbbb (lo) - FETCH w1, 2 // w1<- BBBB (hi) + FETCH w0, 1 // x0<- 000000000000bbbb (lo) + FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi) lsr w3, wINST, #8 // w3<- AA - orr w1, w0, w1, lsl #16 // w1<- BBBBbbbb + orr x1, x0, x1, lsl #16 // x1<- ssssssssBBBBbbbb GET_VREG w0, w3 // w0<- vAA (array object) - add x1, xPC, w1, lsl #1 // w1<- PC + BBBBbbbb*2 (array data off.) + add x1, xPC, x1, lsl #1 // x1<- PC + ssssssssBBBBbbbb*2 (array data off.) bl MterpFillArrayData // (obj, payload) cbz w0, MterpPossibleException // exception? 
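The arm64 op_const_wide_*, op_fill_array_data, op_packed_switch and op_sparse_switch changes in these hunks (and the x86-64 movslq changes further down) all fix the same class of bug: the 32-bit BBBBbbbb code-unit offset is signed, and assembling it in a 32-bit register zero-extends it, so a payload placed before the branching instruction — which the new regression tests arrange deliberately — resolves to the wrong address. A standalone sketch of the arithmetic (names are illustrative, not ART code):

#include <cstdint>
#include <cstdio>

// Combine the two 16-bit halves of a signed 32-bit code-unit offset. Only the
// sign-extending variant is correct for negative offsets; the interpreter then
// scales the result by 2 bytes per code unit and adds it to the dex PC.
int64_t PayloadDelta(uint16_t lo, uint16_t hi, bool sign_extend) {
  uint32_t raw = static_cast<uint32_t>(lo) | (static_cast<uint32_t>(hi) << 16);
  return sign_extend ? static_cast<int64_t>(static_cast<int32_t>(raw))  // ssssssssBBBBbbbb
                     : static_cast<int64_t>(raw);                       // 00000000BBBBbbbb
}

int main() {
  int32_t off = -4;  // payload 4 code units before the branching instruction
  uint16_t lo = static_cast<uint16_t>(off);
  uint16_t hi = static_cast<uint16_t>(static_cast<uint32_t>(off) >> 16);
  std::printf("sign-extended: %lld\n", static_cast<long long>(PayloadDelta(lo, hi, true)));   // -4
  std::printf("zero-extended: %lld\n", static_cast<long long>(PayloadDelta(lo, hi, false)));  // 4294967292
}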
FETCH_ADVANCE_INST 3 // advance rPC, load rINST @@ -1143,14 +1140,14 @@ artMterpAsmInstructionStart = .L_op_nop * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ - FETCH w0, 1 // w0<- bbbb (lo) - FETCH w1, 2 // w1<- BBBB (hi) + FETCH w0, 1 // x0<- 000000000000bbbb (lo) + FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi) lsr w3, wINST, #8 // w3<- AA - orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb + orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb GET_VREG w1, w3 // w1<- vAA - add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2 + add x0, xPC, x0, lsl #1 // x0<- PC + ssssssssBBBBbbbb*2 bl MterpDoPackedSwitch // w0<- code-unit branch offset - sbfm xINST, x0, 0, 31 + sxtw xINST, w0 b MterpCommonTakenBranchNoFlags /* ------------------------------ */ @@ -1168,14 +1165,14 @@ artMterpAsmInstructionStart = .L_op_nop * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ - FETCH w0, 1 // w0<- bbbb (lo) - FETCH w1, 2 // w1<- BBBB (hi) + FETCH w0, 1 // x0<- 000000000000bbbb (lo) + FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi) lsr w3, wINST, #8 // w3<- AA - orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb + orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb GET_VREG w1, w3 // w1<- vAA - add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2 + add x0, xPC, x0, lsl #1 // x0<- PC + ssssssssBBBBbbbb*2 bl MterpDoSparseSwitch // w0<- code-unit branch offset - sbfm xINST, x0, 0, 31 + sxtw xINST, w0 b MterpCommonTakenBranchNoFlags @@ -1195,10 +1192,9 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, w0, #8 // w3<- CC GET_VREG s1, w2 GET_VREG s2, w3 - mov w0, #-1 fcmp s1, s2 - csneg w0, w0, w0, le - csel w0, wzr, w0, eq + cset w0, ne + cneg w0, w0, lt FETCH_ADVANCE_INST 2 // advance rPC, load rINST GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w4 // vAA<- w0 @@ -1221,10 +1217,9 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, w0, #8 // w3<- CC GET_VREG s1, w2 GET_VREG s2, w3 - mov w0, #1 fcmp s1, s2 - csneg w0, w0, w0, pl - csel w0, wzr, w0, eq + cset w0, ne + cneg w0, w0, cc FETCH_ADVANCE_INST 2 // advance rPC, load rINST GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w4 // vAA<- w0 @@ -1247,10 +1242,9 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, w0, #8 // w3<- CC GET_VREG_WIDE d1, w2 GET_VREG_WIDE d2, w3 - mov w0, #-1 fcmp d1, d2 - csneg w0, w0, w0, le - csel w0, wzr, w0, eq + cset w0, ne + cneg w0, w0, lt FETCH_ADVANCE_INST 2 // advance rPC, load rINST GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w4 // vAA<- w0 @@ -1273,10 +1267,9 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, w0, #8 // w3<- CC GET_VREG_WIDE d1, w2 GET_VREG_WIDE d2, w3 - mov w0, #1 fcmp d1, d2 - csneg w0, w0, w0, pl - csel w0, wzr, w0, eq + cset w0, ne + cneg w0, w0, cc FETCH_ADVANCE_INST 2 // advance rPC, load rINST GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w4 // vAA<- w0 @@ -1294,8 +1287,8 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG_WIDE x1, w2 GET_VREG_WIDE x2, w3 cmp x1, x2 - csinc w0, wzr, wzr, eq - csneg w0, w0, w0, ge + cset w0, ne + cneg w0, w0, lt FETCH_ADVANCE_INST 2 // advance rPC, load wINST SET_VREG w0, w4 GET_INST_OPCODE ip // extract opcode from wINST @@ -3345,11 +3338,10 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG w0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A - sbfm x0, x0, 0, 31 // d0<- op + sxtw x0, w0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 // vA<- d0 GOTO_OPCODE 
ip // jump to next instruction @@ -3369,10 +3361,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG w0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A scvtf s0, w0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG s0, w4 // vA<- d0 @@ -3392,10 +3383,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG w0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A scvtf d0, w0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE d0, w4 // vA<- d0 @@ -3415,10 +3405,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG w0, w4 // vA<- d0 @@ -3438,10 +3427,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A scvtf s0, x0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG s0, w4 // vA<- d0 @@ -3461,10 +3449,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A scvtf d0, x0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE d0, w4 // vA<- d0 @@ -3485,10 +3472,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG s0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvtzs w0, s0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG w0, w4 // vA<- d0 @@ -3508,10 +3494,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG s0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvtzs x0, s0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 // vA<- d0 @@ -3531,10 +3516,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG s0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvt d0, s0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE d0, w4 // vA<- d0 @@ -3554,10 +3538,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE d0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvtzs w0, d0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG w0, w4 // vA<- d0 @@ -3577,10 +3560,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, 
#8, #4 // w4<- A GET_VREG_WIDE d0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvtzs x0, d0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 // vA<- d0 @@ -3600,10 +3582,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE d0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvt s0, d0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG s0, w4 // vA<- d0 @@ -4032,7 +4013,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero // is second operand zero? .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsl w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -4071,7 +4052,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero // is second operand zero? .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes asr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -4110,7 +4091,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero // is second operand zero? .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -4424,8 +4405,7 @@ artMterpAsmInstructionStart = .L_op_nop and w1, w0, #255 // w1<- BB GET_VREG_WIDE x1, w1 // x1<- vBB FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and x2, x2, #63 // Mask low 6 - lsl x0, x1, x2 // Do the shift. + lsl x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 // vAA<- x0 GOTO_OPCODE ip // jump to next instruction @@ -4450,8 +4430,7 @@ artMterpAsmInstructionStart = .L_op_nop and w1, w0, #255 // w1<- BB GET_VREG_WIDE x1, w1 // x1<- vBB FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and x2, x2, #63 // Mask low 6 - asr x0, x1, x2 // Do the shift. + asr x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 // vAA<- x0 GOTO_OPCODE ip // jump to next instruction @@ -4476,8 +4455,7 @@ artMterpAsmInstructionStart = .L_op_nop and w1, w0, #255 // w1<- BB GET_VREG_WIDE x1, w1 // x1<- vBB FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and x2, x2, #63 // Mask low 6 - lsr x0, x1, x2 // Do the shift. + lsr x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used. 
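Dropping the explicit `and ... #63` / `and ... #31` masks in the shift handlers is safe because the AArch64 variable-shift instructions (LSLV/LSRV/ASRV) already reduce the shift amount modulo the operand width — low 6 bits for X registers, low 5 bits for W registers — which is exactly the truncation dex shift semantics require. A small sketch of the semantics the handlers must preserve (illustrative, not ART code):

#include <cstdint>

// Dex long shifts use only the low 6 bits of the shift distance; int shifts use
// the low 5 bits -- the same truncation the hardware applies, so the removed
// masking instructions were redundant.
int64_t ShlLong(int64_t value, int32_t distance) {
  return static_cast<int64_t>(static_cast<uint64_t>(value) << (distance & 63));
}

int32_t ShlInt(int32_t value, int32_t distance) {
  return static_cast<int32_t>(static_cast<uint32_t>(value) << (distance & 31));
}

int main() {
  return ShlLong(1, 70) == (1LL << 6) && ShlInt(1, 37) == (1 << 5) ? 0 : 1;  // 0 on success
}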
GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 // vAA<- x0 GOTO_OPCODE ip // jump to next instruction @@ -5089,7 +5067,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsl w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -5125,7 +5103,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes asr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -5161,7 +5139,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -5463,8 +5441,7 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG w1, w1 // x1<- vB GET_VREG_WIDE x0, w2 // x0<- vA FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and x1, x1, #63 // Mask low 6 bits. - lsl x0, x0, x1 + lsl x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w2 // vAA<- result GOTO_OPCODE ip // jump to next instruction @@ -5485,8 +5462,7 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG w1, w1 // x1<- vB GET_VREG_WIDE x0, w2 // x0<- vA FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and x1, x1, #63 // Mask low 6 bits. - asr x0, x0, x1 + asr x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w2 // vAA<- result GOTO_OPCODE ip // jump to next instruction @@ -5507,8 +5483,7 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG w1, w1 // x1<- vB GET_VREG_WIDE x0, w2 // x0<- vA FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and x1, x1, #63 // Mask low 6 bits. - lsr x0, x0, x1 + lsr x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used. 
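The recurring replacement of the `lsr #8` + `and #15` pair with a single `ubfx` in these handlers extracts the 4-bit vA field from bits [11:8] of the 16-bit instruction word; the result is identical, just one instruction shorter. Equivalent C++ for the field extraction (illustrative):

#include <cstdint>

// vA occupies bits [11:8] of the instruction word in the 4-bit-register formats.
uint32_t VRegA(uint16_t inst) {
  return (inst >> 8) & 0xF;  // lsr #8 + and #15, or ubfx #8, #4 in one step
}

int main() {
  return VRegA(0x3A52) == 0xA ? 0 : 1;  // example word with vA == 10
}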
GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w2 // vAA<- result GOTO_OPCODE ip // jump to next instruction @@ -5529,8 +5504,7 @@ artMterpAsmInstructionStart = .L_op_nop */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 fadd s2, s0, s1 // s2<- op @@ -5554,8 +5528,7 @@ artMterpAsmInstructionStart = .L_op_nop */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 fsub s2, s0, s1 // s2<- op @@ -5579,8 +5552,7 @@ artMterpAsmInstructionStart = .L_op_nop */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 fmul s2, s0, s1 // s2<- op @@ -5604,8 +5576,7 @@ artMterpAsmInstructionStart = .L_op_nop */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 fdiv s2, s0, s1 // s2<- op @@ -5621,13 +5592,11 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_rem_float_2addr.S */ /* rem vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 bl fmodf - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A FETCH_ADVANCE_INST 1 // advance rPC, load rINST GET_INST_OPCODE ip // extract opcode from rINST SET_VREG s0, w9 @@ -6381,7 +6350,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsl w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -6417,7 +6386,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes asr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -6453,7 +6422,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -6471,8 +6440,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldr w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6489,7 +6457,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w4, 1 // w4<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cbz w3, common_errNullObject // object was null + cbz w3, common_errNullObject // object was null add x4, x3, x4 // create direct pointer ldr x0, [x4] 
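In the *get-quick/*put-quick hunks here, the two-instruction `cmp x3, #0` / `beq` null check becomes a single `cbz` on the 32-bit register GET_VREG just wrote; writing a w register zero-extends into the x register, so the test is equivalent and one instruction shorter. A rough sketch of what these quickened field accessors do (illustrative, not ART code):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Quickened iget: null-check the receiver, then load a field whose byte offset
// was resolved ahead of time and embedded in the instruction stream.
template <typename T>
bool IGetQuick(const void* receiver, uint32_t byte_offset, T* out) {
  if (receiver == nullptr) {
    return false;  // corresponds to the cbz ... common_errNullObject branch
  }
  std::memcpy(out, static_cast<const char*>(receiver) + byte_offset, sizeof(T));
  return true;
}

int main() {
  struct Obj { int32_t a; int32_t b; };
  Obj obj = {1, 2};
  int32_t value = 0;
  return IGetQuick(&obj, offsetof(Obj, b), &value) && value == 2 ? 0 : 1;  // 0 on success
}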
FETCH_ADVANCE_INST 2 // advance rPC, load wINST @@ -6544,8 +6512,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w3, 1 // w3<- field byte offset GET_VREG w2, w2 // w2<- fp[B], the object pointer ubfx w0, wINST, #8, #4 // w0<- A - cmp w2, #0 // check object for null - beq common_errNullObject // object was null + cbz w2, common_errNullObject // object was null GET_VREG_WIDE x0, w0 // x0-< fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load wINST add x1, x2, x3 // create a direct pointer @@ -6710,8 +6677,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldrb w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6731,8 +6697,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldrsb w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6752,8 +6717,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldrh w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6773,8 +6737,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldrsh w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -11521,7 +11484,7 @@ MterpOnStackReplacement: #if MTERP_LOGGING mov x0, xSELF add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 + sxtw x2, wINST bl MterpLogOSR #endif mov x0, #1 // Signal normal return diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S index 9e2dcea002..2f7b8548a3 100644 --- a/runtime/interpreter/mterp/out/mterp_x86_64.S +++ b/runtime/interpreter/mterp/out/mterp_x86_64.S @@ -965,8 +965,8 @@ SYMBOL(artMterpAsmInstructionStart) = .L_op_nop /* File: x86_64/op_fill_array_data.S */ /* fill-array-data vAA, +BBBBBBBB */ EXPORT_PC - movl 2(rPC), %ecx # ecx <- BBBBbbbb - leaq (rPC,%rcx,2), OUT_ARG1 # OUT_ARG1 <- PC + BBBBbbbb*2 + movslq 2(rPC), %rcx # rcx <- ssssssssBBBBbbbb + leaq (rPC,%rcx,2), OUT_ARG1 # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2 GET_VREG OUT_32_ARG0, rINSTq # OUT_ARG0 <- vAA (array object) call SYMBOL(MterpFillArrayData) # (obj, payload) testb %al, %al # 0 means an exception is thrown @@ -1051,8 +1051,8 @@ SYMBOL(artMterpAsmInstructionStart) = .L_op_nop * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ - movslq 2(rPC), OUT_ARG0 # rcx <- BBBBbbbb - leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + BBBBbbbb*2 + movslq 2(rPC), OUT_ARG0 # rcx <- ssssssssBBBBbbbb + leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + ssssssssBBBBbbbb*2 GET_VREG OUT_32_ARG1, rINSTq # eax <- vAA call SYMBOL(MterpDoPackedSwitch) testl %eax, %eax @@ -1074,8 +1074,8 @@ 
SYMBOL(artMterpAsmInstructionStart) = .L_op_nop * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ - movslq 2(rPC), OUT_ARG0 # rcx <- BBBBbbbb - leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + BBBBbbbb*2 + movslq 2(rPC), OUT_ARG0 # rcx <- ssssssssBBBBbbbb + leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + ssssssssBBBBbbbb*2 GET_VREG OUT_32_ARG1, rINSTq # eax <- vAA call SYMBOL(MterpDoSparseSwitch) testl %eax, %eax diff --git a/runtime/interpreter/mterp/x86_64/op_fill_array_data.S b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S index 626bad47c9..7ea36a6136 100644 --- a/runtime/interpreter/mterp/x86_64/op_fill_array_data.S +++ b/runtime/interpreter/mterp/x86_64/op_fill_array_data.S @@ -1,7 +1,7 @@ /* fill-array-data vAA, +BBBBBBBB */ EXPORT_PC - movl 2(rPC), %ecx # ecx <- BBBBbbbb - leaq (rPC,%rcx,2), OUT_ARG1 # OUT_ARG1 <- PC + BBBBbbbb*2 + movslq 2(rPC), %rcx # rcx <- ssssssssBBBBbbbb + leaq (rPC,%rcx,2), OUT_ARG1 # OUT_ARG1 <- PC + ssssssssBBBBbbbb*2 GET_VREG OUT_32_ARG0, rINSTq # OUT_ARG0 <- vAA (array object) call SYMBOL(MterpFillArrayData) # (obj, payload) testb %al, %al # 0 means an exception is thrown diff --git a/runtime/interpreter/mterp/x86_64/op_packed_switch.S b/runtime/interpreter/mterp/x86_64/op_packed_switch.S index fdf5a50f9c..148552f77e 100644 --- a/runtime/interpreter/mterp/x86_64/op_packed_switch.S +++ b/runtime/interpreter/mterp/x86_64/op_packed_switch.S @@ -9,8 +9,8 @@ * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ - movslq 2(rPC), OUT_ARG0 # rcx <- BBBBbbbb - leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + BBBBbbbb*2 + movslq 2(rPC), OUT_ARG0 # rcx <- ssssssssBBBBbbbb + leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + ssssssssBBBBbbbb*2 GET_VREG OUT_32_ARG1, rINSTq # eax <- vAA call SYMBOL($func) testl %eax, %eax diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc index 9c77d3814c..1c31c5764b 100644 --- a/runtime/mirror/class.cc +++ b/runtime/mirror/class.cc @@ -748,21 +748,24 @@ ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const String return nullptr; } -ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache, +ArtField* Class::FindStaticField(Thread* self, + Class* klass, + const DexCache* dex_cache, uint32_t dex_field_idx) { - for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) { + for (Class* k = klass; k != nullptr; k = k->GetSuperClass()) { // Is the field in this class? ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx); if (f != nullptr) { return f; } - // Wrap k incase it moves during GetDirectInterface. + // Though GetDirectInterface() should not cause thread suspension when called + // from here, it takes a Handle as an argument, so we need to wrap `k`. + ScopedAssertNoThreadSuspension ants(self, __FUNCTION__); StackHandleScope<1> hs(self); - HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k)); + Handle<mirror::Class> h_k(hs.NewHandle(k)); // Is this field in any of this class' interfaces? 
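The FindStaticField rewrite below takes a raw Class* and only wraps `k` in a Handle locally, under a ScopedAssertNoThreadSuspension, since GetDirectInterface is not expected to suspend on this path. The lookup order itself is unchanged; a standalone sketch of that order with placeholder types (not ART's):

#include <cstdint>
#include <map>
#include <vector>

// Stand-in types: walk the superclass chain, and at each level also search that
// class's direct interfaces recursively, matching on the dex field index.
struct Klass {
  const Klass* super = nullptr;
  std::vector<const Klass*> direct_interfaces;
  std::map<uint32_t, int> declared_static_fields;  // dex_field_idx -> field id
};

const int* FindStaticField(const Klass* klass, uint32_t dex_field_idx) {
  for (const Klass* k = klass; k != nullptr; k = k->super) {
    auto it = k->declared_static_fields.find(dex_field_idx);
    if (it != k->declared_static_fields.end()) {
      return &it->second;
    }
    for (const Klass* iface : k->direct_interfaces) {
      if (const int* f = FindStaticField(iface, dex_field_idx)) {
        return f;
      }
    }
  }
  return nullptr;
}

int main() {
  Klass iface, derived;
  iface.declared_static_fields[7] = 42;
  derived.direct_interfaces.push_back(&iface);
  return FindStaticField(&derived, 7) != nullptr ? 0 : 1;  // 0 on success
}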
for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) { - StackHandleScope<1> hs2(self); - Handle<mirror::Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i))); + mirror::Class* interface = GetDirectInterface(self, h_k, i); f = FindStaticField(self, interface, dex_cache, dex_field_idx); if (f != nullptr) { return f; diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index f044b5968b..9be9f0107b 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -1091,7 +1091,9 @@ class MANAGED Class FINAL : public Object { // Finds the given static field in this class or superclass, only searches classes that // have the same dex cache. - static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache, + static ArtField* FindStaticField(Thread* self, + Class* klass, + const DexCache* dex_cache, uint32_t dex_field_idx) SHARED_REQUIRES(Locks::mutator_lock_); diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index 79b18aa84e..d987f65a08 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -342,7 +342,7 @@ static void PreloadDexCachesResolveField(Handle<mirror::DexCache> dex_cache, uin return; } if (is_static) { - field = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx); + field = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx); } else { field = klass->FindInstanceField(dex_cache.Get(), field_idx); } diff --git a/runtime/oat.h b/runtime/oat.h index e506e3c476..9b8f5452c4 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '8', '3', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '8', '4', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index 67281231bf..8700a90276 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -81,11 +81,18 @@ OatFileAssistant::OatFileAssistant(const char* dex_location, load_executable_ = false; } - // If the user gave a target oat location, save that as the cached oat - // location now so we won't try to construct the default location later. + std::string error_msg; + if (!DexLocationToOdexFilename(dex_location_, isa_, &odex_file_name_, &error_msg)) { + LOG(WARNING) << "Failed to determine odex file name: " << error_msg; + } + if (oat_location != nullptr) { - cached_oat_file_name_ = std::string(oat_location); - cached_oat_file_name_attempted_ = true; + oat_file_name_ = std::string(oat_location); + } else { + if (!DexLocationToOatFilename(dex_location_, isa_, &oat_file_name_, &error_msg)) { + LOG(WARNING) << "Failed to determine oat file name for dex location " + << dex_location_ << ": " << error_msg; + } } } @@ -351,17 +358,7 @@ bool OatFileAssistant::HasOriginalDexFiles() { } const std::string* OatFileAssistant::OdexFileName() { - if (!cached_odex_file_name_attempted_) { - cached_odex_file_name_attempted_ = true; - - std::string error_msg; - if (!DexFilenameToOdexFilename(dex_location_, isa_, &cached_odex_file_name_, &error_msg)) { - // If we can't figure out the odex file, we treat it as if the odex - // file was inaccessible. 
- LOG(WARNING) << "Failed to determine odex file name: " << error_msg; - } - } - return cached_odex_file_name_.empty() ? nullptr : &cached_odex_file_name_; + return odex_file_name_.empty() ? nullptr : &odex_file_name_; } bool OatFileAssistant::OdexFileExists() { @@ -412,25 +409,7 @@ static std::string ArtFileName(const OatFile* oat_file) { } const std::string* OatFileAssistant::OatFileName() { - if (!cached_oat_file_name_attempted_) { - cached_oat_file_name_attempted_ = true; - - // Compute the oat file name from the dex location. - // TODO: The oat file assistant should be the definitive place for - // determining the oat file name from the dex location, not - // GetDalvikCacheFilename. - std::string cache_dir = StringPrintf("%s%s", - DalvikCacheDirectory().c_str(), GetInstructionSetString(isa_)); - std::string error_msg; - if (!GetDalvikCacheFilename(dex_location_.c_str(), - cache_dir.c_str(), &cached_oat_file_name_, &error_msg)) { - // If we can't determine the oat file name, we treat the oat file as - // inaccessible. - LOG(WARNING) << "Failed to determine oat file name for dex location " - << dex_location_ << ": " << error_msg; - } - } - return cached_oat_file_name_.empty() ? nullptr : &cached_oat_file_name_; + return oat_file_name_.empty() ? nullptr : &oat_file_name_; } bool OatFileAssistant::OatFileExists() { @@ -750,8 +729,10 @@ bool OatFileAssistant::Dex2Oat(const std::vector<std::string>& args, return Exec(argv, error_msg); } -bool OatFileAssistant::DexFilenameToOdexFilename(const std::string& location, - InstructionSet isa, std::string* odex_filename, std::string* error_msg) { +bool OatFileAssistant::DexLocationToOdexFilename(const std::string& location, + InstructionSet isa, + std::string* odex_filename, + std::string* error_msg) { CHECK(odex_filename != nullptr); CHECK(error_msg != nullptr); @@ -790,9 +771,12 @@ bool OatFileAssistant::DexFilenameToOdexFilename(const std::string& location, return true; } -std::string OatFileAssistant::DalvikCacheDirectory() { - // Note: We don't cache this, because it will only be called once by - // OatFileName. +bool OatFileAssistant::DexLocationToOatFilename(const std::string& location, + InstructionSet isa, + std::string* oat_filename, + std::string* error_msg) { + CHECK(oat_filename != nullptr); + CHECK(error_msg != nullptr); // TODO: The work done in GetDalvikCache is overkill for what we need. // Ideally a new API for getting the DalvikCacheDirectory the way we want @@ -800,12 +784,16 @@ std::string OatFileAssistant::DalvikCacheDirectory() { // of the GetDalvikCache family of functions. Until such an API is in place, // we use GetDalvikCache to avoid duplicating the logic for determining the // dalvik cache directory. - std::string result; - bool have_android_data; - bool dalvik_cache_exists; - bool is_global_cache; - GetDalvikCache("", false, &result, &have_android_data, &dalvik_cache_exists, &is_global_cache); - return result; + std::string dalvik_cache_dir; + bool ignored; + GetDalvikCache("", false, &dalvik_cache_dir, &ignored, &ignored, &ignored); + + // TODO: The oat file assistant should be the definitive place for + // determining the oat file name from the dex location, not + // GetDalvikCacheFilename. 
+ std::string cache_dir = StringPrintf("%s%s", + dalvik_cache_dir.c_str(), GetInstructionSetString(isa)); + return GetDalvikCacheFilename(location.c_str(), cache_dir.c_str(), oat_filename, error_msg); } std::string OatFileAssistant::ImageLocation() { diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h index e4aba3f8ad..04bd20cd53 100644 --- a/runtime/oat_file_assistant.h +++ b/runtime/oat_file_assistant.h @@ -280,8 +280,21 @@ class OatFileAssistant { // Returns false on error, in which case error_msg describes the error and // odex_filename is not changed. // Neither odex_filename nor error_msg may be null. - static bool DexFilenameToOdexFilename(const std::string& location, - InstructionSet isa, std::string* odex_filename, std::string* error_msg); + static bool DexLocationToOdexFilename(const std::string& location, + InstructionSet isa, + std::string* odex_filename, + std::string* error_msg); + + // Constructs the oat file name for the given dex location. + // Returns true on success, in which case oat_filename is set to the oat + // file name. + // Returns false on error, in which case error_msg describes the error and + // oat_filename is not changed. + // Neither oat_filename nor error_msg may be null. + static bool DexLocationToOatFilename(const std::string& location, + InstructionSet isa, + std::string* oat_filename, + std::string* error_msg); static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA); @@ -293,11 +306,6 @@ class OatFileAssistant { std::string location; }; - // Returns the path to the dalvik cache directory. - // Does not check existence of the cache or try to create it. - // Includes the trailing slash. - // Returns an empty string if we can't get the dalvik cache directory path. - std::string DalvikCacheDirectory(); // Returns the current image location. // Returns an empty string if the image location could not be retrieved. @@ -383,12 +391,9 @@ class OatFileAssistant { bool required_dex_checksum_found_; bool has_original_dex_files_; - // Cached value of the odex file name. - // This should be accessed only by the OdexFileName() method. // The sentinel value "" is used if the odex file name could not be // determined. - bool cached_odex_file_name_attempted_ = false; - std::string cached_odex_file_name_; + std::string odex_file_name_; // Cached value of the loaded odex file. // Use the GetOdexFile method rather than accessing this directly, unless you @@ -400,12 +405,9 @@ class OatFileAssistant { bool odex_file_status_attempted_ = false; OatStatus cached_odex_file_status_; - // Cached value of the oat file name. - // This should be accessed only by the OatFileName() method. // The sentinel value "" is used if the oat file name could not be // determined. - bool cached_oat_file_name_attempted_ = false; - std::string cached_oat_file_name_; + std::string oat_file_name_; // Cached value of the loaded oat file. // Use the GetOatFile method rather than accessing this directly, unless you diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc index 6bccea6776..39848b4afe 100644 --- a/runtime/oat_file_assistant_test.cc +++ b/runtime/oat_file_assistant_test.cc @@ -213,22 +213,22 @@ class OatFileAssistantNoDex2OatTest : public OatFileAssistantTest { // generation of oat files. static void GenerateOatForTest(const char* dex_location, CompilerFilter::Filter filter) { // Use an oat file assistant to find the proper oat location. 
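With the odex/oat file names now computed eagerly in the constructor, the name derivation is exposed through the static DexLocationToOdexFilename/DexLocationToOatFilename helpers, which is what the test change just below switches to. A usage fragment of the new oat-name helper, written against the declarations added in this patch (the dex path is made up, error handling trimmed):

std::string oat_location;
std::string error_msg;
if (!OatFileAssistant::DexLocationToOatFilename(
        "/data/app/example.apk", kRuntimeISA, &oat_location, &error_msg)) {
  LOG(WARNING) << "Failed to determine oat file name: " << error_msg;
}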
- OatFileAssistant ofa(dex_location, kRuntimeISA, false); - const std::string* oat_location = ofa.OatFileName(); - ASSERT_TRUE(oat_location != nullptr); + std::string oat_location; + std::string error_msg; + ASSERT_TRUE(OatFileAssistant::DexLocationToOatFilename( + dex_location, kRuntimeISA, &oat_location, &error_msg)) << error_msg; std::vector<std::string> args; args.push_back("--dex-file=" + std::string(dex_location)); - args.push_back("--oat-file=" + *oat_location); + args.push_back("--oat-file=" + oat_location); args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter)); args.push_back("--runtime-arg"); args.push_back("-Xnorelocate"); - std::string error_msg; ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg; // Verify the oat file was generated as expected. - std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location->c_str(), - oat_location->c_str(), + std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location.c_str(), + oat_location.c_str(), nullptr, nullptr, false, @@ -1212,21 +1212,21 @@ TEST_F(OatFileAssistantTest, RuntimeCompilerFilterOptionUsed) { oat_file_assistant.MakeUpToDate(false, &error_msg)); } -TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) { +TEST(OatFileAssistantUtilsTest, DexLocationToOdexFilename) { std::string error_msg; std::string odex_file; - EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename( + EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename( "/foo/bar/baz.jar", kArm, &odex_file, &error_msg)) << error_msg; EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file); - EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename( + EXPECT_TRUE(OatFileAssistant::DexLocationToOdexFilename( "/foo/bar/baz.funnyext", kArm, &odex_file, &error_msg)) << error_msg; EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file); - EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename( + EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename( "nopath.jar", kArm, &odex_file, &error_msg)); - EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename( + EXPECT_FALSE(OatFileAssistant::DexLocationToOdexFilename( "/foo/bar/baz_noext", kArm, &odex_file, &error_msg)); } diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index 0c3eb3b628..92efa211ce 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -220,7 +220,7 @@ inline void ReadBarrier::AssertToSpaceInvariant(GcRootSource* gc_root_source, } inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) { - return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj); + return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj); } inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj, diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 21cd2aa2c9..079c079244 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -989,6 +989,7 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { xgc_option.verify_pre_sweeping_rosalloc_, xgc_option.verify_post_gc_rosalloc_, xgc_option.gcstress_, + xgc_option.measure_, runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM), runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs)); diff --git a/runtime/trace.cc b/runtime/trace.cc index 0acc54d0f8..e77a11e5aa 100644 --- a/runtime/trace.cc +++ b/runtime/trace.cc @@ -869,7 +869,7 @@ bool Trace::RegisterMethod(ArtMethod* method) { bool Trace::RegisterThread(Thread* thread) { pid_t tid = thread->GetTid(); CHECK_LT(0U, static_cast<uint32_t>(tid)); - 
CHECK_LT(static_cast<uint32_t>(tid), 65536U); + CHECK_LT(static_cast<uint32_t>(tid), kMaxThreadIdNumber); if (!(*seen_threads_)[tid]) { seen_threads_->set(tid); diff --git a/runtime/trace.h b/runtime/trace.h index 80f1a4c1bc..9b29fb9f45 100644 --- a/runtime/trace.h +++ b/runtime/trace.h @@ -41,7 +41,9 @@ class DexFile; class Thread; using DexIndexBitSet = std::bitset<65536>; -using ThreadIDBitSet = std::bitset<65536>; + +constexpr size_t kMaxThreadIdNumber = kIsTargetBuild ? 65536U : 1048576U; +using ThreadIDBitSet = std::bitset<kMaxThreadIdNumber>; enum TracingMode { kTracingInactive, diff --git a/test/412-new-array/info.txt b/test/412-new-array/info.txt index cb388b6023..b5f834aa31 100644 --- a/test/412-new-array/info.txt +++ b/test/412-new-array/info.txt @@ -1 +1,3 @@ Simple tests for new-array, filled-new-array and fill-array-data. +Regression test for the arm64 mterp miscalculating the fill-array-data-payload +address, zero-extending a register instead of sign-extending. diff --git a/test/412-new-array/smali/fill_array_data.smali b/test/412-new-array/smali/fill_array_data.smali index 34776db5de..2b24e56cef 100644 --- a/test/412-new-array/smali/fill_array_data.smali +++ b/test/412-new-array/smali/fill_array_data.smali @@ -15,6 +15,21 @@ .end method +.method public static intArrayFillInstructionAfterData([I)V + .registers 1 + goto :FillInstruction + +:ArrayData + .array-data 4 + 1 2 3 4 5 + .end array-data + +:FillInstruction + fill-array-data v0, :ArrayData + return-void + +.end method + .method public static shortArray([S)V .registers 1 diff --git a/test/412-new-array/src/Main.java b/test/412-new-array/src/Main.java index b9c2a053e0..d95d2c52f3 100644 --- a/test/412-new-array/src/Main.java +++ b/test/412-new-array/src/Main.java @@ -259,6 +259,45 @@ public class Main extends TestCase { } { + Method m = c.getMethod("intArrayFillInstructionAfterData", int[].class); + int[] array = new int[7]; + Object[] args = { array }; + m.invoke(null, args); + assertEquals(7, array.length); + assertEquals(1, array[0]); + assertEquals(2, array[1]); + assertEquals(3, array[2]); + assertEquals(4, array[3]); + assertEquals(5, array[4]); + assertEquals(0, array[5]); + assertEquals(0, array[6]); + + array = new int[2]; + args[0] = array; + Throwable exception = null; + try { + m.invoke(null, args); + } catch (InvocationTargetException e) { + exception = e.getCause(); + assertTrue(exception instanceof IndexOutOfBoundsException); + } + assertNotNull(exception); + exception = null; + // Test that nothing has been written to the array. + assertEquals(0, array[0]); + assertEquals(0, array[1]); + + args[0] = null; + try { + m.invoke(null, args); + } catch (InvocationTargetException e) { + exception = e.getCause(); + assertTrue(exception instanceof NullPointerException); + } + assertNotNull(exception); + } + + { Method m = c.getMethod("shortArray", short[].class); short[] array = new short[7]; Object[] args = { array }; diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java index 41771b52c4..c125e337cb 100644 --- a/test/449-checker-bce/src/Main.java +++ b/test/449-checker-bce/src/Main.java @@ -1204,9 +1204,6 @@ public class Main { /// CHECK: Deoptimize /// CHECK: Deoptimize /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize /// CHECK-NOT: Deoptimize /// CHECK: Goto /// CHECK: Goto @@ -1217,7 +1214,7 @@ public class Main { for (int i = array.length - 1 ; i >= 0; i--) { array[i] = 1; } - // Several HDeoptimize will be added. Two for each index. 
+ // Three HDeoptimize will be added for the bounds. // The null check is not necessary. for (int i = end - 2 ; i > 0; i--) { if (expectInterpreter) { @@ -1266,20 +1263,12 @@ public class Main { /// CHECK: Deoptimize /// CHECK: Deoptimize /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize /// CHECK-NOT: Deoptimize /// CHECK: Goto /// CHECK: Goto /// CHECK: Goto void foo6(int[] array, int start, int end, boolean expectInterpreter) { - // Several HDeoptimize will be added. for (int i = end; i >= start; i--) { if (expectInterpreter) { assertIsInterpreted(); @@ -1398,8 +1387,8 @@ public class Main { /// CHECK-NOT: Deoptimize void foo9(int[] array, boolean expectInterpreter) { - // Two HDeoptimize will be added. Two for the index - // and one for null check on array. + // Three HDeoptimize will be added. Two for the index and one for null check on array. Then + // simplification removes one redundant HDeoptimize. for (int i = 0 ; i < 10; i++) { if (expectInterpreter) { assertIsInterpreted(); diff --git a/test/501-regression-packed-switch/info.txt b/test/501-regression-packed-switch/info.txt index fbd93fa815..988b220a87 100644 --- a/test/501-regression-packed-switch/info.txt +++ b/test/501-regression-packed-switch/info.txt @@ -1,2 +1,4 @@ Regression test for the interpreter and optimizing's builder which used to trip when compiled code contained a packed switch with no targets. +Regression test for the arm64 mterp miscalculating the switch table +address, zero-extending a register instead of sign-extending. diff --git a/test/501-regression-packed-switch/smali/Test.smali b/test/501-regression-packed-switch/smali/Test.smali index 8756ed5f23..5a760c7880 100644 --- a/test/501-regression-packed-switch/smali/Test.smali +++ b/test/501-regression-packed-switch/smali/Test.smali @@ -27,3 +27,28 @@ .packed-switch 0x0 .end packed-switch .end method + +.method public static PackedSwitchAfterData(I)I + .registers 1 + goto :pswitch_instr + + :case0 + const/4 v0, 0x1 + return v0 + + :pswitch_data + .packed-switch 0x0 + :case0 + :case1 + .end packed-switch + + :pswitch_instr + packed-switch v0, :pswitch_data + const/4 v0, 0x7 + return v0 + + :case1 + const/4 v0, 0x4 + return v0 + +.end method diff --git a/test/501-regression-packed-switch/src/Main.java b/test/501-regression-packed-switch/src/Main.java index b80bc62c50..12bc1a8138 100644 --- a/test/501-regression-packed-switch/src/Main.java +++ b/test/501-regression-packed-switch/src/Main.java @@ -29,5 +29,10 @@ public class Main { if (result != 5) { throw new Error("Expected 5, got " + result); } + m = c.getMethod("PackedSwitchAfterData", new Class[] { int.class }); + result = (Integer) m.invoke(null, new Integer(0)); + if (result != 1) { + throw new Error("Expected 1, got " + result); + } } } diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java index ead94464bf..3366f20cc5 100644 --- a/test/527-checker-array-access-split/src/Main.java +++ b/test/527-checker-array-access-split/src/Main.java @@ -34,9 +34,21 @@ public class Main { /// CHECK-START-ARM64: int Main.constantIndexGet(int[]) instruction_simplifier_arm64 (after) /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK-NOT: Arm64IntermediateAddress + /// CHECK-NOT: IntermediateAddress /// CHECK: ArrayGet [<<Array>>,<<Index>>] + + /// CHECK-START-ARM: int 
Main.constantIndexGet(int[]) instruction_simplifier_arm (before) + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: ArrayGet [<<Array>>,<<Index>>] + + /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (after) + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArrayGet [<<Array>>,<<Index>>] + public static int constantIndexGet(int array[]) { return array[1]; } @@ -55,9 +67,22 @@ public class Main { /// CHECK: <<Const2:i\d+>> IntConstant 2 /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK-NOT: Arm64IntermediateAddress + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>] + + + /// CHECK-START-ARM: void Main.constantIndexSet(int[]) instruction_simplifier_arm (before) + /// CHECK: <<Const2:i\d+>> IntConstant 2 + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>] + /// CHECK-START-ARM: void Main.constantIndexSet(int[]) instruction_simplifier_arm (after) + /// CHECK: <<Const2:i\d+>> IntConstant 2 + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>] public static void constantIndexSet(int array[]) { array[1] = 2; @@ -76,7 +101,20 @@ public class Main { /// CHECK: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArrayGet [<<Address>>,<<Index>>] + + + /// CHECK-START-ARM: int Main.get(int[], int) instruction_simplifier_arm (before) + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: ArrayGet [<<Array>>,<<Index>>] + + /// CHECK-START-ARM: int Main.get(int[], int) instruction_simplifier_arm (after) + /// CHECK: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArrayGet [<<Address>>,<<Index>>] public static int get(int array[], int index) { @@ -102,7 +140,26 @@ public class Main { /// CHECK: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArraySet [<<Address>>,<<Index>>,<<Arg>>] + + + /// CHECK-START-ARM: void Main.set(int[], int, int) instruction_simplifier_arm (before) + /// CHECK: ParameterValue + /// CHECK: ParameterValue + /// CHECK: <<Arg:i\d+>> ParameterValue + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Arg>>] + + /// CHECK-START-ARM: void Main.set(int[], int, int) instruction_simplifier_arm (after) + /// CHECK: ParameterValue + /// CHECK: ParameterValue + /// CHECK: <<Arg:i\d+>> ParameterValue + /// CHECK: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArraySet [<<Address>>,<<Index>>,<<Arg>>] public static 
void set(int array[], int index, int value) { @@ -126,10 +183,10 @@ public class Main { /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN_after_arch (after) @@ -137,12 +194,42 @@ public class Main { /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK-NOT: Arm64IntermediateAddress + /// CHECK-NOT: IntermediateAddress /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: void Main.getSet(int[], int) instruction_simplifier_arm (before) + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: void Main.getSet(int[], int) instruction_simplifier_arm (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: void Main.getSet(int[], int) GVN_after_arch (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] public static void getSet(int array[], int index) { array[index] = array[index] + 1; } @@ -166,11 +253,11 @@ public class Main { /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] /// CHECK: NewArray - /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address2:l\d+>> 
IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN_after_arch (after) @@ -178,11 +265,45 @@ public class Main { /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] /// CHECK: NewArray - /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + + + /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) instruction_simplifier_arm (before) + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: NewArray + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) instruction_simplifier_arm (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: NewArray + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) GVN_after_arch (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: NewArray + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: ArraySet [<<Address2>>,<<Index>>,<<Add>>] public static int[] accrossGC(int array[], int index) { @@ -196,6 +317,14 @@ public class Main { * Test that the intermediate address is shared between array accesses after * the bounds check have been removed by BCE. */ + // For checker tests `instruction_simplifier_<arch> (after)` below, by the time we reach + // the architecture-specific instruction simplifier, BCE has removed the bounds checks in + // the loop. + + // Note that we do not care that the `DataOffset` is `12`. But if we do not + // specify it and any other `IntConstant` appears before that instruction, + // checker will match the previous `IntConstant`, and we will thus fail the + // check. 
/// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before) /// CHECK: <<Const1:i\d+>> IntConstant 1 @@ -207,14 +336,6 @@ public class Main { /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] - // By the time we reach the architecture-specific instruction simplifier, BCE - // has removed the bounds checks in the loop. - - // Note that we do not care that the `DataOffset` is `12`. But if we do not - // specify it and any other `IntConstant` appears before that instruction, - // checker will match the previous `IntConstant`, and we will thus fail the - // check. - /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after) /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 @@ -222,10 +343,10 @@ public class Main { /// CHECK: <<Index:i\d+>> Phi /// CHECK: If // -------------- Loop - /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN_after_arch (after) @@ -235,10 +356,47 @@ public class Main { /// CHECK: <<Index:i\d+>> Phi /// CHECK: If // -------------- Loop - /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] + + + /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (before) + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE1() GVN_after_arch (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK-NOT: 
Arm64IntermediateAddress + /// CHECK-NOT: IntermediateAddress /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] public static int canMergeAfterBCE1() { @@ -279,12 +437,12 @@ public class Main { /// CHECK: If // -------------- Loop /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] - /// CHECK-DAG: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>] - /// CHECK-DAG: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] - /// CHECK: <<Address3:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address3:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>] /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after) @@ -295,7 +453,7 @@ public class Main { /// CHECK: If // -------------- Loop /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] - /// CHECK-DAG: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] @@ -304,8 +462,55 @@ public class Main { // There should be only one intermediate address computation in the loop. /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after) - /// CHECK: Arm64IntermediateAddress - /// CHECK-NOT: Arm64IntermediateAddress + /// CHECK: IntermediateAddress + /// CHECK-NOT: IntermediateAddress + + + /// CHECK-START-ARM: int Main.canMergeAfterBCE2() instruction_simplifier_arm (before) + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] + /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Array>>,<<Index>>] + /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Array>>,<<Index1>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: ArraySet [<<Array>>,<<Index1>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE2() instruction_simplifier_arm (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] + /// CHECK-DAG: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK-DAG: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: <<Address3:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN_after_arch (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// 
CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] + /// CHECK-DAG: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>] + /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: ArraySet [<<Address>>,<<Index1>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN_after_arch (after) + /// CHECK: IntermediateAddress + /// CHECK-NOT: IntermediateAddress public static int canMergeAfterBCE2() { int[] array = {0, 1, 2, 3}; @@ -315,6 +520,37 @@ public class Main { return array[array.length - 1]; } + /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (before) + /// CHECK-DAG: <<Array1:l\d+>> NewArray + /// CHECK-DAG: <<Array2:l\d+>> NewArray + /// CHECK-DAG: <<Array3:l\d+>> NewArray + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: ArrayGet [<<Array1>>,<<Index>>] + /// CHECK-DAG: ArrayGet [<<Array2>>,<<Index>>] + /// CHECK-DAG: ArrayGet [<<Array3>>,<<Index>>] + + /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after) + /// CHECK-DAG: <<Array1:l\d+>> NewArray + /// CHECK-DAG: <<Array2:l\d+>> NewArray + /// CHECK-DAG: <<Array3:l\d+>> NewArray + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: ArrayGet [<<Array1>>,<<Index>>] + /// CHECK-DAG: ArrayGet [<<Array2>>,<<Index>>] + /// CHECK-DAG: ArrayGet [<<Array3>>,<<Index>>] + + /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after) + /// CHECK-NOT: IntermediateAddress + public static int checkLongFloatDouble() { + long[] array_long = {0, 1, 2, 3}; + float[] array_float = {(float)0.0, (float)1.0, (float)2.0, (float)3.0}; + double[] array_double = {0.0, 1.0, 2.0, 3.0}; + double s = 0.0; + + for (int i = 0; i < 4; i++) { + s += (double)array_long[i] + (double)array_float[i] + array_double[i]; + } + return (int)s; + } public static void main(String[] args) { int[] array = {123, 456, 789}; @@ -337,5 +573,7 @@ public class Main { assertIntEquals(4, canMergeAfterBCE1()); assertIntEquals(6, canMergeAfterBCE2()); + + assertIntEquals(18, checkLongFloatDouble()); } } diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java index 5a36ba5d9c..7b5cbc1504 100644 --- a/test/529-checker-unresolved/src/Main.java +++ b/test/529-checker-unresolved/src/Main.java @@ -77,6 +77,16 @@ public class Main extends UnresolvedSuperClass { expectEquals(123456789123456789f, UnresolvedClass.staticFloat); expectEquals(123456789123456789d, UnresolvedClass.staticDouble); expectEquals(o, UnresolvedClass.staticObject); + + // Check "large" values. 
+ + UnresolvedClass.staticByte = (byte)-1; + UnresolvedClass.staticChar = (char)32768; + UnresolvedClass.staticInt = -1; + + expectEquals((byte)-1, UnresolvedClass.staticByte); + expectEquals((char)32768, UnresolvedClass.staticChar); + expectEquals(-1, UnresolvedClass.staticInt); } /// CHECK-START: void Main.callUnresolvedInstanceFieldAccess(UnresolvedClass) register (before) diff --git a/test/530-checker-loops3/expected.txt b/test/530-checker-loops3/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/530-checker-loops3/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/530-checker-loops3/info.txt b/test/530-checker-loops3/info.txt new file mode 100644 index 0000000000..07d99a3e55 --- /dev/null +++ b/test/530-checker-loops3/info.txt @@ -0,0 +1 @@ +Test on loop optimizations, in particular loop-based dynamic bce. diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java new file mode 100644 index 0000000000..5ffcbe964d --- /dev/null +++ b/test/530-checker-loops3/src/Main.java @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Test on loop optimizations, in particular dynamic BCE. In all cases, +// bounds check on a[] is resolved statically. Bounds checks on b[] +// exercise various different scenarios. In all cases, loop-based +// dynamic BCE is better than the dominator-based BCE, since it +// generates the test outside the loop. +// +public class Main { + + /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (before) + /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + // + /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.oneConstantIndex(int[], int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + public static void oneConstantIndex(int[] a, int[] b) { + // Dynamic bce on b requires two deopts: one null and one bound. + for (int i = 0; i < a.length; i++) { + a[i] = b[1]; + } + } + + /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (before) + /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + // + /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.multipleConstantIndices(int[], int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + public static void multipleConstantIndices(int[] a, int[] b) { + // Dynamic bce on b requires two deopts: one null and one bound. 
+ for (int i = 0; i < a.length; i++) { + a[i] = b[0] + b[1] + b[2]; + } + } + + /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (before) + /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + // + /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.oneInvariantIndex(int[], int[], int) BCE (after) + /// CHECK-NOT: BoundsCheck + public static void oneInvariantIndex(int[] a, int[] b, int c) { + // Dynamic bce on b requires two deopts: one null and one bound. + for (int i = 0; i < a.length; i++) { + a[i] = b[c]; + } + } + + /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (before) + /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + // + /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.multipleInvariantIndices(int[], int[], int) BCE (after) + /// CHECK-NOT: BoundsCheck + public static void multipleInvariantIndices(int[] a, int[] b, int c) { + // Dynamic bce on b requires three deopts: one null and two bounds. + for (int i = 0; i < a.length; i++) { + a[i] = b[c-1] + b[c] + b[c+1]; + } + } + + /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (before) + /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + // + /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.oneUnitStride(int[], int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + public static void oneUnitStride(int[] a, int[] b) { + // Dynamic bce on b requires three deopts: one null and two bounds. + for (int i = 0; i < a.length; i++) { + a[i] = b[i]; + } + } + + /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (before) + /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + // + /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) instruction_simplifier_after_bce (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.multipleUnitStrides(int[], int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + public static void multipleUnitStrides(int[] a, int[] b) { + // Dynamic bce on b requires four deopts: one null and three bounds. + // One redundant deopt is removed by simplifier. 
+ // TODO: range information could remove another + for (int i = 1; i < a.length - 1; i++) { + a[i] = b[i-1] + b[i] + b[i+1]; + } + } + + /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (before) + /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + // + /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) instruction_simplifier_after_bce (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.multipleUnitStridesConditional(int[], int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + public static void multipleUnitStridesConditional(int[] a, int[] b) { + // Dynamic bce on b requires four deopts: one null and three bounds. + // The two conditional references may be included, since they are in range. + // One redundant deopt is removed by simplifier. + for (int i = 2; i < a.length - 2; i++) { + int t = b[i-2] + b[i] + b[i+2] + (((i & 1) == 0) ? b[i+1] : b[i-1]); + a[i] = t; + } + } + + /// CHECK-START: void Main.shifter(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + // + /// CHECK-START: void Main.shifter(int[]) BCE (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.shifter(int[]) instruction_simplifier_after_bce (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.shifter(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + public static void shifter(int[] x) { + // Real-life example: should have four deopts: one null and three bounds. + // Two redundant deopts are removed by simplifier. + for (int i = 16; i < 80; i++) { + int t = x[i - 3] ^ x[i - 8] ^ x[i - 14] ^ x[i - 16]; + x[i] = t << 1 | t >>> 31; + } + } + + /// CHECK-START: void Main.stencil(int[], int, int) BCE (before) + /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + /// CHECK-DAG: BoundsCheck loop:<<Loop>> + // + /// CHECK-START: void Main.stencil(int[], int, int) BCE (after) + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-DAG: Deoptimize loop:none + /// CHECK-NOT: Deoptimize + // + /// CHECK-START: void Main.stencil(int[], int, int) BCE (after) + /// CHECK-NOT: BoundsCheck + public static void stencil(int[] array, int start, int end) { + // Real-life example: should have four deopts: one null and three bounds. 
+ for (int i = end; i >= start; i--) { + array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5; + } + } + + // + // Verifier. + // + + public static void main(String[] args) { + int[] a = new int[10]; + int b[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int b1[] = { 100 }; + + oneConstantIndex(a, b); + for (int i = 0; i < a.length; i++) { + expectEquals(2, a[i]);; + } + try { + oneConstantIndex(a, b1); + throw new Error("Should throw AIOOBE"); + } catch (ArrayIndexOutOfBoundsException e) { + } + + multipleConstantIndices(a, b); + for (int i = 0; i < a.length; i++) { + expectEquals(6, a[i]);; + } + try { + multipleConstantIndices(a, b1); + throw new Error("Should throw AIOOBE"); + } catch (ArrayIndexOutOfBoundsException e) { + } + + oneInvariantIndex(a, b, 1); + for (int i = 0; i < a.length; i++) { + expectEquals(2, a[i]);; + } + try { + oneInvariantIndex(a, b1, 1); + throw new Error("Should throw AIOOBE"); + } catch (ArrayIndexOutOfBoundsException e) { + } + + multipleInvariantIndices(a, b, 1); + for (int i = 0; i < a.length; i++) { + expectEquals(6, a[i]);; + } + try { + multipleInvariantIndices(a, b1, 1); + throw new Error("Should throw AIOOBE"); + } catch (ArrayIndexOutOfBoundsException e) { + } + + oneUnitStride(a, b); + for (int i = 0; i < a.length; i++) { + expectEquals(i + 1, a[i]);; + } + try { + oneUnitStride(a, b1); + throw new Error("Should throw AIOOBE"); + } catch (ArrayIndexOutOfBoundsException e) { + expectEquals(100, a[0]);; + } + + multipleUnitStrides(a, b); + for (int i = 1; i < a.length - 1; i++) { + expectEquals(3 * i + 3, a[i]);; + } + try { + multipleUnitStrides(a, b1); + throw new Error("Should throw AIOOBE"); + } catch (ArrayIndexOutOfBoundsException e) { + } + + multipleUnitStridesConditional(a, b); + for (int i = 2; i < a.length - 2; i++) { + int e = 3 * i + 3 + (((i & 1) == 0) ? i + 2 : i); + expectEquals(e, a[i]);; + } + try { + multipleUnitStridesConditional(a, b1); + throw new Error("Should throw AIOOBE"); + } catch (ArrayIndexOutOfBoundsException e) { + } + + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/604-hot-static-interface/hot_static_interface.cc b/test/604-hot-static-interface/hot_static_interface.cc deleted file mode 100644 index 475a11d351..0000000000 --- a/test/604-hot-static-interface/hot_static_interface.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "art_method.h" -#include "jit/jit.h" -#include "jit/jit_code_cache.h" -#include "jit/profiling_info.h" -#include "oat_quick_method_header.h" -#include "scoped_thread_state_change.h" -#include "ScopedUtfChars.h" -#include "stack_map.h" - -namespace art { - -extern "C" JNIEXPORT void JNICALL Java_Main_waitUntilJitted(JNIEnv* env, - jclass, - jclass itf, - jstring method_name) { - jit::Jit* jit = Runtime::Current()->GetJit(); - if (jit == nullptr) { - return; - } - - ScopedObjectAccess soa(Thread::Current()); - - ScopedUtfChars chars(env, method_name); - CHECK(chars.c_str() != nullptr); - - mirror::Class* klass = soa.Decode<mirror::Class*>(itf); - ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), sizeof(void*)); - - jit::JitCodeCache* code_cache = jit->GetCodeCache(); - OatQuickMethodHeader* header = nullptr; - // Make sure there is a profiling info, required by the compiler. - ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true); - while (true) { - header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode()); - if (code_cache->ContainsPc(header->GetCode())) { - break; - } else { - // Sleep to yield to the compiler thread. - usleep(1000); - // Will either ensure it's compiled or do the compilation itself. - jit->CompileMethod(method, soa.Self(), /* osr */ false); - } - } -} - -} // namespace art diff --git a/test/604-hot-static-interface/src/Main.java b/test/604-hot-static-interface/src/Main.java index 559f15d380..04d7cd6567 100644 --- a/test/604-hot-static-interface/src/Main.java +++ b/test/604-hot-static-interface/src/Main.java @@ -22,14 +22,14 @@ public class Main { Itf.foo(new Object()); } - waitUntilJitted(Itf.class, "foo"); + ensureJitCompiled(Itf.class, "foo"); if (!Itf.foo(new Object())) { throw new Error("Unexpected result"); } } - private static native void waitUntilJitted(Class itf, String method_name); + private static native void ensureJitCompiled(Class itf, String method_name); } interface Itf { diff --git a/test/612-jit-dex-cache/expected.txt b/test/612-jit-dex-cache/expected.txt new file mode 100644 index 0000000000..6a5618ebc6 --- /dev/null +++ b/test/612-jit-dex-cache/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/612-jit-dex-cache/info.txt b/test/612-jit-dex-cache/info.txt new file mode 100644 index 0000000000..e80f642f3e --- /dev/null +++ b/test/612-jit-dex-cache/info.txt @@ -0,0 +1,2 @@ +Regression test for the JIT compiler which used to +wrongly update the dex cache of a class loader. diff --git a/test/612-jit-dex-cache/src-ex/B.java b/test/612-jit-dex-cache/src-ex/B.java new file mode 100644 index 0000000000..4da9a1da6b --- /dev/null +++ b/test/612-jit-dex-cache/src-ex/B.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class B { +} diff --git a/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java new file mode 100644 index 0000000000..1d6158a593 --- /dev/null +++ b/test/612-jit-dex-cache/src-ex/LoadedByAppClassLoader.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class LoadedByAppClassLoader { + public static void letMeInlineYou(A a) { + a.foo(); + } + + public static ClassLoader areYouB() { + // Ensure letMeInlineYou is JITted and tries to do inlining of A.foo. + // The compiler used to wrongly update the dex cache of letMeInlineYou's + // class loader. + Main.ensureJitCompiled(LoadedByAppClassLoader.class, "letMeInlineYou"); + return OtherClass.getB().getClassLoader(); + } +} + +class OtherClass { + public static Class getB() { + // This used to return the B class of another class loader. + return B.class; + } +} diff --git a/test/612-jit-dex-cache/src/A.java b/test/612-jit-dex-cache/src/A.java new file mode 100644 index 0000000000..415c712477 --- /dev/null +++ b/test/612-jit-dex-cache/src/A.java @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class A { + public int foo() { + return 42; + } +} diff --git a/test/612-jit-dex-cache/src/B.java b/test/612-jit-dex-cache/src/B.java new file mode 100644 index 0000000000..46c878b572 --- /dev/null +++ b/test/612-jit-dex-cache/src/B.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class B extends A { +} diff --git a/test/612-jit-dex-cache/src/Main.java b/test/612-jit-dex-cache/src/Main.java new file mode 100644 index 0000000000..0e4bd2245d --- /dev/null +++ b/test/612-jit-dex-cache/src/Main.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; +import java.lang.reflect.InvocationTargetException; + +import dalvik.system.PathClassLoader; + +// ClassLoader not delegating for non java. packages. +class DelegateLastPathClassLoader extends PathClassLoader { + + public DelegateLastPathClassLoader(String dexPath, ClassLoader parent) { + super(dexPath, parent); + } + + @Override + protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException { + if (!name.startsWith("java.")) { + try { + return findClass(name); + } catch (ClassNotFoundException ignore) { + // Ignore and fall through to parent class loader. + } + } + return super.loadClass(name, resolve); + } +} + +public class Main { + + private static Class classFromDifferentLoader() throws Exception { + final String DEX_FILE = System.getenv("DEX_LOCATION") + "/612-jit-dex-cache-ex.jar"; + ClassLoader loader = new DelegateLastPathClassLoader(DEX_FILE, Main.class.getClassLoader()); + return loader.loadClass("LoadedByAppClassLoader"); + } + + public static void main(String[] args) throws Exception { + System.loadLibrary(args[0]); + Class cls = classFromDifferentLoader(); + Method m = cls.getDeclaredMethod("letMeInlineYou", A.class); + B b = new B(); + // Invoke the method enough times to get an inline cache and get JITted. + for (int i = 0; i < 10000; ++i) { + m.invoke(null, b); + } + m = cls.getDeclaredMethod("areYouB", null); + ClassLoader loader = (ClassLoader) m.invoke(null); + if (loader != cls.getClassLoader()) { + throw new Error("Wrong class loader"); + } + } + + public static native void ensureJitCompiled(Class cls, String method_name); +} diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk index 75e74eca3a..7813d16657 100644 --- a/test/Android.libarttest.mk +++ b/test/Android.libarttest.mk @@ -47,8 +47,7 @@ LIBARTTEST_COMMON_SRC_FILES := \ 570-checker-osr/osr.cc \ 595-profile-saving/profile-saving.cc \ 596-app-images/app_images.cc \ - 597-deopt-new-string/deopt.cc \ - 604-hot-static-interface/hot_static_interface.cc + 597-deopt-new-string/deopt.cc ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index dd6b6f3fbc..8f8b667429 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -527,7 +527,7 @@ TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS := # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT). 
# 484: Baker's fast path based read barrier compiler instrumentation generates code containing # more parallel moves on x86, thus some Checker assertions may fail. -# 527: On ARM64, the read barrier instrumentation does not support the HArm64IntermediateAddress +# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress # instruction yet (b/26601270). # 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are # not yet handled in the read barrier configuration. diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc index fd41fd281f..e70a95cbb5 100644 --- a/test/common/runtime_state.cc +++ b/test/common/runtime_state.cc @@ -18,10 +18,14 @@ #include "base/logging.h" #include "dex_file-inl.h" +#include "jit/jit.h" +#include "jit/jit_code_cache.h" #include "mirror/class-inl.h" #include "nth_caller_visitor.h" +#include "oat_quick_method_header.h" #include "runtime.h" #include "scoped_thread_state_change.h" +#include "ScopedUtfChars.h" #include "stack.h" #include "thread-inl.h" @@ -116,4 +120,38 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_compiledWithOptimizing(JNIEnv* e return JNI_TRUE; } +extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env, + jclass, + jclass cls, + jstring method_name) { + jit::Jit* jit = Runtime::Current()->GetJit(); + if (jit == nullptr) { + return; + } + + ScopedObjectAccess soa(Thread::Current()); + + ScopedUtfChars chars(env, method_name); + CHECK(chars.c_str() != nullptr); + + mirror::Class* klass = soa.Decode<mirror::Class*>(cls); + ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), sizeof(void*)); + + jit::JitCodeCache* code_cache = jit->GetCodeCache(); + OatQuickMethodHeader* header = nullptr; + // Make sure there is a profiling info, required by the compiler. + ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true); + while (true) { + header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode()); + if (code_cache->ContainsPc(header->GetCode())) { + break; + } else { + // Sleep to yield to the compiler thread. + usleep(1000); + // Will either ensure it's compiled or do the compilation itself. + jit->CompileMethod(method, soa.Self(), /* osr */ false); + } + } +} + } // namespace art diff --git a/test/run-test b/test/run-test index bbcd4b0f0b..1ef5428726 100755 --- a/test/run-test +++ b/test/run-test @@ -37,7 +37,7 @@ test_dir="test-$$" if [ -z "$TMPDIR" ]; then tmp_dir="/tmp/$USER/${test_dir}" else - tmp_dir="${TMPDIR}/$USER/${test_dir}" + tmp_dir="${TMPDIR}/${test_dir}" fi checker="${progdir}/../tools/checker/checker.py" export JAVA="java" |
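The regression tests added for 412-new-array and 501-regression-packed-switch above share one root cause: the 32-bit offset to a fill-array-data payload or packed-switch table is signed, and when the data is laid out before the instruction (as in the new smali methods) that offset is negative, so widening it to a 64-bit address on arm64 must sign-extend rather than zero-extend. A minimal arithmetic sketch of the failure mode, with made-up addresses and offsets purely for illustration (the real fix lives in the arm64 mterp assembly, not in Java):

public class SignExtendSketch {
  public static void main(String[] args) {
    long insnAddress = 0x70000000L; // hypothetical address of the fill-array-data instruction
    int offsetInCodeUnits = -6;     // payload placed before the instruction => negative offset
    // Correct: sign-extend the signed 32-bit offset, then scale by the 2-byte code-unit size.
    long good = insnAddress + 2L * offsetInCodeUnits;
    // Buggy: zero-extend the same bits, as the arm64 mterp used to do.
    long bad = insnAddress + 2L * (offsetInCodeUnits & 0xFFFFFFFFL);
    System.out.println(Long.toHexString(good)); // 6ffffff4 (just before the instruction)
    System.out.println(Long.toHexString(bad));  // 26ffffff4 (far outside the method)
  }
}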
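The 527-checker-array-access-split expectations above verify that the ARM and ARM64 simplifiers split an array access into an IntermediateAddress (array base plus the data offset) followed by a scaled index access, so that GVN_after_arch can share one base computation between neighbouring gets and sets. A small sketch of that address arithmetic, with addresses modelled as plain longs (the 12-byte data offset matches the IntConstant 12 in the checks, but is otherwise just an assumption of this illustration):

public class IntermediateAddressSketch {
  static final long INT_ARRAY_DATA_OFFSET = 12; // assumed offset of the first int element in the array object
  // Before the simplifier: every access recomputes base + offset + 4 * index on its own.
  static long directElementAddress(long arrayBase, int index) {
    return arrayBase + INT_ARRAY_DATA_OFFSET + 4L * index;
  }
  // After the simplifier: the intermediate address is its own instruction...
  static long intermediateAddress(long arrayBase) {
    return arrayBase + INT_ARRAY_DATA_OFFSET;
  }
  // ...and ArrayGet/ArraySet only add the scaled index, so GVN can fold duplicate
  // IntermediateAddress computations into one, as the GVN_after_arch checks expect.
  static long elementAddress(long intermediate, int index) {
    return intermediate + 4L * index;
  }
}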
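The comments in the new 530-checker-loops3 test count how many Deoptimize guards loop-based dynamic BCE places outside each loop (one for the null check plus one or more for the bounds). A hand-written Java equivalent of what that transformation effectively does for oneConstantIndex, offered only as a sketch: the deoptimize() helper is hypothetical, and the real Deoptimize instruction re-enters the interpreter (which then throws the precise exception) rather than throwing directly:

public class DynamicBceSketch {
  static void oneConstantIndexLowered(int[] a, int[] b) {
    // The two "Deoptimize loop:none" guards hoisted out of the loop:
    // one for b being null, one for the constant index 1 being out of bounds.
    if (b == null || b.length <= 1) {
      deoptimize();
    }
    for (int i = 0; i < a.length; i++) {
      a[i] = b[1]; // no per-iteration null or bounds check left in compiled code
    }
  }

  private static void deoptimize() {
    // Placeholder for the runtime's deoptimization; here it just aborts the sketch.
    throw new IllegalStateException("deoptimize");
  }
}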