77 files changed, 2002 insertions, 1467 deletions
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index be149af82a..52940687de 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -357,7 +357,7 @@ CompilerDriver::CompilerDriver( compiler_kind_(compiler_kind), instruction_set_(instruction_set), instruction_set_features_(instruction_set_features), - freezing_constructor_lock_("freezing constructor lock"), + requires_constructor_barrier_lock_("constructor barrier lock"), compiled_classes_lock_("compiled classes lock"), compiled_methods_lock_("compiled method lock"), compiled_methods_(MethodTable::key_compare()), @@ -2006,6 +2006,28 @@ static void CheckAndClearResolveException(Thread* self) self->ClearException(); } +bool CompilerDriver::RequiresConstructorBarrier(const DexFile& dex_file, + uint16_t class_def_idx) const { + const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx); + const uint8_t* class_data = dex_file.GetClassData(class_def); + if (class_data == nullptr) { + // Empty class such as a marker interface. + return false; + } + ClassDataItemIterator it(dex_file, class_data); + while (it.HasNextStaticField()) { + it.Next(); + } + // We require a constructor barrier if there are final instance fields. + while (it.HasNextInstanceField()) { + if (it.MemberIsFinal()) { + return true; + } + it.Next(); + } + return false; +} + class ResolveClassFieldsAndMethodsVisitor : public CompilationVisitor { public: explicit ResolveClassFieldsAndMethodsVisitor(const ParallelCompilationManager* manager) @@ -2110,9 +2132,10 @@ class ResolveClassFieldsAndMethodsVisitor : public CompilationVisitor { DCHECK(!it.HasNext()); } } - if (requires_constructor_barrier) { - manager_->GetCompiler()->AddRequiresConstructorBarrier(self, &dex_file, class_def_index); - } + manager_->GetCompiler()->SetRequiresConstructorBarrier(self, + &dex_file, + class_def_index, + requires_constructor_barrier); } private: @@ -2769,16 +2792,29 @@ size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const { return non_relative_linker_patch_count_; } -void CompilerDriver::AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file, - uint16_t class_def_index) { - WriterMutexLock mu(self, freezing_constructor_lock_); - freezing_constructor_classes_.insert(ClassReference(dex_file, class_def_index)); +void CompilerDriver::SetRequiresConstructorBarrier(Thread* self, + const DexFile* dex_file, + uint16_t class_def_index, + bool requires) { + WriterMutexLock mu(self, requires_constructor_barrier_lock_); + requires_constructor_barrier_.emplace(ClassReference(dex_file, class_def_index), requires); } -bool CompilerDriver::RequiresConstructorBarrier(Thread* self, const DexFile* dex_file, - uint16_t class_def_index) const { - ReaderMutexLock mu(self, freezing_constructor_lock_); - return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0; +bool CompilerDriver::RequiresConstructorBarrier(Thread* self, + const DexFile* dex_file, + uint16_t class_def_index) { + ClassReference class_ref(dex_file, class_def_index); + { + ReaderMutexLock mu(self, requires_constructor_barrier_lock_); + auto it = requires_constructor_barrier_.find(class_ref); + if (it != requires_constructor_barrier_.end()) { + return it->second; + } + } + WriterMutexLock mu(self, requires_constructor_barrier_lock_); + const bool requires = RequiresConstructorBarrier(*dex_file, class_def_index); + requires_constructor_barrier_.emplace(class_ref, requires); + return requires; } std::string 
CompilerDriver::GetMemoryUsageString(bool extended) const { diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 64a06a2f83..905f84dd45 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -183,12 +183,15 @@ class CompilerDriver { // Remove and delete a compiled method. void RemoveCompiledMethod(const MethodReference& method_ref) REQUIRES(!compiled_methods_lock_); - void AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file, - uint16_t class_def_index) - REQUIRES(!freezing_constructor_lock_); - bool RequiresConstructorBarrier(Thread* self, const DexFile* dex_file, - uint16_t class_def_index) const - REQUIRES(!freezing_constructor_lock_); + void SetRequiresConstructorBarrier(Thread* self, + const DexFile* dex_file, + uint16_t class_def_index, + bool requires) + REQUIRES(!requires_constructor_barrier_lock_); + bool RequiresConstructorBarrier(Thread* self, + const DexFile* dex_file, + uint16_t class_def_index) + REQUIRES(!requires_constructor_barrier_lock_); // Callbacks from compiler to see what runtime checks must be generated. @@ -619,6 +622,8 @@ class CompilerDriver { void FreeThreadPools(); void CheckThreadPools(); + bool RequiresConstructorBarrier(const DexFile& dex_file, uint16_t class_def_idx) const; + const CompilerOptions* const compiler_options_; VerificationResults* const verification_results_; DexFileToMethodInlinerMap* const method_inliner_map_; @@ -629,9 +634,11 @@ class CompilerDriver { const InstructionSet instruction_set_; const InstructionSetFeatures* const instruction_set_features_; - // All class references that require - mutable ReaderWriterMutex freezing_constructor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - std::set<ClassReference> freezing_constructor_classes_ GUARDED_BY(freezing_constructor_lock_); + // All class references that require constructor barriers. If the class reference is not in the + // set then the result has not yet been computed. + mutable ReaderWriterMutex requires_constructor_barrier_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + std::map<ClassReference, bool> requires_constructor_barrier_ + GUARDED_BY(requires_constructor_barrier_lock_); typedef SafeMap<const ClassReference, CompiledClass*> ClassTable; // All class references that this compiler has compiled. diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index c747ffa65b..8bb462c667 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -576,7 +576,16 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { } } else if (object->GetClass<kVerifyNone>()->IsStringClass()) { bin = kBinString; // Strings are almost always immutable (except for object header). - } // else bin = kBinRegular + } else if (object->GetClass<kVerifyNone>() == + Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangObject)) { + // Instance of java lang object, probably a lock object. This means it will be dirty when we + // synchronize on it. + bin = kBinMiscDirty; + } else if (object->IsDexCache()) { + // Dex file field becomes dirty when the image is loaded. 
+ bin = kBinMiscDirty; + } + // else bin = kBinRegular } size_t oat_index = GetOatIndex(object); diff --git a/compiler/image_writer.h b/compiler/image_writer.h index f204b28380..0cb6aea9b2 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -149,16 +149,17 @@ class ImageWriter FINAL { void RecordImageAllocations() SHARED_REQUIRES(Locks::mutator_lock_); // Classify different kinds of bins that objects end up getting packed into during image writing. + // Ordered from dirtiest to cleanest (until ArtMethods). enum Bin { - // Likely-clean: - kBinString, // [String] Almost always immutable (except for obj header). + kBinMiscDirty, // Dex caches, object locks, etc... + kBinClassVerified, // Class verified, but initializers haven't been run // Unknown mix of clean/dirty: kBinRegular, - // Likely-dirty: + kBinClassInitialized, // Class initializers have been run // All classes get their own bins since their fields often dirty kBinClassInitializedFinalStatics, // Class initializers have been run, no non-final statics - kBinClassInitialized, // Class initializers have been run - kBinClassVerified, // Class verified, but initializers haven't been run + // Likely-clean: + kBinString, // [String] Almost always immutable (except for obj header). // Add more bins here if we add more segregation code. // Non mirror fields must be below. // ArtFields should be always clean. diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index b0f0893720..06b39680b2 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -567,10 +567,10 @@ void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } -static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, const CompilerDriver& driver) { +static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) { Thread* self = Thread::Current(); return cu->IsConstructor() - && driver.RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex()); + && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex()); } // Returns true if `block` has only one successor which starts at the next @@ -616,7 +616,7 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, if (graph_->ShouldGenerateConstructorBarrier()) { // The compilation unit is null during testing. if (dex_compilation_unit_ != nullptr) { - DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, *compiler_driver_)) + DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) << "Inconsistent use of ShouldGenerateConstructorBarrier. 
Should not generate a barrier."; } AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc)); diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 3da82851a6..863dd1c6f6 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -124,11 +124,12 @@ public: \ void Set##name() { SetBit(k##name); } \ bool Get##name() const { return IsBitSet(k##name); } \ private: \ -static constexpr int k##name = bit +static constexpr size_t k##name = bit class IntrinsicOptimizations : public ValueObject { public: - explicit IntrinsicOptimizations(HInvoke* invoke) : value_(invoke->GetIntrinsicOptimizations()) {} + explicit IntrinsicOptimizations(HInvoke* invoke) + : value_(invoke->GetIntrinsicOptimizations()) {} explicit IntrinsicOptimizations(const HInvoke& invoke) : value_(invoke.GetIntrinsicOptimizations()) {} @@ -138,15 +139,17 @@ class IntrinsicOptimizations : public ValueObject { protected: bool IsBitSet(uint32_t bit) const { + DCHECK_LT(bit, sizeof(uint32_t) * kBitsPerByte); return (*value_ & (1 << bit)) != 0u; } void SetBit(uint32_t bit) { - *(const_cast<uint32_t*>(value_)) |= (1 << bit); + DCHECK_LT(bit, sizeof(uint32_t) * kBitsPerByte); + *(const_cast<uint32_t* const>(value_)) |= (1 << bit); } private: - const uint32_t *value_; + const uint32_t* const value_; DISALLOW_COPY_AND_ASSIGN(IntrinsicOptimizations); }; @@ -158,7 +161,7 @@ public: \ void Set##name() { SetBit(k##name); } \ bool Get##name() const { return IsBitSet(k##name); } \ private: \ -static constexpr int k##name = bit + kNumberOfGenericOptimizations +static constexpr size_t k##name = bit + kNumberOfGenericOptimizations class StringEqualsOptimizations : public IntrinsicOptimizations { public: diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 4b94c94f39..86b7bc138c 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1394,15 +1394,13 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); codegen_->AddSlowPath(slow_path); - Label ok; + Label conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); - if (!optimizations.GetDestinationIsSource()) { - if (!src_pos.IsConstant() || !dest_pos.IsConstant()) { - __ cmp(src, ShifterOperand(dest)); - } + if (!optimizations.GetDestinationIsSource() && + (!src_pos.IsConstant() || !dest_pos.IsConstant())) { + __ cmp(src, ShifterOperand(dest)); } - // If source and destination are the same, we go to slow path if we need to do // forward copying. 
if (src_pos.IsConstant()) { @@ -1413,14 +1411,14 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); } else { if (!optimizations.GetDestinationIsSource()) { - __ b(&ok, NE); + __ b(&conditions_on_positions_validated, NE); } __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant)); __ b(slow_path->GetEntryLabel(), GT); } } else { if (!optimizations.GetDestinationIsSource()) { - __ b(&ok, NE); + __ b(&conditions_on_positions_validated, NE); } if (dest_pos.IsConstant()) { int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); @@ -1431,7 +1429,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { __ b(slow_path->GetEntryLabel(), LT); } - __ Bind(&ok); + __ Bind(&conditions_on_positions_validated); if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. @@ -1482,7 +1480,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { bool did_unpoison = false; if (!optimizations.GetDestinationIsNonPrimitiveArray() || !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoisoned them + // One or two of the references need to be unpoisoned. Unpoison them // both to make the identity check valid. __ MaybeUnpoisonHeapReference(temp1); __ MaybeUnpoisonHeapReference(temp2); @@ -1491,6 +1489,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp1->component_type_ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); __ MaybeUnpoisonHeapReference(temp3); @@ -1501,7 +1500,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // Bail out if the source is not a non primitive array. - // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp2->component_type_ __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset); __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); __ MaybeUnpoisonHeapReference(temp3); @@ -1518,8 +1517,10 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { if (!did_unpoison) { __ MaybeUnpoisonHeapReference(temp1); } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); // No need to unpoison the result, we're comparing against null. __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); @@ -1530,8 +1531,10 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. 
+ // /* HeapReference<Class> */ temp1 = src->klass_ __ LoadFromOffset(kLoadWord, temp1, src, class_offset); __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); __ MaybeUnpoisonHeapReference(temp3); @@ -1585,7 +1588,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { temp2, dest, Register(kNoRegister), - /* can_be_null */ false); + /* value_can_be_null */ false); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 927e2ecfbb..04ae3a6732 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1819,39 +1819,32 @@ static void GenSystemArrayCopyAddresses(vixl::MacroAssembler* masm, const Register& dst_base, const Register& src_end) { DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar) - << "Unexpected element type: " - << type; - const int32_t char_size = Primitive::ComponentSize(type); - const int32_t char_size_shift = Primitive::ComponentSizeShift(type); + << "Unexpected element type: " << type; + const int32_t element_size = Primitive::ComponentSize(type); + const int32_t element_size_shift = Primitive::ComponentSizeShift(type); - uint32_t offset = mirror::Array::DataOffset(char_size).Uint32Value(); + uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); if (src_pos.IsConstant()) { int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); - __ Add(src_base, src, char_size * constant + offset); + __ Add(src_base, src, element_size * constant + data_offset); } else { - __ Add(src_base, src, offset); - __ Add(src_base, - src_base, - Operand(XRegisterFrom(src_pos), LSL, char_size_shift)); + __ Add(src_base, src, data_offset); + __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift)); } if (dst_pos.IsConstant()) { int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue(); - __ Add(dst_base, dst, char_size * constant + offset); + __ Add(dst_base, dst, element_size * constant + data_offset); } else { - __ Add(dst_base, dst, offset); - __ Add(dst_base, - dst_base, - Operand(XRegisterFrom(dst_pos), LSL, char_size_shift)); + __ Add(dst_base, dst, data_offset); + __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift)); } if (copy_length.IsConstant()) { int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); - __ Add(src_end, src_base, char_size * constant); + __ Add(src_end, src_base, element_size * constant); } else { - __ Add(src_end, - src_base, - Operand(XRegisterFrom(copy_length), LSL, char_size_shift)); + __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift)); } } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 9ca4ef049a..1d32dc7bc5 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1150,15 +1150,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); codegen_->AddSlowPath(slow_path); - NearLabel ok; + NearLabel conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); - if (!optimizations.GetDestinationIsSource()) { - if (!src_pos.IsConstant() || 
!dest_pos.IsConstant()) { - __ cmpl(src, dest); - } + if (!optimizations.GetDestinationIsSource() && + (!src_pos.IsConstant() || !dest_pos.IsConstant())) { + __ cmpl(src, dest); } - // If source and destination are the same, we go to slow path if we need to do // forward copying. if (src_pos.IsConstant()) { @@ -1169,14 +1167,14 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); } else { if (!optimizations.GetDestinationIsSource()) { - __ j(kNotEqual, &ok); + __ j(kNotEqual, &conditions_on_positions_validated); } __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant)); __ j(kGreater, slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { - __ j(kNotEqual, &ok); + __ j(kNotEqual, &conditions_on_positions_validated); } if (dest_pos.IsConstant()) { int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); @@ -1188,7 +1186,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } } - __ Bind(&ok); + __ Bind(&conditions_on_positions_validated); if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. @@ -1241,7 +1239,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { bool did_unpoison = false; if (!optimizations.GetDestinationIsNonPrimitiveArray() || !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoisoned them + // One or two of the references need to be unpoisoned. Unpoison them // both to make the identity check valid. __ MaybeUnpoisonHeapReference(temp1); __ MaybeUnpoisonHeapReference(temp2); @@ -1250,6 +1248,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ TMP = temp1->component_type_ __ movl(CpuRegister(TMP), Address(temp1, component_offset)); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1260,6 +1259,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ TMP = temp2->component_type_ __ movl(CpuRegister(TMP), Address(temp2, component_offset)); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1276,8 +1276,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!did_unpoison) { __ MaybeUnpoisonHeapReference(temp1); } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ __ movl(temp1, Address(temp1, component_offset)); __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ __ movl(temp1, Address(temp1, super_offset)); // No need to unpoison the result, we're comparing against null. __ testl(temp1, temp1); @@ -1289,8 +1291,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. 
+ // /* HeapReference<Class> */ temp1 = src->klass_ __ movl(temp1, Address(src, class_offset)); __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ TMP = temp1->component_type_ __ movl(CpuRegister(TMP), Address(temp1, component_offset)); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, slow_path->GetEntryLabel()); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 1086cbf503..1afa36a89c 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -535,11 +535,16 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) { } } -void HLoopInformation::PopulateIrreducibleRecursive(HBasicBlock* block) { - if (blocks_.IsBitSet(block->GetBlockId())) { +void HLoopInformation::PopulateIrreducibleRecursive(HBasicBlock* block, ArenaBitVector* finalized) { + size_t block_id = block->GetBlockId(); + + // If `block` is in `finalized`, we know its membership in the loop has been + // decided and it does not need to be revisited. + if (finalized->IsBitSet(block_id)) { return; } + bool is_finalized = false; if (block->IsLoopHeader()) { // If we hit a loop header in an irreducible loop, we first check if the // pre header of that loop belongs to the currently analyzed loop. If it does, @@ -547,26 +552,36 @@ void HLoopInformation::PopulateIrreducibleRecursive(HBasicBlock* block) { // Note that we cannot use GetPreHeader, as the loop may have not been populated // yet. HBasicBlock* pre_header = block->GetPredecessors()[0]; - PopulateIrreducibleRecursive(pre_header); + PopulateIrreducibleRecursive(pre_header, finalized); if (blocks_.IsBitSet(pre_header->GetBlockId())) { - blocks_.SetBit(block->GetBlockId()); block->SetInLoop(this); + blocks_.SetBit(block_id); + finalized->SetBit(block_id); + is_finalized = true; + HLoopInformation* info = block->GetLoopInformation(); for (HBasicBlock* back_edge : info->GetBackEdges()) { - PopulateIrreducibleRecursive(back_edge); + PopulateIrreducibleRecursive(back_edge, finalized); } } } else { // Visit all predecessors. If one predecessor is part of the loop, this // block is also part of this loop. for (HBasicBlock* predecessor : block->GetPredecessors()) { - PopulateIrreducibleRecursive(predecessor); - if (blocks_.IsBitSet(predecessor->GetBlockId())) { - blocks_.SetBit(block->GetBlockId()); + PopulateIrreducibleRecursive(predecessor, finalized); + if (!is_finalized && blocks_.IsBitSet(predecessor->GetBlockId())) { block->SetInLoop(this); + blocks_.SetBit(block_id); + finalized->SetBit(block_id); + is_finalized = true; } } } + + // All predecessors have been recursively visited. Mark finalized if not marked yet. + if (!is_finalized) { + finalized->SetBit(block_id); + } } void HLoopInformation::Populate() { @@ -576,22 +591,37 @@ void HLoopInformation::Populate() { // to end the recursion. // This is a recursive implementation of the algorithm described in // "Advanced Compiler Design & Implementation" (Muchnick) p192. 
+ HGraph* graph = header_->GetGraph(); blocks_.SetBit(header_->GetBlockId()); header_->SetInLoop(this); + + bool is_irreducible_loop = false; for (HBasicBlock* back_edge : GetBackEdges()) { DCHECK(back_edge->GetDominator() != nullptr); if (!header_->Dominates(back_edge)) { - irreducible_ = true; - header_->GetGraph()->SetHasIrreducibleLoops(true); - PopulateIrreducibleRecursive(back_edge); - } else { - if (header_->GetGraph()->IsCompilingOsr()) { - irreducible_ = true; - header_->GetGraph()->SetHasIrreducibleLoops(true); - } + is_irreducible_loop = true; + break; + } + } + + if (is_irreducible_loop) { + ArenaBitVector visited(graph->GetArena(), + graph->GetBlocks().size(), + /* expandable */ false, + kArenaAllocGraphBuilder); + for (HBasicBlock* back_edge : GetBackEdges()) { + PopulateIrreducibleRecursive(back_edge, &visited); + } + } else { + for (HBasicBlock* back_edge : GetBackEdges()) { PopulateRecursive(back_edge); } } + + if (is_irreducible_loop || graph->IsCompilingOsr()) { + irreducible_ = true; + graph->SetHasIrreducibleLoops(true); + } } HBasicBlock* HLoopInformation::GetPreHeader() const { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 0088fed62a..dc5a8fa9cb 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -727,7 +727,7 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { private: // Internal recursive implementation of `Populate`. void PopulateRecursive(HBasicBlock* block); - void PopulateIrreducibleRecursive(HBasicBlock* block); + void PopulateIrreducibleRecursive(HBasicBlock* block, ArenaBitVector* finalized); HBasicBlock* header_; HSuspendCheck* suspend_check_; diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 0889098fa6..370583e3ba 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -68,6 +68,7 @@ #include "mirror/class_loader.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" +#include "oat_file_assistant.h" #include "oat_writer.h" #include "os.h" #include "runtime.h" @@ -1325,7 +1326,7 @@ class Dex2Oat FINAL { TimingLogger::ScopedTiming t3("Loading image checksum", timings_); std::vector<gc::space::ImageSpace*> image_spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces(); - image_file_location_oat_checksum_ = image_spaces[0]->GetImageHeader().GetOatChecksum(); + image_file_location_oat_checksum_ = OatFileAssistant::CalculateCombinedImageChecksum(); image_file_location_oat_data_begin_ = reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin()); image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta(); diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc index 811d9fd025..cbd0c40b5a 100644 --- a/imgdiag/imgdiag.cc +++ b/imgdiag/imgdiag.cc @@ -18,6 +18,7 @@ #include <stdlib.h> #include <fstream> +#include <functional> #include <iostream> #include <string> #include <vector> @@ -189,6 +190,28 @@ class ImgDiagDumper { return oss.str(); } + // Aggregate and detail class data from an image diff. 
+ struct ClassData { + int dirty_object_count = 0; + + // Track only the byte-per-byte dirtiness (in bytes) + int dirty_object_byte_count = 0; + + // Track the object-by-object dirtiness (in bytes) + int dirty_object_size_in_bytes = 0; + + int clean_object_count = 0; + + std::string descriptor; + + int false_dirty_byte_count = 0; + int false_dirty_object_count = 0; + std::vector<mirror::Object*> false_dirty_objects; + + // Remote pointers to dirty objects + std::vector<mirror::Object*> dirty_objects; + }; + // Look at /proc/$pid/mem and only diff the things from there bool DumpImageDiffMap(pid_t image_diff_pid, const backtrace_map_t& boot_map) SHARED_REQUIRES(Locks::mutator_lock_) { @@ -373,16 +396,10 @@ class ImgDiagDumper { } } + std::map<mirror::Class*, ClassData> class_data; + // Walk each object in the remote image space and compare it against ours size_t different_objects = 0; - std::map<mirror::Class*, int /*count*/> dirty_object_class_map; - // Track only the byte-per-byte dirtiness (in bytes) - std::map<mirror::Class*, int /*byte_count*/> dirty_object_byte_count; - // Track the object-by-object dirtiness (in bytes) - std::map<mirror::Class*, int /*byte_count*/> dirty_object_size_in_bytes; - std::map<mirror::Class*, int /*count*/> clean_object_class_map; - - std::map<mirror::Class*, std::string> class_to_descriptor_map; std::map<off_t /* field offset */, int /* count */> art_method_field_dirty_count; std::vector<ArtMethod*> art_method_dirty_objects; @@ -392,19 +409,15 @@ class ImgDiagDumper { // List of local objects that are clean, but located on dirty pages. std::vector<mirror::Object*> false_dirty_objects; - std::map<mirror::Class*, int /*byte_count*/> false_dirty_byte_count; - std::map<mirror::Class*, int /*object_count*/> false_dirty_object_count; - std::map<mirror::Class*, std::vector<mirror::Object*>> false_dirty_objects_map; size_t false_dirty_object_bytes = 0; - // Remote pointers to dirty objects - std::map<mirror::Class*, std::vector<mirror::Object*>> dirty_objects_by_class; // Look up remote classes by their descriptor std::map<std::string, mirror::Class*> remote_class_map; // Look up local classes by their descriptor std::map<std::string, mirror::Class*> local_class_map; - std::unordered_set<mirror::Object*> dirty_objects; + // Use set to have sorted output. + std::set<mirror::Object*> dirty_objects; size_t dirty_object_bytes = 0; const uint8_t* begin_image_ptr = image_begin_unaligned; @@ -453,7 +466,7 @@ class ImgDiagDumper { dirty_object_bytes += obj->SizeOf(); dirty_objects.insert(obj); - ++dirty_object_class_map[klass]; + ++class_data[klass].dirty_object_count; // Go byte-by-byte and figure out what exactly got dirtied size_t dirty_byte_count_per_object = 0; @@ -462,14 +475,14 @@ class ImgDiagDumper { dirty_byte_count_per_object++; } } - dirty_object_byte_count[klass] += dirty_byte_count_per_object; - dirty_object_size_in_bytes[klass] += obj->SizeOf(); + class_data[klass].dirty_object_byte_count += dirty_byte_count_per_object; + class_data[klass].dirty_object_size_in_bytes += obj->SizeOf(); different_object = true; - dirty_objects_by_class[klass].push_back(remote_obj); + class_data[klass].dirty_objects.push_back(remote_obj); } else { - ++clean_object_class_map[klass]; + ++class_data[klass].clean_object_count; } std::string descriptor = GetClassDescriptor(klass); @@ -503,10 +516,10 @@ class ImgDiagDumper { // This object was either never mutated or got mutated back to the same value. // TODO: Do I want to distinguish a "different" vs a "dirty" page here? 
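Editor's aside: the imgdiag rework in this hunk folds the many per-class std::maps into a single ClassData record per class and, via the generalized SortByValueDesc helper shown further down, ranks classes by whichever counter is of interest using a value-mapper lambda. A minimal standalone sketch of that pattern, using hypothetical simplified types (plain std::string keys instead of mirror::Class*, and only two counters):

#include <algorithm>
#include <functional>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Hypothetical, trimmed-down stand-in for imgdiag's ClassData.
struct ClassData {
  int dirty_object_count = 0;
  int clean_object_count = 0;
};

// Sort map entries in descending order of a value derived from the mapped type,
// mirroring the idea of SortByValueDesc<K, V, D>(map, value_mapper).
template <typename K, typename V, typename D>
std::vector<std::pair<V, K>> SortByValueDesc(
    const std::map<K, D>& map, std::function<V(const D&)> value_mapper) {
  std::vector<std::pair<V, K>> value_key_vector;
  for (const auto& kv_pair : map) {
    value_key_vector.push_back(std::make_pair(value_mapper(kv_pair.second), kv_pair.first));
  }
  // Descending: largest derived value first (pair sorts by value, then key).
  std::sort(value_key_vector.rbegin(), value_key_vector.rend());
  return value_key_vector;
}

// Usage: rank classes by how many dirty objects they contributed.
//   std::map<std::string, ClassData> class_data = ...;
//   auto by_dirty = SortByValueDesc<std::string, int, ClassData>(
//       class_data, [](const ClassData& d) { return d.dirty_object_count; });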
false_dirty_objects.push_back(obj); - false_dirty_objects_map[klass].push_back(obj); + class_data[klass].false_dirty_objects.push_back(obj); false_dirty_object_bytes += obj->SizeOf(); - false_dirty_byte_count[obj->GetClass()] += obj->SizeOf(); - false_dirty_object_count[obj->GetClass()] += 1; + class_data[obj->GetClass()].false_dirty_byte_count += obj->SizeOf(); + class_data[obj->GetClass()].false_dirty_object_count += 1; } if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) { @@ -515,7 +528,7 @@ class ImgDiagDumper { } // Unconditionally store the class descriptor in case we need it later - class_to_descriptor_map[klass] = descriptor; + class_data[klass].descriptor = descriptor; current += RoundUp(obj->SizeOf(), kObjectAlignment); } @@ -540,8 +553,10 @@ class ImgDiagDumper { << ""; // vector of pairs (int count, Class*) - auto dirty_object_class_values = SortByValueDesc(dirty_object_class_map); - auto clean_object_class_values = SortByValueDesc(clean_object_class_map); + auto dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>( + class_data, [](const ClassData& d) { return d.dirty_object_count; }); + auto clean_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>( + class_data, [](const ClassData& d) { return d.clean_object_count; }); os << "\n" << " Dirty objects: " << dirty_objects.size() << "\n"; for (mirror::Object* obj : dirty_objects) { @@ -612,10 +627,11 @@ class ImgDiagDumper { for (const auto& vk_pair : dirty_object_class_values) { int dirty_object_count = vk_pair.first; mirror::Class* klass = vk_pair.second; - int object_sizes = dirty_object_size_in_bytes[klass]; - float avg_dirty_bytes_per_class = dirty_object_byte_count[klass] * 1.0f / object_sizes; + int object_sizes = class_data[klass].dirty_object_size_in_bytes; + float avg_dirty_bytes_per_class = + class_data[klass].dirty_object_byte_count * 1.0f / object_sizes; float avg_object_size = object_sizes * 1.0f / dirty_object_count; - const std::string& descriptor = class_to_descriptor_map[klass]; + const std::string& descriptor = class_data[klass].descriptor; os << " " << PrettyClass(klass) << " (" << "objects: " << dirty_object_count << ", " << "avg dirty bytes: " << avg_dirty_bytes_per_class << ", " @@ -634,7 +650,8 @@ class ImgDiagDumper { os << "\n"; os << " dirty byte +offset:count list = "; - auto art_method_field_dirty_count_sorted = SortByValueDesc(art_method_field_dirty_count); + auto art_method_field_dirty_count_sorted = + SortByValueDesc<off_t, int, int>(art_method_field_dirty_count); for (auto pair : art_method_field_dirty_count_sorted) { off_t offset = pair.second; int count = pair.first; @@ -645,7 +662,7 @@ class ImgDiagDumper { os << "\n"; os << " field contents:\n"; - const auto& dirty_objects_list = dirty_objects_by_class[klass]; + const auto& dirty_objects_list = class_data[klass].dirty_objects; for (mirror::Object* obj : dirty_objects_list) { // remote method auto art_method = reinterpret_cast<ArtMethod*>(obj); @@ -684,7 +701,8 @@ class ImgDiagDumper { os << "\n"; os << " dirty byte +offset:count list = "; - auto class_field_dirty_count_sorted = SortByValueDesc(class_field_dirty_count); + auto class_field_dirty_count_sorted = + SortByValueDesc<off_t, int, int>(class_field_dirty_count); for (auto pair : class_field_dirty_count_sorted) { off_t offset = pair.second; int count = pair.first; @@ -694,7 +712,7 @@ class ImgDiagDumper { os << "\n"; os << " field contents:\n"; - const auto& dirty_objects_list = dirty_objects_by_class[klass]; + const auto& 
dirty_objects_list = class_data[klass].dirty_objects; for (mirror::Object* obj : dirty_objects_list) { // remote class object auto remote_klass = reinterpret_cast<mirror::Class*>(obj); @@ -712,15 +730,16 @@ class ImgDiagDumper { } } - auto false_dirty_object_class_values = SortByValueDesc(false_dirty_object_count); + auto false_dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>( + class_data, [](const ClassData& d) { return d.false_dirty_object_count; }); os << "\n" << " False-dirty object count by class:\n"; for (const auto& vk_pair : false_dirty_object_class_values) { int object_count = vk_pair.first; mirror::Class* klass = vk_pair.second; - int object_sizes = false_dirty_byte_count[klass]; + int object_sizes = class_data[klass].false_dirty_byte_count; float avg_object_size = object_sizes * 1.0f / object_count; - const std::string& descriptor = class_to_descriptor_map[klass]; + const std::string& descriptor = class_data[klass].descriptor; os << " " << PrettyClass(klass) << " (" << "objects: " << object_count << ", " << "avg object size: " << avg_object_size << ", " @@ -729,7 +748,7 @@ class ImgDiagDumper { << ")\n"; if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) { - auto& art_method_false_dirty_objects = false_dirty_objects_map[klass]; + auto& art_method_false_dirty_objects = class_data[klass].false_dirty_objects; os << " field contents:\n"; for (mirror::Object* obj : art_method_false_dirty_objects) { @@ -808,14 +827,16 @@ class ImgDiagDumper { return std::string(descriptor_str); } - template <typename K, typename V> - static std::vector<std::pair<V, K>> SortByValueDesc(const std::map<K, V> map) { + template <typename K, typename V, typename D> + static std::vector<std::pair<V, K>> SortByValueDesc( + const std::map<K, D> map, + std::function<V(const D&)> value_mapper = [](const D& d) { return static_cast<V>(d); }) { // Store value->key so that we can use the default sort from pair which // sorts by value first and then key std::vector<std::pair<V, K>> value_key_vector; for (const auto& kv_pair : map) { - value_key_vector.push_back(std::make_pair(kv_pair.second, kv_pair.first)); + value_key_vector.push_back(std::make_pair(value_mapper(kv_pair.second), kv_pair.first)); } // Sort in reverse (descending order) diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index b673eff9ad..3c6a05d97b 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -1052,22 +1052,6 @@ class OatDumper { } } - void DumpInformationAtOffset(VariableIndentationOutputStream* vios, - const OatFile::OatMethod& oat_method, - const DexFile::CodeItem* code_item, - size_t offset, - bool suspend_point_mapping) { - if (!IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) { - // Native method. - return; - } - if (suspend_point_mapping) { - ScopedIndentation indent1(vios); - DumpDexRegisterMapAtOffset(vios, oat_method, code_item, offset); - } - } - - void DumpDexCode(std::ostream& os, const DexFile& dex_file, const DexFile::CodeItem* code_item) { if (code_item != nullptr) { size_t i = 0; @@ -1104,27 +1088,6 @@ class OatDumper { code_item != nullptr; } - void DumpDexRegisterMapAtOffset(VariableIndentationOutputStream* vios, - const OatFile::OatMethod& oat_method, - const DexFile::CodeItem* code_item, - size_t offset) { - // This method is only relevant for oat methods compiled with the - // optimizing compiler. - DCHECK(IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)); - - // The optimizing compiler outputs its CodeInfo data in the vmap table. 
- const void* raw_code_info = oat_method.GetVmapTable(); - if (raw_code_info != nullptr) { - CodeInfo code_info(raw_code_info); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - StackMap stack_map = code_info.GetStackMapForNativePcOffset(offset, encoding); - if (stack_map.IsValid()) { - stack_map.Dump(vios, code_info, encoding, oat_method.GetCodeOffset(), - code_item->registers_size_); - } - } - } - verifier::MethodVerifier* DumpVerifier(VariableIndentationOutputStream* vios, StackHandleScope<1>* hs, uint32_t dex_method_idx, @@ -1147,6 +1110,91 @@ class OatDumper { return nullptr; } + // The StackMapsHelper provides the stack maps in the native PC order. + // For identical native PCs, the order from the CodeInfo is preserved. + class StackMapsHelper { + public: + explicit StackMapsHelper(const uint8_t* raw_code_info) + : code_info_(raw_code_info), + encoding_(code_info_.ExtractEncoding()), + number_of_stack_maps_(code_info_.GetNumberOfStackMaps(encoding_)), + indexes_(), + offset_(static_cast<size_t>(-1)), + stack_map_index_(0u) { + if (number_of_stack_maps_ != 0u) { + // Check if native PCs are ordered. + bool ordered = true; + StackMap last = code_info_.GetStackMapAt(0u, encoding_); + for (size_t i = 1; i != number_of_stack_maps_; ++i) { + StackMap current = code_info_.GetStackMapAt(i, encoding_); + if (last.GetNativePcOffset(encoding_.stack_map_encoding) > + current.GetNativePcOffset(encoding_.stack_map_encoding)) { + ordered = false; + break; + } + last = current; + } + if (!ordered) { + // Create indirection indexes for access in native PC order. We do not optimize + // for the fact that there can currently be only two separately ordered ranges, + // namely normal stack maps and catch-point stack maps. + indexes_.resize(number_of_stack_maps_); + std::iota(indexes_.begin(), indexes_.end(), 0u); + std::sort(indexes_.begin(), + indexes_.end(), + [this](size_t lhs, size_t rhs) { + StackMap left = code_info_.GetStackMapAt(lhs, encoding_); + uint32_t left_pc = left.GetNativePcOffset(encoding_.stack_map_encoding); + StackMap right = code_info_.GetStackMapAt(rhs, encoding_); + uint32_t right_pc = right.GetNativePcOffset(encoding_.stack_map_encoding); + // If the PCs are the same, compare indexes to preserve the original order. + return (left_pc < right_pc) || (left_pc == right_pc && lhs < rhs); + }); + } + offset_ = GetStackMapAt(0).GetNativePcOffset(encoding_.stack_map_encoding); + } + } + + const CodeInfo& GetCodeInfo() const { + return code_info_; + } + + const CodeInfoEncoding& GetEncoding() const { + return encoding_; + } + + size_t GetOffset() const { + return offset_; + } + + StackMap GetStackMap() const { + return GetStackMapAt(stack_map_index_); + } + + void Next() { + ++stack_map_index_; + offset_ = (stack_map_index_ == number_of_stack_maps_) + ? static_cast<size_t>(-1) + : GetStackMapAt(stack_map_index_).GetNativePcOffset(encoding_.stack_map_encoding); + } + + private: + StackMap GetStackMapAt(size_t i) const { + if (!indexes_.empty()) { + i = indexes_[i]; + } + DCHECK_LT(i, number_of_stack_maps_); + return code_info_.GetStackMapAt(i, encoding_); + } + + const CodeInfo code_info_; + const CodeInfoEncoding encoding_; + const size_t number_of_stack_maps_; + dchecked_vector<size_t> indexes_; // Used if stack map native PCs are not ordered. 
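Editor's aside: the StackMapsHelper above avoids copying or mutating the stack maps by sorting a vector of indexes instead — std::iota seeds the identity permutation and the comparator orders by native PC, falling back to the original index so identical PCs keep their CodeInfo order. A minimal sketch of that indirection pattern over a plain vector (the hypothetical pc_offsets stands in for the per-stack-map native PC):

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

// Build a permutation that visits `pc_offsets` in ascending native-PC order,
// preserving the original order for identical PCs, without moving the data.
std::vector<size_t> SortedIndexes(const std::vector<uint32_t>& pc_offsets) {
  std::vector<size_t> indexes(pc_offsets.size());
  std::iota(indexes.begin(), indexes.end(), 0u);  // 0, 1, 2, ...
  std::sort(indexes.begin(), indexes.end(),
            [&pc_offsets](size_t lhs, size_t rhs) {
              uint32_t left_pc = pc_offsets[lhs];
              uint32_t right_pc = pc_offsets[rhs];
              // Tie-break on the index itself to keep the original order.
              return (left_pc < right_pc) || (left_pc == right_pc && lhs < rhs);
            });
  return indexes;
}

// Usage: element i in sorted order is pc_offsets[indexes[i]].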
+ size_t offset_; + size_t stack_map_index_; + }; + void DumpCode(VariableIndentationOutputStream* vios, const OatFile::OatMethod& oat_method, const DexFile::CodeItem* code_item, bool bad_input, size_t code_size) { @@ -1158,17 +1206,34 @@ class OatDumper { if (code_size == 0 || quick_code == nullptr) { vios->Stream() << "NO CODE!\n"; return; - } else { + } else if (!bad_input && IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) { + // The optimizing compiler outputs its CodeInfo data in the vmap table. + StackMapsHelper helper(oat_method.GetVmapTable()); const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code); size_t offset = 0; while (offset < code_size) { - if (!bad_input) { - DumpInformationAtOffset(vios, oat_method, code_item, offset, false); - } offset += disassembler_->Dump(vios->Stream(), quick_native_pc + offset); - if (!bad_input) { - DumpInformationAtOffset(vios, oat_method, code_item, offset, true); + if (offset == helper.GetOffset()) { + ScopedIndentation indent1(vios); + StackMap stack_map = helper.GetStackMap(); + DCHECK(stack_map.IsValid()); + stack_map.Dump(vios, + helper.GetCodeInfo(), + helper.GetEncoding(), + oat_method.GetCodeOffset(), + code_item->registers_size_); + do { + helper.Next(); + // There may be multiple stack maps at a given PC. We display only the first one. + } while (offset == helper.GetOffset()); } + DCHECK_LT(offset, helper.GetOffset()); + } + } else { + const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code); + size_t offset = 0; + while (offset < code_size) { + offset += disassembler_->Dump(vios->Stream(), quick_native_pc + offset); } } } diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 5a901f1e46..da7db1dbb4 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -1247,7 +1247,7 @@ ENTRY art_quick_imt_conflict_trampoline ldr r4, [r0, #(2 * __SIZEOF_POINTER__)]! b .Limt_table_iterate .Limt_table_found: - // We successuflly hit an entry in the table. Load the target method + // We successfully hit an entry in the table. Load the target method // and jump to it. ldr r0, [r0, #__SIZEOF_POINTER__] ldr pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32] diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 3d59d6d2df..1cdda2d19e 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1824,7 +1824,7 @@ ENTRY art_quick_imt_conflict_trampoline ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]! b .Limt_table_iterate .Limt_table_found: - // We successuflly hit an entry in the table. Load the target method + // We successfully hit an entry in the table. Load the target method // and jump to it. ldr x0, [xIP1, #__SIZEOF_POINTER__] ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64] diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index fd1851f2fc..8939a488e9 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -319,6 +319,111 @@ .endm /* + * On stack replacement stub. + * On entry: + * a0 = stack to copy + * a1 = size of stack + * a2 = pc to call + * a3 = JValue* result + * [sp + 16] = shorty + * [sp + 20] = thread + */ +ENTRY art_quick_osr_stub + // Save callee general purpose registers, RA and GP. 
+ addiu $sp, $sp, -48 + .cfi_adjust_cfa_offset 48 + sw $ra, 44($sp) + .cfi_rel_offset 31, 44 + sw $s8, 40($sp) + .cfi_rel_offset 30, 40 + sw $gp, 36($sp) + .cfi_rel_offset 28, 36 + sw $s7, 32($sp) + .cfi_rel_offset 23, 32 + sw $s6, 28($sp) + .cfi_rel_offset 22, 28 + sw $s5, 24($sp) + .cfi_rel_offset 21, 24 + sw $s4, 20($sp) + .cfi_rel_offset 20, 20 + sw $s3, 16($sp) + .cfi_rel_offset 19, 16 + sw $s2, 12($sp) + .cfi_rel_offset 18, 12 + sw $s1, 8($sp) + .cfi_rel_offset 17, 8 + sw $s0, 4($sp) + .cfi_rel_offset 16, 4 + + move $s8, $sp # Save the stack pointer + move $s7, $a1 # Save size of stack + move $s6, $a2 # Save the pc to call + lw rSELF, 48+20($sp) # Save managed thread pointer into rSELF + addiu $t0, $sp, -12 # Reserve space for stack pointer, + # JValue* result, and ArtMethod* slot. + srl $t0, $t0, 4 # Align stack pointer to 16 bytes + sll $sp, $t0, 4 # Update stack pointer + sw $s8, 4($sp) # Save old stack pointer + sw $a3, 8($sp) # Save JValue* result + sw $zero, 0($sp) # Store null for ArtMethod* at bottom of frame + subu $sp, $a1 # Reserve space for callee stack + move $a2, $a1 + move $a1, $a0 + move $a0, $sp + la $t9, memcpy + jalr $t9 # memcpy (dest a0, src a1, bytes a2) + addiu $sp, $sp, -16 # make space for argument slots for memcpy + bal .Losr_entry # Call the method + addiu $sp, $sp, 16 # restore stack after memcpy + lw $a2, 8($sp) # Restore JValue* result + lw $sp, 4($sp) # Restore saved stack pointer + lw $a0, 48+16($sp) # load shorty + lbu $a0, 0($a0) # load return type + li $a1, 'D' # put char 'D' into a1 + beq $a0, $a1, .Losr_fp_result # Test if result type char == 'D' + li $a1, 'F' # put char 'F' into a1 + beq $a0, $a1, .Losr_fp_result # Test if result type char == 'F' + nop + sw $v0, 0($a2) + b .Losr_exit + sw $v1, 4($a2) # store v0/v1 into result +.Losr_fp_result: + SDu $f0, $f1, 0, $a2, $t0 # store f0/f1 into result +.Losr_exit: + lw $ra, 44($sp) + .cfi_restore 31 + lw $s8, 40($sp) + .cfi_restore 30 + lw $gp, 36($sp) + .cfi_restore 28 + lw $s7, 32($sp) + .cfi_restore 23 + lw $s6, 28($sp) + .cfi_restore 22 + lw $s5, 24($sp) + .cfi_restore 21 + lw $s4, 20($sp) + .cfi_restore 20 + lw $s3, 16($sp) + .cfi_restore 19 + lw $s2, 12($sp) + .cfi_restore 18 + lw $s1, 8($sp) + .cfi_restore 17 + lw $s0, 4($sp) + .cfi_restore 16 + jalr $zero, $ra + addiu $sp, $sp, 48 + .cfi_adjust_cfa_offset -48 +.Losr_entry: + addiu $s7, $s7, -4 + addu $t0, $s7, $sp + move $t9, $s6 + jalr $zero, $t9 + sw $ra, 0($t0) # Store RA per the compiler ABI +END art_quick_osr_stub + + /* * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_ * FIXME: just guessing about the shape of the jmpbuf. Where will pc be? */ @@ -1540,11 +1645,41 @@ ENTRY art_quick_proxy_invoke_handler END art_quick_proxy_invoke_handler /* - * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's - * dex method index. + * Called to resolve an imt conflict. + * a0 is the conflict ArtMethod. + * t0 is a hidden argument that holds the target interface method's dex method index. + * + * Note that this stub writes to a0, t0 and t1. */ ENTRY art_quick_imt_conflict_trampoline - move $a0, $t0 + lw $t1, 0($sp) # Load referrer. + lw $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t1) # Load dex cache methods array. + sll $t0, $t0, POINTER_SIZE_SHIFT # Calculate offset. + addu $t0, $t1, $t0 # Add offset to base. + lw $t0, 0($t0) # Load interface method. + lw $a0, ART_METHOD_JNI_OFFSET_32($a0) # Load ImtConflictTable. + +.Limt_table_iterate: + lw $t1, 0($a0) # Load next entry in ImtConflictTable. 
+ # Branch if found. + beq $t1, $t0, .Limt_table_found + nop + # If the entry is null, the interface method is not in the ImtConflictTable. + beqz $t1, .Lconflict_trampoline + nop + # Iterate over the entries of the ImtConflictTable. + b .Limt_table_iterate + addiu $a0, $a0, 2 * __SIZEOF_POINTER__ # Iterate to the next entry. + +.Limt_table_found: + # We successfully hit an entry in the table. Load the target method and jump to it. + lw $a0, __SIZEOF_POINTER__($a0) + lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) + jr $t9 + nop + +.Lconflict_trampoline: + # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method. INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline END art_quick_imt_conflict_trampoline diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index f1e605ac4a..5d0c94c637 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -358,6 +358,138 @@ .endm /* + * On stack replacement stub. + * On entry: + * a0 = stack to copy + * a1 = size of stack + * a2 = pc to call + * a3 = JValue* result + * a4 = shorty + * a5 = thread + */ +ENTRY art_quick_osr_stub + move $t0, $sp # save stack pointer + daddiu $t1, $sp, -112 # reserve stack space + dsrl $t1, $t1, 4 # enforce 16 byte stack alignment + dsll $sp, $t1, 4 # update stack pointer + + // Save callee general purpose registers, SP, T8(GP), RA, A3, and A4 (8x14 bytes) + sd $ra, 104($sp) + .cfi_rel_offset 31, 104 + sd $s8, 96($sp) + .cfi_rel_offset 30, 96 + sd $t0, 88($sp) # save original stack pointer stored in t0 + .cfi_rel_offset 29, 88 + sd $t8, 80($sp) # t8 holds caller's gp, now save it to the stack. + .cfi_rel_offset 28, 80 # Value from gp is pushed, so set the cfi offset accordingly. + sd $s7, 72($sp) + .cfi_rel_offset 23, 72 + sd $s6, 64($sp) + .cfi_rel_offset 22, 64 + sd $s5, 56($sp) + .cfi_rel_offset 21, 56 + sd $s4, 48($sp) + .cfi_rel_offset 20, 48 + sd $s3, 40($sp) + .cfi_rel_offset 19, 40 + sd $s2, 32($sp) + .cfi_rel_offset 18, 32 + sd $s1, 24($sp) + .cfi_rel_offset 17, 24 + sd $s0, 16($sp) + .cfi_rel_offset 16, 16 + sd $a4, 8($sp) + .cfi_rel_offset 8, 8 + sd $a3, 0($sp) + .cfi_rel_offset 7, 0 + move rSELF, $a5 # Save managed thread pointer into rSELF + + daddiu $sp, $sp, -16 + jal .Losr_entry + sd $zero, 0($sp) # Store null for ArtMethod* at bottom of frame + daddiu $sp, $sp, 16 + + // Restore return value address and shorty address + ld $a4, 8($sp) # shorty address + .cfi_restore 8 + ld $a3, 0($sp) # result value address + .cfi_restore 7 + + lbu $t1, 0($a4) # load return type + li $t2, 'D' # put char 'D' into t2 + beq $t1, $t2, .Losr_fp_result # branch if result type char == 'D' + li $t2, 'F' # put char 'F' into t2 + beq $t1, $t2, .Losr_fp_result # branch if result type char == 'F' + nop + b .Losr_exit + dsrl $v1, $v0, 32 # put high half of result in v1 +.Losr_fp_result: + mfc1 $v0, $f0 + mfhc1 $v1, $f0 # put high half of FP result in v1 +.Losr_exit: + sw $v0, 0($a3) # store low half of result + sw $v1, 4($a3) # store high half of result + + // Restore callee registers + ld $ra, 104($sp) + .cfi_restore 31 + ld $s8, 96($sp) + .cfi_restore 30 + ld $t0, 88($sp) # save SP into t0 for now + .cfi_restore 29 + ld $t8, 80($sp) # Restore gp back to it's temp storage. 
+ .cfi_restore 28 + ld $s7, 72($sp) + .cfi_restore 23 + ld $s6, 64($sp) + .cfi_restore 22 + ld $s5, 56($sp) + .cfi_restore 21 + ld $s4, 48($sp) + .cfi_restore 20 + ld $s3, 40($sp) + .cfi_restore 19 + ld $s2, 32($sp) + .cfi_restore 18 + ld $s1, 24($sp) + .cfi_restore 17 + ld $s0, 16($sp) + .cfi_restore 16 + jalr $zero, $ra + move $sp, $t0 + +.Losr_entry: + dsubu $sp, $sp, $a1 # Reserve space for callee stack + daddiu $a1, $a1, -8 + daddu $t0, $a1, $sp + sw $ra, 0($t0) # Store low half of RA per compiler ABI + dsrl $t1, $ra, 32 + sw $t1, 4($t0) # Store high half of RA per compiler ABI + + // Copy arguments into callee stack + // Use simple copy routine for now. + // 4 bytes per slot. + // a0 = source address + // a1 = args length in bytes (does not include 8 bytes for RA) + // sp = destination address + beqz $a1, .Losr_loop_exit + daddiu $a1, $a1, -4 + daddu $t1, $a0, $a1 + daddu $t2, $sp, $a1 +.Losr_loop_entry: + lw $t0, 0($t1) + daddiu $t1, $t1, -4 + sw $t0, 0($t2) + bne $sp, $t2, .Losr_loop_entry + daddiu $t2, $t2, -4 + +.Losr_loop_exit: + move $t9, $a2 + jalr $zero, $t9 # Jump to the OSR entry point. + nop +END art_quick_osr_stub + + /* * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_ * FIXME: just guessing about the shape of the jmpbuf. Where will pc be? */ @@ -1534,11 +1666,40 @@ ENTRY art_quick_proxy_invoke_handler END art_quick_proxy_invoke_handler /* - * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's - * dex method index. + * Called to resolve an imt conflict. + * a0 is the conflict ArtMethod. + * t0 is a hidden argument that holds the target interface method's dex method index. + * + * Mote that this stub writes to a0, t0 and t1. */ ENTRY art_quick_imt_conflict_trampoline - move $a0, $t0 + ld $t1, 0($sp) # Load referrer. + ld $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_64($t1) # Load dex cache methods array. + dsll $t0, $t0, POINTER_SIZE_SHIFT # Calculate offset. + daddu $t0, $t1, $t0 # Add offset to base. + ld $t0, 0($t0) # Load interface method. + ld $a0, ART_METHOD_JNI_OFFSET_64($a0) # Load ImtConflictTable. + +.Limt_table_iterate: + ld $t1, 0($a0) # Load next entry in ImtConflictTable. + # Branch if found. + beq $t1, $t0, .Limt_table_found + nop + # If the entry is null, the interface method is not in the ImtConflictTable. + beqzc $t1, .Lconflict_trampoline + # Iterate over the entries of the ImtConflictTable. + daddiu $a0, $a0, 2 * __SIZEOF_POINTER__ # Iterate to the next entry. + bc .Limt_table_iterate + +.Limt_table_found: + # We successfully hit an entry in the table. Load the target method and jump to it. + ld $a0, __SIZEOF_POINTER__($a0) + ld $t9, ART_METHOD_QUICK_CODE_OFFSET_64($a0) + jr $t9 + .cpreturn # Restore gp from t8 in branch delay slot. + +.Lconflict_trampoline: + # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method. INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline END art_quick_imt_conflict_trampoline diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 82ac5749b2..551ec6880d 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1422,7 +1422,7 @@ DEFINE_FUNCTION art_quick_imt_conflict_trampoline .Limt_table_iterate: cmpl %edi, 0(%eax) jne .Limt_table_next_entry - // We successuflly hit an entry in the table. Load the target method + // We successfully hit an entry in the table. Load the target method // and jump to it. 
POP EDI movl __SIZEOF_POINTER__(%eax), %eax diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 90049cc748..26e668e7ae 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1331,7 +1331,7 @@ DEFINE_FUNCTION art_quick_imt_conflict_trampoline .Limt_table_iterate: cmpq %r10, 0(%rdi) jne .Limt_table_next_entry - // We successuflly hit an entry in the table. Load the target method + // We successfully hit an entry in the table. Load the target method // and jump to it. movq __SIZEOF_POINTER__(%rdi), %rdi jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) diff --git a/runtime/art_method.h b/runtime/art_method.h index 3dbcd58f05..d1ef019428 100644 --- a/runtime/art_method.h +++ b/runtime/art_method.h @@ -545,6 +545,9 @@ class ArtMethod FINAL { ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_); + // Note, hotness_counter_ updates are non-atomic but it doesn't need to be precise. Also, + // given that the counter is only 16 bits wide we can expect wrap-around in some + // situations. Consumers of hotness_count_ must be able to deal with that. uint16_t IncrementCounter() { return ++hotness_count_; } @@ -553,6 +556,14 @@ class ArtMethod FINAL { hotness_count_ = 0; } + void SetCounter(int16_t hotness_count) { + hotness_count_ = hotness_count; + } + + uint16_t GetCounter() const { + return hotness_count_; + } + const uint8_t* GetQuickenedInfo() SHARED_REQUIRES(Locks::mutator_lock_); // Returns the method header for the compiled code containing 'pc'. Note that runtime @@ -597,7 +608,7 @@ class ArtMethod FINAL { // ifTable. uint16_t method_index_; - // The hotness we measure for this method. Incremented by the interpreter. Not atomic, as we allow + // The hotness we measure for this method. Managed by the interpreter. Not atomic, as we allow // missing increments: if the method is hot, we will see it eventually. 
uint16_t hotness_count_; diff --git a/runtime/asm_support.h b/runtime/asm_support.h index 942f9de0b5..d27d2f6c91 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -20,6 +20,7 @@ #if defined(__cplusplus) #include "art_method.h" #include "gc/allocator/rosalloc.h" +#include "jit/jit_instrumentation.h" #include "lock_word.h" #include "mirror/class.h" #include "mirror/string.h" @@ -188,7 +189,13 @@ ADD_TEST_EQ(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET, #define SHADOWFRAME_DEX_PC_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 4) ADD_TEST_EQ(SHADOWFRAME_DEX_PC_OFFSET, static_cast<int32_t>(art::ShadowFrame::DexPCOffset())) -#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8) +#define SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8) +ADD_TEST_EQ(SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET, + static_cast<int32_t>(art::ShadowFrame::CachedHotnessCountdownOffset())) +#define SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 10) +ADD_TEST_EQ(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET, + static_cast<int32_t>(art::ShadowFrame::HotnessCountdownOffset())) +#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 12) ADD_TEST_EQ(SHADOWFRAME_VREGS_OFFSET, static_cast<int32_t>(art::ShadowFrame::VRegsOffset())) @@ -389,6 +396,12 @@ ADD_TEST_EQ(THREAD_SUSPEND_REQUEST, static_cast<int32_t>(art::kSuspendRequest)) #define THREAD_CHECKPOINT_REQUEST 2 ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest)) +#define JIT_CHECK_OSR -1 +ADD_TEST_EQ(JIT_CHECK_OSR, static_cast<int32_t>(art::jit::kJitCheckForOSR)) + +#define JIT_HOTNESS_DISABLE -2 +ADD_TEST_EQ(JIT_HOTNESS_DISABLE, static_cast<int32_t>(art::jit::kJitHotnessDisabled)) + #if defined(__cplusplus) } // End of CheckAsmSupportOffsets. #endif diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index a84b366687..d386c74354 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -703,6 +703,11 @@ class FixupVisitor : public ValueObject { return src; } + // Must be called on pointers that already have been relocated to the destination relocation. + ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const { + return app_image_.InDest(reinterpret_cast<uintptr_t>(object)); + } + protected: // Source section. const RelocationRange boot_image_; @@ -717,36 +722,12 @@ class FixupObjectAdapter : public FixupVisitor { template<typename... Args> explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {} - // Must be called on pointers that already have been relocated to the destination relocation. - ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const { - return app_image_.InDest(reinterpret_cast<uintptr_t>(object)); - } - template <typename T> T* operator()(T* obj) const { return ForwardObject(obj); } }; -class FixupClassVisitor : public FixupVisitor { - public: - template<typename... Args> - explicit FixupClassVisitor(Args... args) : FixupVisitor(args...) {} - - // The image space is contained so the GC doesn't need to know about it. Avoid requiring mutator - // lock to prevent possible pauses. - ALWAYS_INLINE void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS { - mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>(); - DCHECK(klass != nullptr) << "Null class in image"; - // No AsClass since our fields aren't quite fixed up yet. - mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass)); - // Keep clean if possible. 
- if (klass != new_klass) { - obj->SetClass<kVerifyNone>(new_klass); - } - } -}; - class FixupRootVisitor : public FixupVisitor { public: template<typename... Args> @@ -772,12 +753,12 @@ class FixupRootVisitor : public FixupVisitor { class FixupObjectVisitor : public FixupVisitor { public: template<typename... Args> - explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* pointer_array_visited, + explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* visited, const size_t pointer_size, Args... args) : FixupVisitor(args...), pointer_size_(pointer_size), - pointer_array_visited_(pointer_array_visited) {} + visited_(visited) {} // Fix up separately since we also need to fix up method entrypoints. ALWAYS_INLINE void VisitRootIfNonNull( @@ -805,13 +786,20 @@ class FixupObjectVisitor : public FixupVisitor { // Visit a pointer array and forward corresponding native data. Ignores pointer arrays in the // boot image. Uses the bitmap to ensure the same array is not visited multiple times. template <typename Visitor> - void VisitPointerArray(mirror::PointerArray* array, const Visitor& visitor) const + void UpdatePointerArrayContents(mirror::PointerArray* array, const Visitor& visitor) const NO_THREAD_SAFETY_ANALYSIS { - if (array != nullptr && - visitor.IsInAppImage(array) && - !pointer_array_visited_->Test(array)) { + DCHECK(array != nullptr); + DCHECK(visitor.IsInAppImage(array)); + // The bit for the array contents is different than the bit for the array. Since we may have + // already visited the array as a long / int array from walking the bitmap without knowing it + // was a pointer array. + static_assert(kObjectAlignment == 8u, "array bit may be in another object"); + mirror::Object* const contents_bit = reinterpret_cast<mirror::Object*>( + reinterpret_cast<uintptr_t>(array) + kObjectAlignment); + // If the bit is not set then the contents have not yet been updated. + if (!visited_->Test(contents_bit)) { array->Fixup<kVerifyNone, kWithoutReadBarrier>(array, pointer_size_, visitor); - pointer_array_visited_->Set(array); + visited_->Set(contents_bit); } } @@ -824,26 +812,61 @@ class FixupObjectVisitor : public FixupVisitor { ForwardObject(obj)); } - ALWAYS_INLINE void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS { + void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS { + if (visited_->Test(obj)) { + // Already visited. + return; + } + visited_->Set(obj); + + // Handle class specially first since we need it to be updated to properly visit the rest of + // the instance fields. + { + mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>(); + DCHECK(klass != nullptr) << "Null class in image"; + // No AsClass since our fields aren't quite fixed up yet. + mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass)); + if (klass != new_klass) { + obj->SetClass<kVerifyNone>(new_klass); + } + if (new_klass != klass && IsInAppImage(new_klass)) { + // Make sure the klass contents are fixed up since we depend on it to walk the fields. + operator()(new_klass); + } + } + obj->VisitReferences</*visit native roots*/false, kVerifyNone, kWithoutReadBarrier>( *this, *this); + // Note that this code relies on no circular dependencies. // We want to use our own class loader and not the one in the image. 
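UpdatePointerArrayContents above keeps two separate marks per pointer array: the array's own address records that the object was visited, while the slot one object-alignment unit above it records that the array's contents were forwarded. That way an array first reached as a plain long[]/int[] during the bitmap walk can still get its method pointers fixed up later. A small sketch of the idea, with a std::set standing in for the ContinuousSpaceBitmap:

    #include <cstdint>
    #include <set>

    constexpr uintptr_t kObjectAlignment = 8;  // matches the static_assert above

    // Stand-in for the visited bitmap: one mark per aligned address.
    struct VisitedMarks {
      std::set<uintptr_t> marks;
      bool Test(uintptr_t addr) const { return marks.count(addr) != 0; }
      void Set(uintptr_t addr) { marks.insert(addr); }
    };

    // A second mark, one alignment unit above the array, records that the *contents*
    // (not just the array object itself) have been forwarded.
    inline bool ShouldForwardContents(VisitedMarks& v, uintptr_t array_addr) {
      const uintptr_t contents_mark = array_addr + kObjectAlignment;
      if (v.Test(contents_mark)) {
        return false;  // contents already forwarded by an earlier visit
      }
      v.Set(contents_mark);
      return true;     // caller forwards the contents now
    }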
if (obj->IsClass<kVerifyNone, kWithoutReadBarrier>()) { - mirror::Class* klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>(); + mirror::Class* as_klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>(); FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_); - klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(klass, pointer_size_, visitor); + as_klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(as_klass, + pointer_size_, + visitor); // Deal with the pointer arrays. Use the helper function since multiple classes can reference // the same arrays. - VisitPointerArray(klass->GetVTable<kVerifyNone, kWithoutReadBarrier>(), visitor); - mirror::IfTable* iftable = klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>(); - if (iftable != nullptr) { + mirror::PointerArray* const vtable = as_klass->GetVTable<kVerifyNone, kWithoutReadBarrier>(); + if (vtable != nullptr && IsInAppImage(vtable)) { + operator()(vtable); + UpdatePointerArrayContents(vtable, visitor); + } + mirror::IfTable* iftable = as_klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>(); + // Ensure iftable arrays are fixed up since we need GetMethodArray to return the valid + // contents. + if (iftable != nullptr && IsInAppImage(iftable)) { + operator()(iftable); for (int32_t i = 0, count = iftable->Count(); i < count; ++i) { if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) { mirror::PointerArray* methods = iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i); - DCHECK(methods != nullptr); - VisitPointerArray(methods, visitor); + if (visitor.IsInAppImage(methods)) { + operator()(methods); + DCHECK(methods != nullptr); + UpdatePointerArrayContents(methods, visitor); + } } } } @@ -852,7 +875,7 @@ class FixupObjectVisitor : public FixupVisitor { private: const size_t pointer_size_; - gc::accounting::ContinuousSpaceBitmap* const pointer_array_visited_; + gc::accounting::ContinuousSpaceBitmap* const visited_; }; class ForwardObjectAdapter { @@ -938,9 +961,14 @@ static bool RelocateInPlace(ImageHeader& image_header, const size_t pointer_size = image_header.GetPointerSize(); gc::Heap* const heap = Runtime::Current()->GetHeap(); heap->GetBootImagesSize(&boot_image_begin, &boot_image_end, &boot_oat_begin, &boot_oat_end); - CHECK_NE(boot_image_begin, boot_image_end) - << "Can not relocate app image without boot image space"; - CHECK_NE(boot_oat_begin, boot_oat_end) << "Can not relocate app image without boot oat file"; + if (boot_image_begin == boot_image_end) { + *error_msg = "Can not relocate app image without boot image space"; + return false; + } + if (boot_oat_begin == boot_oat_end) { + *error_msg = "Can not relocate app image without boot oat file"; + return false; + } const uint32_t boot_image_size = boot_image_end - boot_image_begin; const uint32_t boot_oat_size = boot_oat_end - boot_oat_begin; const uint32_t image_header_boot_image_size = image_header.GetBootImageSize(); @@ -994,7 +1022,7 @@ static bool RelocateInPlace(ImageHeader& image_header, // Two pass approach, fix up all classes first, then fix up non class-objects. // The visited bitmap is used to ensure that pointer arrays are not forwarded twice. 
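All of these visitors rely on the same forwarding rule: a pointer that falls inside one of the source ranges (boot image, boot oat, app image, app oat) is shifted by that range's source-to-destination delta; anything else is returned unchanged. A simplified sketch of the arithmetic, assuming a range is described by its source start, destination start and length (the real RelocationRange helper is defined earlier in image_space.cc):

    #include <cstdint>

    // Assumed shape of a relocation range: [source, source + length) maps onto
    // [dest, dest + length).
    struct Range {
      uintptr_t source;
      uintptr_t dest;
      uintptr_t length;
      bool InSource(uintptr_t addr) const { return addr - source < length; }
      uintptr_t ToDest(uintptr_t addr) const { return addr + (dest - source); }
    };

    // Forward a pointer through whichever range contains it; pointers outside
    // every range pass through unchanged.
    inline uintptr_t Forward(uintptr_t addr, const Range& app_image, const Range& boot_image) {
      if (app_image.InSource(addr)) return app_image.ToDest(addr);
      if (boot_image.InSource(addr)) return boot_image.ToDest(addr);
      return addr;
    }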
std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> visited_bitmap( - gc::accounting::ContinuousSpaceBitmap::Create("Pointer array bitmap", + gc::accounting::ContinuousSpaceBitmap::Create("Relocate bitmap", target_base, image_header.GetImageSize())); FixupObjectVisitor fixup_object_visitor(visited_bitmap.get(), @@ -1004,10 +1032,6 @@ static bool RelocateInPlace(ImageHeader& image_header, app_image, app_oat); TimingLogger::ScopedTiming timing("Fixup classes", &logger); - // Fixup class only touches app image classes, don't need the mutator lock since the space is - // not yet visible to the GC. - FixupClassVisitor fixup_class_visitor(boot_image, boot_oat, app_image, app_oat); - bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_class_visitor); // Fixup objects may read fields in the boot image, use the mutator lock here for sanity. Though // its probably not required. ScopedObjectAccess soa(Thread::Current()); diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h index d07f47bf29..a4c3d41537 100644 --- a/runtime/instrumentation.h +++ b/runtime/instrumentation.h @@ -303,7 +303,8 @@ class Instrumentation { bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) { return have_dex_pc_listeners_ || have_method_exit_listeners_ || have_field_read_listeners_ || have_field_write_listeners_ || - have_exception_caught_listeners_ || have_method_unwind_listeners_; + have_exception_caught_listeners_ || have_method_unwind_listeners_ || + have_branch_listeners_; } // Inform listeners that a method has been entered. A dex PC is provided as we may install diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 12d6fdc00d..ce698fb688 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -22,6 +22,7 @@ #include "experimental_flags.h" #include "interpreter_common.h" #include "jit/jit.h" +#include "jit/jit_instrumentation.h" #include "safe_math.h" #include <memory> // std::unique_ptr @@ -64,15 +65,20 @@ namespace interpreter { currentHandlersTable = handlersTable[ \ Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()] -#define BRANCH_INSTRUMENTATION(offset) \ - do { \ - ArtMethod* method = shadow_frame.GetMethod(); \ - instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \ - instrumentation->Branch(self, method, dex_pc, offset); \ - JValue result; \ - if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \ - return result; \ - } \ +#define BRANCH_INSTRUMENTATION(offset) \ + do { \ + instrumentation->Branch(self, method, dex_pc, offset); \ + JValue result; \ + if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \ + return result; \ + } \ + } while (false) + +#define HOTNESS_UPDATE() \ + do { \ + if (jit_instrumentation_cache != nullptr) { \ + jit_instrumentation_cache->AddSamples(self, method, 1); \ + } \ } while (false) #define UNREACHABLE_CODE_CHECK() \ @@ -186,6 +192,13 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF UPDATE_HANDLER_TABLE(); std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder; size_t lambda_captured_variable_index = 0; + const auto* const instrumentation = Runtime::Current()->GetInstrumentation(); + ArtMethod* method = shadow_frame.GetMethod(); + jit::Jit* jit = Runtime::Current()->GetJit(); + jit::JitInstrumentationCache* jit_instrumentation_cache = 
nullptr; + if (jit != nullptr) { + jit_instrumentation_cache = jit->GetInstrumentationCache(); + } // Jump to first instruction. ADVANCE(0); @@ -277,7 +290,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF JValue result; self->AllowThreadSuspension(); HANDLE_MONITOR_CHECKS(); - instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); if (UNLIKELY(instrumentation->HasMethodExitListeners())) { instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, @@ -292,7 +304,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF JValue result; self->AllowThreadSuspension(); HANDLE_MONITOR_CHECKS(); - instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); if (UNLIKELY(instrumentation->HasMethodExitListeners())) { instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, @@ -308,7 +319,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data))); self->AllowThreadSuspension(); HANDLE_MONITOR_CHECKS(); - instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); if (UNLIKELY(instrumentation->HasMethodExitListeners())) { instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, @@ -323,7 +333,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data))); self->AllowThreadSuspension(); HANDLE_MONITOR_CHECKS(); - instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); if (UNLIKELY(instrumentation->HasMethodExitListeners())) { instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, @@ -359,7 +368,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF } } result.SetL(obj_result); - instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); if (UNLIKELY(instrumentation->HasMethodExitListeners())) { instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, @@ -630,6 +638,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int8_t offset = inst->VRegA_10t(inst_data); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -643,6 +652,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegA_20t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -656,6 +666,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int32_t offset = inst->VRegA_30t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -669,6 +680,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data); 
BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -682,6 +694,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -785,6 +798,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -804,6 +818,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -823,6 +838,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -842,6 +858,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -861,6 +878,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -880,6 +898,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -898,6 +917,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -916,6 +936,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -934,6 +955,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -952,6 +974,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ 
-970,6 +993,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -988,6 +1012,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); if (UNLIKELY(self->TestAllFlags())) { self->CheckSuspend(); UPDATE_HANDLER_TABLE(); @@ -2558,7 +2583,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF self->CheckSuspend(); UPDATE_HANDLER_TABLE(); } - instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc, instrumentation); if (found_dex_pc == DexFile::kDexNoIndex) { @@ -2579,8 +2603,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF // a constant condition that would remove the "if" statement so the test is free. #define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) \ alt_op_##code: { \ - Runtime* const runtime = Runtime::Current(); \ - const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); \ if (UNLIKELY(instrumentation->HasDexPcListeners())) { \ Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_); \ instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc); \ diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index 0488dbf028..442e1915f8 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -18,6 +18,7 @@ #include "experimental_flags.h" #include "interpreter_common.h" #include "jit/jit.h" +#include "jit/jit_instrumentation.h" #include "safe_math.h" #include <memory> // std::unique_ptr @@ -37,6 +38,7 @@ namespace interpreter { shadow_frame.GetLockCountData(). \ CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self); \ if (interpret_one_instruction) { \ + /* Signal mterp to return to caller */ \ shadow_frame.SetDexPC(DexFile::kDexNoIndex); \ } \ return JValue(); /* Handled in caller. */ \ @@ -72,14 +74,24 @@ namespace interpreter { #define BRANCH_INSTRUMENTATION(offset) \ do { \ - ArtMethod* method = shadow_frame.GetMethod(); \ instrumentation->Branch(self, method, dex_pc, offset); \ JValue result; \ if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \ + if (interpret_one_instruction) { \ + /* OSR has completed execution of the method. 
Signal mterp to return to caller */ \ + shadow_frame.SetDexPC(DexFile::kDexNoIndex); \ + } \ return result; \ } \ } while (false) +#define HOTNESS_UPDATE() \ + do { \ + if (jit_instrumentation_cache != nullptr) { \ + jit_instrumentation_cache->AddSamples(self, method, 1); \ + } \ + } while (false) + static bool IsExperimentalInstructionEnabled(const Instruction *inst) { DCHECK(inst->IsExperimental()); return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas); @@ -101,6 +113,12 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, const uint16_t* const insns = code_item->insns_; const Instruction* inst = Instruction::At(insns + dex_pc); uint16_t inst_data; + ArtMethod* method = shadow_frame.GetMethod(); + jit::Jit* jit = Runtime::Current()->GetJit(); + jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr; + if (jit != nullptr) { + jit_instrumentation_cache = jit->GetInstrumentationCache(); + } // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need // to keep this live for the scope of the entire function call. @@ -205,6 +223,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, result); } if (interpret_one_instruction) { + /* Signal mterp to return to caller */ shadow_frame.SetDexPC(DexFile::kDexNoIndex); } return result; @@ -221,6 +240,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, result); } if (interpret_one_instruction) { + /* Signal mterp to return to caller */ shadow_frame.SetDexPC(DexFile::kDexNoIndex); } return result; @@ -238,6 +258,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, result); } if (interpret_one_instruction) { + /* Signal mterp to return to caller */ shadow_frame.SetDexPC(DexFile::kDexNoIndex); } return result; @@ -254,6 +275,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, result); } if (interpret_one_instruction) { + /* Signal mterp to return to caller */ shadow_frame.SetDexPC(DexFile::kDexNoIndex); } return result; @@ -292,6 +314,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, result); } if (interpret_one_instruction) { + /* Signal mterp to return to caller */ shadow_frame.SetDexPC(DexFile::kDexNoIndex); } return result; @@ -564,6 +587,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int8_t offset = inst->VRegA_10t(inst_data); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -574,6 +598,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegA_20t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -584,6 +609,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int32_t offset = inst->VRegA_30t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -594,6 +620,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -604,6 +631,7 @@ JValue ExecuteSwitchImpl(Thread* self, const 
DexFile::CodeItem* code_item, int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -708,6 +736,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -724,6 +753,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -740,6 +770,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -756,6 +787,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -772,6 +804,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -788,6 +821,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegC_22t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -803,6 +837,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -818,6 +853,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -833,6 +869,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -848,6 +885,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -863,6 +901,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -878,6 +917,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, int16_t offset = inst->VRegB_21t(); BRANCH_INSTRUMENTATION(offset); if (IsBackwardBranch(offset)) { + HOTNESS_UPDATE(); 
self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S index cfad7147e2..8fad42f0d2 100644 --- a/runtime/interpreter/mterp/arm/bincmp.S +++ b/runtime/interpreter/mterp/arm/bincmp.S @@ -1,7 +1,6 @@ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -9,23 +8,12 @@ mov r1, rINST, lsr #12 @ r1<- B ubfx r0, rINST, #8, #4 @ r0<- A GET_VREG r3, r1 @ r3<- vB - GET_VREG r2, r0 @ r2<- vA + GET_VREG r0, r0 @ r0<- vA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - cmp r2, r3 @ compare (vA, vB) - mov${revcmp} rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r2, rINST, rINST @ convert to bytes, check sign - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, r3 @ compare (vA, vB) + b${condition} MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S index 981c03659f..a6b131d219 100644 --- a/runtime/interpreter/mterp/arm/entry.S +++ b/runtime/interpreter/mterp/arm/entry.S @@ -33,10 +33,8 @@ ExecuteMterpImpl: .fnstart - .save {r4-r10,fp,lr} - stmfd sp!, {r4-r10,fp,lr} @ save 9 regs - .pad #4 - sub sp, sp, #4 @ align 64 + .save {r3-r10,fp,lr} + stmfd sp!, {r3-r10,fp,lr} @ save 10 regs, (r3 just to align 64) /* Remember the return register */ str r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET] @@ -57,6 +55,12 @@ ExecuteMterpImpl: /* Starting ibase */ ldr rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] + /* Set up for backwards branches & osr profiling */ + ldr r0, [rFP, #OFF_FP_METHOD] + add r1, rFP, #OFF_FP_SHADOWFRAME + bl MterpSetUpHotnessCountdown + mov rPROFILE, r0 @ Starting hotness countdown to rPROFILE + /* start executing the instruction at rPC */ FETCH_INST @ load rINST from rPC GET_INST_OPCODE ip @ extract opcode from rINST diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S index 3456a7559b..62e573a7db 100644 --- a/runtime/interpreter/mterp/arm/footer.S +++ b/runtime/interpreter/mterp/arm/footer.S @@ -114,21 +114,117 @@ MterpException: /* NOTE: no fallthrough */ /* - * Check for suspend check request. Assumes rINST already loaded, rPC advanced and - * still needs to get the opcode and branch to it, and flags are in lr. + * Common handling for branches with support for Jit profiling. + * On entry: + * rINST <= signed offset + * rPROFILE <= signed hotness countdown (expanded to 32 bits) + * condition bits <= set to establish sign of offset (use "NoFlags" entry if not) + * + * We have quite a few different cases for branch profiling, OSR detection and + * suspend check support here. 
+ * + * Taken backward branches: + * If profiling active, do hotness countdown and report if we hit zero. + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * Is there a pending suspend request? If so, suspend. + * + * Taken forward branches and not-taken backward branches: + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * + * Our most common case is expected to be a taken backward branch with active jit profiling, + * but no full OSR check and no pending suspend request. + * Next most common case is not-taken branch with no full OSR check. + * + */ +MterpCommonTakenBranchNoFlags: + cmp rINST, #0 +MterpCommonTakenBranch: + bgt .L_forward_branch @ don't add forward branches to hotness +/* + * We need to subtract 1 from positive values and we should not see 0 here, + * so we may use the result of the comparison with -1. */ -MterpCheckSuspendAndContinue: - ldr rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh rIBASE +#if JIT_CHECK_OSR != -1 +# error "JIT_CHECK_OSR must be -1." +#endif + cmp rPROFILE, #JIT_CHECK_OSR + beq .L_osr_check + subgts rPROFILE, #1 + beq .L_add_batch @ counted down to zero - report +.L_resume_backward_branch: + ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] + REFRESH_IBASE + add r2, rINST, rINST @ r2<- byte offset + FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST ands lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) - bne 1f + bne .L_suspend_request_pending GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction -1: + +.L_suspend_request_pending: EXPORT_PC mov r0, rSELF bl MterpSuspendCheck @ (self) cmp r0, #0 bne MterpFallback + REFRESH_IBASE @ might have changed during suspend + GET_INST_OPCODE ip @ extract opcode from rINST + GOTO_OPCODE ip @ jump to next instruction + +.L_no_count_backwards: + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + bne .L_resume_backward_branch +.L_osr_check: + mov r0, rSELF + add r1, rFP, #OFF_FP_SHADOWFRAME + mov r2, rINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement @ (self, shadow_frame, offset) + cmp r0, #0 + bne MterpOnStackReplacement + b .L_resume_backward_branch + +.L_forward_branch: + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_osr_forward +.L_resume_forward_branch: + add r2, rINST, rINST @ r2<- byte offset + FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST + GET_INST_OPCODE ip @ extract opcode from rINST + GOTO_OPCODE ip @ jump to next instruction + +.L_check_osr_forward: + mov r0, rSELF + add r1, rFP, #OFF_FP_SHADOWFRAME + mov r2, rINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement @ (self, shadow_frame, offset) + cmp r0, #0 + bne MterpOnStackReplacement + b .L_resume_forward_branch + +.L_add_batch: + add r1, rFP, #OFF_FP_SHADOWFRAME + strh rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + ldr r0, [rFP, #OFF_FP_METHOD] + mov r2, rSELF + bl MterpAddHotnessBatch @ (method, shadow_frame, self) + mov rPROFILE, r0 @ restore new hotness countdown to rPROFILE + b .L_no_count_backwards + +/* + * Entered from the conditional branch handlers when OSR check request active on + * not-taken path. All Dalvik not-taken conditional branch offsets are 2. 
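The comment above enumerates the cases; a rough C++ rendering of the same flow follows (the sentinels mirror the asm_support.h asserts: kJitCheckForOSR == -1, while kJitHotnessDisabled == -2 simply behaves like any other negative countdown here, never counting and never requesting OSR):

    #include <cstdint>

    constexpr int32_t kJitCheckForOSR = -1;  // mirrors JIT_CHECK_OSR

    enum class Next { kDispatch, kTryOsr, kReportBatch };

    // What a taken branch does with the cached countdown before the usual
    // suspend check and dispatch. Forward branches never feed hotness.
    inline Next OnTakenBranch(int32_t offset, int32_t& countdown) {
      if (offset > 0) {
        return (countdown == kJitCheckForOSR) ? Next::kTryOsr : Next::kDispatch;
      }
      if (countdown == kJitCheckForOSR) {
        return Next::kTryOsr;               // ask the JIT whether this loop header is compiled
      }
      if (countdown > 0 && --countdown == 0) {
        return Next::kReportBatch;          // flush the batch via MterpAddHotnessBatch
      }
      return Next::kDispatch;               // includes the hotness-disabled state
    }

After kReportBatch the runtime hands back either a fresh positive countdown or one of the negative special states, and the taken-backward-branch path still performs the normal suspend-request check before dispatching the next instruction.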
+ */ +.L_check_not_taken_osr: + mov r0, rSELF + add r1, rFP, #OFF_FP_SHADOWFRAME + mov r2, #2 + EXPORT_PC + bl MterpMaybeDoOnStackReplacement @ (self, shadow_frame, offset) + cmp r0, #0 + bne MterpOnStackReplacement + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -176,9 +272,27 @@ MterpReturn: str r1, [r2, #4] mov r0, #1 @ signal return to caller. MterpDone: - add sp, sp, #4 @ un-align 64 - ldmfd sp!, {r4-r10,fp,pc} @ restore 9 regs and return +/* + * At this point, we expect rPROFILE to be non-zero. If negative, hotness is disabled or we're + * checking for OSR. If greater than zero, we might have unreported hotness to register + * (the difference between the ending rPROFILE and the cached hotness counter). rPROFILE + * should only reach zero immediately after a hotness decrement, and is then reset to either + * a negative special state or the new non-zero countdown value. + */ + cmp rPROFILE, #0 + bgt MterpProfileActive @ if > 0, we may have some counts to report. + ldmfd sp!, {r3-r10,fp,pc} @ restore 10 regs and return +MterpProfileActive: + mov rINST, r0 @ stash return value + /* Report cached hotness counts */ + ldr r0, [rFP, #OFF_FP_METHOD] + add r1, rFP, #OFF_FP_SHADOWFRAME + mov r2, rSELF + strh rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + bl MterpAddHotnessBatch @ (method, shadow_frame, self) + mov r0, rINST @ restore return value + ldmfd sp!, {r3-r10,fp,pc} @ restore 10 regs and return .fnend .size ExecuteMterpImpl, .-ExecuteMterpImpl diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S index 298af8a57e..039bcbe262 100644 --- a/runtime/interpreter/mterp/arm/header.S +++ b/runtime/interpreter/mterp/arm/header.S @@ -72,7 +72,8 @@ The following registers have fixed assignments: r6 rSELF self (Thread) pointer r7 rINST first 16-bit code unit of current instruction r8 rIBASE interpreted instruction base pointer, used for computed goto - r11 rREFS base of object references in shadow frame (ideally, we'll get rid of this later). + r10 rPROFILE branch profiling countdown + r11 rREFS base of object references in shadow frame (ideally, we'll get rid of this later). Macros are provided for common operations. Each macro MUST emit only one instruction to make instruction-counting easier. They MUST NOT alter @@ -90,12 +91,13 @@ unspecified registers or condition codes. /* During bringup, we'll use the shadow frame model instead of rFP */ /* single-purpose registers, given names for clarity */ -#define rPC r4 -#define rFP r5 -#define rSELF r6 -#define rINST r7 -#define rIBASE r8 -#define rREFS r11 +#define rPC r4 +#define rFP r5 +#define rSELF r6 +#define rINST r7 +#define rIBASE r8 +#define rPROFILE r10 +#define rREFS r11 /* * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs. So, @@ -109,7 +111,7 @@ unspecified registers or condition codes. #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET) #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET) #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET) -#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET) +#define OFF_FP_SHADOWFRAME OFF_FP(0) /* * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects. 
Must diff --git a/runtime/interpreter/mterp/arm/op_cmp_long.S b/runtime/interpreter/mterp/arm/op_cmp_long.S index e57b19c5cc..6626ff0f49 100644 --- a/runtime/interpreter/mterp/arm/op_cmp_long.S +++ b/runtime/interpreter/mterp/arm/op_cmp_long.S @@ -1,22 +1,6 @@ /* * Compare two 64-bit values. Puts 0, 1, or -1 into the destination * register based on the results of the comparison. - * - * We load the full values with LDM, but in practice many values could - * be resolved by only looking at the high word. This could be made - * faster or slower by splitting the LDM into a pair of LDRs. - * - * If we just wanted to set condition flags, we could do this: - * subs ip, r0, r2 - * sbcs ip, r1, r3 - * subeqs ip, r0, r2 - * Leaving { <0, 0, >0 } in ip. However, we have to set it to a specific - * integer value, which we can do with 2 conditional mov/mvn instructions - * (set 1, set -1; if they're equal we already have 0 in ip), giving - * us a constant 5-cycle path plus a branch at the end to the - * instruction epilogue code. The multi-compare approach below needs - * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch - * in the worst case (the 64-bit values are equal). */ /* cmp-long vAA, vBB, vCC */ FETCH r0, 1 @ r0<- CCBB @@ -27,30 +11,13 @@ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[CC] ldmia r2, {r0-r1} @ r0/r1<- vBB/vBB+1 ldmia r3, {r2-r3} @ r2/r3<- vCC/vCC+1 - cmp r1, r3 @ compare (vBB+1, vCC+1) - blt .L${opcode}_less @ signed compare on high part - bgt .L${opcode}_greater - subs r1, r0, r2 @ r1<- r0 - r2 - bhi .L${opcode}_greater @ unsigned compare on low part - bne .L${opcode}_less - b .L${opcode}_finish @ equal; r1 already holds 0 -%break - -.L${opcode}_less: - mvn r1, #0 @ r1<- -1 - @ Want to cond code the next mov so we can avoid branch, but don't see it; - @ instead, we just replicate the tail end. + cmp r0, r2 + sbcs ip, r1, r3 @ Sets correct CCs for checking LT (but not EQ/NE) + mov ip, #0 + mvnlt ip, #0 @ -1 + cmpeq r0, r2 @ For correct EQ/NE, we may need to repeat the first CMP + orrne ip, #1 FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - SET_VREG r1, r9 @ vAA<- r1 - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction - -.L${opcode}_greater: - mov r1, #1 @ r1<- 1 - @ fall through to _finish - -.L${opcode}_finish: - FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - SET_VREG r1, r9 @ vAA<- r1 + SET_VREG ip, r9 @ vAA<- ip GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S index 6861950909..aa42dfd843 100644 --- a/runtime/interpreter/mterp/arm/op_goto.S +++ b/runtime/interpreter/mterp/arm/op_goto.S @@ -5,32 +5,5 @@ * double to get a byte offset. 
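The replacement sequence in op_cmp_long.S above is branchless: cmp plus sbcs establishes signed less-than across the 64-bit pair, mvnlt materialises -1, and the repeated low-word compare with orrne turns "not equal" into +1 (ORing 1 into an existing -1 leaves it at -1, so the less-than result survives). The reference semantics it has to match:

    #include <cstdint>

    // cmp-long must leave -1, 0 or 1 in vAA for a signed 64-bit comparison.
    inline int32_t CmpLong(int64_t lhs, int64_t rhs) {
      if (lhs < rhs) return -1;
      if (lhs == rhs) return 0;
      return 1;
    }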
*/ /* goto +AA */ - /* tuning: use sbfx for 6t2+ targets */ -#if MTERP_PROFILE_BRANCHES - mov r0, rINST, lsl #16 @ r0<- AAxx0000 - movs rINST, r0, asr #24 @ rINST<- ssssssAA (sign-extended) - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r2, rINST, rINST @ r2<- byte offset, set flags - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - @ If backwards branch refresh rIBASE - bmi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#else - mov r0, rINST, lsl #16 @ r0<- AAxx0000 - movs rINST, r0, asr #24 @ rINST<- ssssssAA (sign-extended) - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r2, rINST, rINST @ r2<- byte offset, set flags - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - @ If backwards branch refresh rIBASE - bmi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#endif + sbfx rINST, rINST, #8, #8 @ rINST<- ssssssAA (sign-extended) + b MterpCommonTakenBranchNoFlags diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S index 91639ca796..12a6bc07f8 100644 --- a/runtime/interpreter/mterp/arm/op_goto_16.S +++ b/runtime/interpreter/mterp/arm/op_goto_16.S @@ -5,27 +5,5 @@ * double to get a byte offset. */ /* goto/16 +AAAA */ -#if MTERP_PROFILE_BRANCHES FETCH_S rINST, 1 @ rINST<- ssssAAAA (sign-extended) - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset, flags set - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#else - FETCH_S rINST, 1 @ rINST<- ssssAAAA (sign-extended) - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset, flags set - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#endif + b MterpCommonTakenBranchNoFlags diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S index e730b527ec..7325a1c2d6 100644 --- a/runtime/interpreter/mterp/arm/op_goto_32.S +++ b/runtime/interpreter/mterp/arm/op_goto_32.S @@ -10,31 +10,7 @@ * offset to byte offset. 
*/ /* goto/32 +AAAAAAAA */ -#if MTERP_PROFILE_BRANCHES FETCH r0, 1 @ r0<- aaaa (lo) - FETCH r1, 2 @ r1<- AAAA (hi) - orr rINST, r0, r1, lsl #16 @ rINST<- AAAAaaaa - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#else - FETCH r0, 1 @ r0<- aaaa (lo) - FETCH r1, 2 @ r1<- AAAA (hi) - orr rINST, r0, r1, lsl #16 @ rINST<- AAAAaaaa - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#endif + FETCH r3, 2 @ r1<- AAAA (hi) + orrs rINST, r0, r3, lsl #16 @ rINST<- AAAAaaaa + b MterpCommonTakenBranch diff --git a/runtime/interpreter/mterp/arm/op_if_eq.S b/runtime/interpreter/mterp/arm/op_if_eq.S index 568568662f..b8b6a6eec1 100644 --- a/runtime/interpreter/mterp/arm/op_if_eq.S +++ b/runtime/interpreter/mterp/arm/op_if_eq.S @@ -1 +1 @@ -%include "arm/bincmp.S" { "revcmp":"ne" } +%include "arm/bincmp.S" { "condition":"eq" } diff --git a/runtime/interpreter/mterp/arm/op_if_eqz.S b/runtime/interpreter/mterp/arm/op_if_eqz.S index 2a9c0f9e17..7012f61c69 100644 --- a/runtime/interpreter/mterp/arm/op_if_eqz.S +++ b/runtime/interpreter/mterp/arm/op_if_eqz.S @@ -1 +1 @@ -%include "arm/zcmp.S" { "revcmp":"ne" } +%include "arm/zcmp.S" { "condition":"eq" } diff --git a/runtime/interpreter/mterp/arm/op_if_ge.S b/runtime/interpreter/mterp/arm/op_if_ge.S index 60a0307a15..eb29e63f7c 100644 --- a/runtime/interpreter/mterp/arm/op_if_ge.S +++ b/runtime/interpreter/mterp/arm/op_if_ge.S @@ -1 +1 @@ -%include "arm/bincmp.S" { "revcmp":"lt" } +%include "arm/bincmp.S" { "condition":"ge" } diff --git a/runtime/interpreter/mterp/arm/op_if_gez.S b/runtime/interpreter/mterp/arm/op_if_gez.S index 981cdec0fa..d9da374199 100644 --- a/runtime/interpreter/mterp/arm/op_if_gez.S +++ b/runtime/interpreter/mterp/arm/op_if_gez.S @@ -1 +1 @@ -%include "arm/zcmp.S" { "revcmp":"lt" } +%include "arm/zcmp.S" { "condition":"ge" } diff --git a/runtime/interpreter/mterp/arm/op_if_gt.S b/runtime/interpreter/mterp/arm/op_if_gt.S index ca50cd7561..a35eab8f47 100644 --- a/runtime/interpreter/mterp/arm/op_if_gt.S +++ b/runtime/interpreter/mterp/arm/op_if_gt.S @@ -1 +1 @@ -%include "arm/bincmp.S" { "revcmp":"le" } +%include "arm/bincmp.S" { "condition":"gt" } diff --git a/runtime/interpreter/mterp/arm/op_if_gtz.S b/runtime/interpreter/mterp/arm/op_if_gtz.S index c621812439..4ef4d8ee19 100644 --- a/runtime/interpreter/mterp/arm/op_if_gtz.S +++ b/runtime/interpreter/mterp/arm/op_if_gtz.S @@ -1 +1 @@ -%include "arm/zcmp.S" { "revcmp":"le" } +%include "arm/zcmp.S" { "condition":"gt" } diff --git a/runtime/interpreter/mterp/arm/op_if_le.S b/runtime/interpreter/mterp/arm/op_if_le.S index 7e060f2fc9..c7c31bc089 100644 --- a/runtime/interpreter/mterp/arm/op_if_le.S +++ b/runtime/interpreter/mterp/arm/op_if_le.S @@ -1 +1 @@ -%include "arm/bincmp.S" { "revcmp":"gt" } +%include "arm/bincmp.S" { "condition":"le" } diff --git a/runtime/interpreter/mterp/arm/op_if_lez.S b/runtime/interpreter/mterp/arm/op_if_lez.S index f92be23717..9fbf6c9f02 
100644 --- a/runtime/interpreter/mterp/arm/op_if_lez.S +++ b/runtime/interpreter/mterp/arm/op_if_lez.S @@ -1 +1 @@ -%include "arm/zcmp.S" { "revcmp":"gt" } +%include "arm/zcmp.S" { "condition":"le" } diff --git a/runtime/interpreter/mterp/arm/op_if_lt.S b/runtime/interpreter/mterp/arm/op_if_lt.S index 213344d809..9469fbb1ef 100644 --- a/runtime/interpreter/mterp/arm/op_if_lt.S +++ b/runtime/interpreter/mterp/arm/op_if_lt.S @@ -1 +1 @@ -%include "arm/bincmp.S" { "revcmp":"ge" } +%include "arm/bincmp.S" { "condition":"lt" } diff --git a/runtime/interpreter/mterp/arm/op_if_ltz.S b/runtime/interpreter/mterp/arm/op_if_ltz.S index dfd4e44856..a4fc1b8f0b 100644 --- a/runtime/interpreter/mterp/arm/op_if_ltz.S +++ b/runtime/interpreter/mterp/arm/op_if_ltz.S @@ -1 +1 @@ -%include "arm/zcmp.S" { "revcmp":"ge" } +%include "arm/zcmp.S" { "condition":"lt" } diff --git a/runtime/interpreter/mterp/arm/op_if_ne.S b/runtime/interpreter/mterp/arm/op_if_ne.S index 4a58b4aba0..c945331a31 100644 --- a/runtime/interpreter/mterp/arm/op_if_ne.S +++ b/runtime/interpreter/mterp/arm/op_if_ne.S @@ -1 +1 @@ -%include "arm/bincmp.S" { "revcmp":"eq" } +%include "arm/bincmp.S" { "condition":"ne" } diff --git a/runtime/interpreter/mterp/arm/op_if_nez.S b/runtime/interpreter/mterp/arm/op_if_nez.S index d864ef437b..2d81fda444 100644 --- a/runtime/interpreter/mterp/arm/op_if_nez.S +++ b/runtime/interpreter/mterp/arm/op_if_nez.S @@ -1 +1 @@ -%include "arm/zcmp.S" { "revcmp":"eq" } +%include "arm/zcmp.S" { "condition":"ne" } diff --git a/runtime/interpreter/mterp/arm/op_mul_long.S b/runtime/interpreter/mterp/arm/op_mul_long.S index 8f40f1976b..a13c803301 100644 --- a/runtime/interpreter/mterp/arm/op_mul_long.S +++ b/runtime/interpreter/mterp/arm/op_mul_long.S @@ -24,13 +24,13 @@ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[CC] ldmia r2, {r0-r1} @ r0/r1<- vBB/vBB+1 ldmia r3, {r2-r3} @ r2/r3<- vCC/vCC+1 - mul ip, r2, r1 @ ip<- ZxW - umull r9, r10, r2, r0 @ r9/r10 <- ZxX - mla r2, r0, r3, ip @ r2<- YxX + (ZxW) + mul ip, r2, r1 @ ip<- ZxW + umull r1, lr, r2, r0 @ r1/lr <- ZxX + mla r2, r0, r3, ip @ r2<- YxX + (ZxW) mov r0, rINST, lsr #8 @ r0<- AA - add r10, r2, r10 @ r10<- r10 + low(ZxW + (YxX)) + add r2, r2, lr @ r2<- lr + low(ZxW + (YxX)) VREG_INDEX_TO_ADDR r0, r0 @ r0<- &fp[AA] FETCH_ADVANCE_INST 2 @ advance rPC, load rINST GET_INST_OPCODE ip @ extract opcode from rINST - stmia r0, {r9-r10} @ vAA/vAA+1<- r9/r10 + stmia r0, {r1-r2 } @ vAA/vAA+1<- r1/r2 GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S index 7ef24c5142..4c1f058260 100644 --- a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S +++ b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S @@ -13,12 +13,12 @@ VREG_INDEX_TO_ADDR rINST, r9 @ rINST<- &fp[A] ldmia r1, {r2-r3} @ r2/r3<- vBB/vBB+1 ldmia rINST, {r0-r1} @ r0/r1<- vAA/vAA+1 - mul ip, r2, r1 @ ip<- ZxW - umull r9, r10, r2, r0 @ r9/r10 <- ZxX - mla r2, r0, r3, ip @ r2<- YxX + (ZxW) + mul ip, r2, r1 @ ip<- ZxW + umull r1, lr, r2, r0 @ r1/lr <- ZxX + mla r2, r0, r3, ip @ r2<- YxX + (ZxW) mov r0, rINST @ r0<- &fp[A] (free up rINST) FETCH_ADVANCE_INST 1 @ advance rPC, load rINST - add r10, r2, r10 @ r10<- r10 + low(ZxW + (YxX)) + add r2, r2, lr @ r2<- r2 + low(ZxW + (YxX)) GET_INST_OPCODE ip @ extract opcode from rINST - stmia r0, {r9-r10} @ vAA/vAA+1<- r9/r10 + stmia r0, {r1-r2} @ vAA/vAA+1<- r1/r2 GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S 
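The op_mul_long rewrite above keeps the usual three-multiply decomposition but moves its scratch registers off r9/r10, presumably because r10 now carries the rPROFILE countdown. For reference, with each 64-bit operand split into 32-bit halves, a truncating 64x64->64 product only needs the low halves' product in full plus the two cross products modulo 2^32:

    #include <cstdint>

    // Reference for the umull + mul + mla sequence: the x1*y1 term is dropped
    // because it only affects bits above the 64-bit result.
    inline uint64_t MulLong(uint64_t x, uint64_t y) {
      const uint32_t x0 = static_cast<uint32_t>(x), x1 = static_cast<uint32_t>(x >> 32);
      const uint32_t y0 = static_cast<uint32_t>(y), y1 = static_cast<uint32_t>(y >> 32);
      const uint64_t low = static_cast<uint64_t>(x0) * y0;   // umull: full 64-bit low product
      const uint32_t cross = x0 * y1 + x1 * y0;              // mul + mla, both mod 2^32
      return low + (static_cast<uint64_t>(cross) << 32);
    }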
b/runtime/interpreter/mterp/arm/op_packed_switch.S index 4c369cb136..412c58f1bc 100644 --- a/runtime/interpreter/mterp/arm/op_packed_switch.S +++ b/runtime/interpreter/mterp/arm/op_packed_switch.S @@ -9,7 +9,6 @@ * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ -#if MTERP_PROFILE_BRANCHES FETCH r0, 1 @ r0<- bbbb (lo) FETCH r1, 2 @ r1<- BBBB (hi) mov r3, rINST, lsr #8 @ r3<- AA @@ -17,33 +16,5 @@ GET_VREG r1, r3 @ r1<- vAA add r0, rPC, r0, lsl #1 @ r0<- PC + BBBBbbbb*2 bl $func @ r0<- code-unit branch offset - mov rINST, r0 - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset; clear V - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#else - FETCH r0, 1 @ r0<- bbbb (lo) - FETCH r1, 2 @ r1<- BBBB (hi) - mov r3, rINST, lsr #8 @ r3<- AA - orr r0, r0, r1, lsl #16 @ r0<- BBBBbbbb - GET_VREG r1, r3 @ r1<- vAA - add r0, rPC, r0, lsl #1 @ r0<- PC + BBBBbbbb*2 - bl $func @ r0<- code-unit branch offset - mov rINST, r0 - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset; clear V - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#endif + movs rINST, r0 + b MterpCommonTakenBranch diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S index 3d7dec006d..5db8b6cdd7 100644 --- a/runtime/interpreter/mterp/arm/zcmp.S +++ b/runtime/interpreter/mterp/arm/zcmp.S @@ -1,29 +1,17 @@ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ mov r0, rINST, lsr #8 @ r0<- AA - GET_VREG r2, r0 @ r2<- vAA + GET_VREG r0, r0 @ r0<- vAA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - cmp r2, #0 @ compare (vA, 0) - mov${revcmp} rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r1, rINST, rINST @ convert to bytes & set flags - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, #0 @ compare (vA, 0) + b${condition} MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S index 2356ecbb89..8dd4fed7ca 100644 --- a/runtime/interpreter/mterp/arm64/bincmp.S +++ b/runtime/interpreter/mterp/arm64/bincmp.S @@ -1,7 +1,6 @@ /* - * Generic two-operand compare-and-branch operation. 
Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -10,22 +9,11 @@ ubfx w0, wINST, #8, #4 // w0<- A GET_VREG w3, w1 // w3<- vB GET_VREG w2, w0 // w2<- vA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Offset if branch not taken + FETCH_S wINST, 1 // wINST<- branch offset, in code units cmp w2, w3 // compare (vA, vB) - csel wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg. -#if MTERP_PROFILE_BRANCHES - // TUINING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 // Sign extend branch offset - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes, check sign - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.${condition} MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S index 23e656e826..9fbbbd34a4 100644 --- a/runtime/interpreter/mterp/arm64/entry.S +++ b/runtime/interpreter/mterp/arm64/entry.S @@ -31,11 +31,12 @@ ExecuteMterpImpl: .cfi_startproc - stp xIBASE, xREFS, [sp, #-64]! - stp xSELF, xINST, [sp, #16] - stp xPC, xFP, [sp, #32] - stp fp, lr, [sp, #48] - add fp, sp, #48 + stp xPROFILE, x27, [sp, #-80]! + stp xIBASE, xREFS, [sp, #16] + stp xSELF, xINST, [sp, #32] + stp xPC, xFP, [sp, #48] + stp fp, lr, [sp, #64] + add fp, sp, #64 /* Remember the return register */ str x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET] @@ -56,6 +57,12 @@ ExecuteMterpImpl: /* Starting ibase */ ldr xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] + /* Set up for backwards branches & osr profiling */ + ldr x0, [xFP, #OFF_FP_METHOD] + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpSetUpHotnessCountdown + mov wPROFILE, w0 // Starting hotness countdown to xPROFILE + /* start executing the instruction at rPC */ FETCH_INST // load wINST from rPC GET_INST_OPCODE ip // extract opcode from wINST diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S index aae78de1b3..2d3a11eafa 100644 --- a/runtime/interpreter/mterp/arm64/footer.S +++ b/runtime/interpreter/mterp/arm64/footer.S @@ -107,6 +107,107 @@ MterpException: GET_INST_OPCODE ip GOTO_OPCODE ip /* NOTE: no fallthrough */ +/* + * Common handling for branches with support for Jit profiling. + * On entry: + * wINST <= signed offset + * wPROFILE <= signed hotness countdown (expanded to 32 bits) + * condition bits <= set to establish sign of offset (use "NoFlags" entry if not) + * + * We have quite a few different cases for branch profiling, OSR detection and + * suspend check support here. + * + * Taken backward branches: + * If profiling active, do hotness countdown and report if we hit zero. + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * Is there a pending suspend request? If so, suspend. 
+ * + * Taken forward branches and not-taken backward branches: + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * + * Our most common case is expected to be a taken backward branch with active jit profiling, + * but no full OSR check and no pending suspend request. + * Next most common case is not-taken branch with no full OSR check. + * + */ +MterpCommonTakenBranchNoFlags: + cmp wINST, #0 + b.gt .L_forward_branch // don't add forward branches to hotness + tbnz wPROFILE, #31, .L_no_count_backwards // go if negative + subs wPROFILE, wPROFILE, #1 // countdown + b.eq .L_add_batch // counted down to zero - report +.L_resume_backward_branch: + ldr lr, [xSELF, #THREAD_FLAGS_OFFSET] + add w2, wINST, wINST // w2<- byte offset + FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST + REFRESH_IBASE + ands lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) + b.ne .L_suspend_request_pending + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +.L_suspend_request_pending: + EXPORT_PC + mov x0, xSELF + bl MterpSuspendCheck // (self) + cbnz x0, MterpFallback + REFRESH_IBASE // might have changed during suspend + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +.L_no_count_backwards: + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.ne .L_resume_backward_branch + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, xINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) + cbnz x0, MterpOnStackReplacement + b .L_resume_backward_branch + +.L_forward_branch: + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_osr_forward +.L_resume_forward_branch: + add w2, wINST, wINST // w2<- byte offset + FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +.L_check_osr_forward: + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, xINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) + cbnz x0, MterpOnStackReplacement + b .L_resume_forward_branch + +.L_add_batch: + add x1, xFP, #OFF_FP_SHADOWFRAME + strh wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + ldr x0, [xFP, #OFF_FP_METHOD] + mov x2, xSELF + bl MterpAddHotnessBatch // (method, shadow_frame, self) + mov wPROFILE, w0 // restore new hotness countdown to wPROFILE + b .L_no_count_backwards + +/* + * Entered from the conditional branch handlers when OSR check request active on + * not-taken path. All Dalvik not-taken conditional branch offsets are 2. + */ +.L_check_not_taken_osr: + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, #2 + EXPORT_PC + bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) + cbnz x0, MterpOnStackReplacement + FETCH_ADVANCE_INST 2 + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + /* * Check for suspend check request. Assumes wINST already loaded, xPC advanced and @@ -175,10 +276,36 @@ MterpReturn: check2: mov x0, #1 // signal return to caller. MterpDone: - ldp fp, lr, [sp, #48] - ldp xPC, xFP, [sp, #32] - ldp xSELF, xINST, [sp, #16] - ldp xIBASE, xREFS, [sp], #64 +/* + * At this point, we expect wPROFILE to be non-zero. If negative, hotness is disabled or we're + * checking for OSR. 
If greater than zero, we might have unreported hotness to register + * (the difference between the ending wPROFILE and the cached hotness counter). wPROFILE + * should only reach zero immediately after a hotness decrement, and is then reset to either + * a negative special state or the new non-zero countdown value. + */ + cmp wPROFILE, #0 + bgt MterpProfileActive // if > 0, we may have some counts to report. + ldp fp, lr, [sp, #64] + ldp xPC, xFP, [sp, #48] + ldp xSELF, xINST, [sp, #32] + ldp xIBASE, xREFS, [sp, #16] + ldp xPROFILE, x27, [sp], #80 + ret + +MterpProfileActive: + mov xINST, x0 // stash return value + /* Report cached hotness counts */ + ldr x0, [xFP, #OFF_FP_METHOD] + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, xSELF + strh wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + bl MterpAddHotnessBatch // (method, shadow_frame, self) + mov x0, xINST // restore return value + ldp fp, lr, [sp, #64] + ldp xPC, xFP, [sp, #48] + ldp xSELF, xINST, [sp, #32] + ldp xIBASE, xREFS, [sp, #16] + ldp xPROFILE, x27, [sp], #80 ret .cfi_endproc diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S index 7101ba972c..4257200bb7 100644 --- a/runtime/interpreter/mterp/arm64/header.S +++ b/runtime/interpreter/mterp/arm64/header.S @@ -74,6 +74,7 @@ The following registers have fixed assignments: x23 xINST first 16-bit code unit of current instruction x24 xIBASE interpreted instruction base pointer, used for computed goto x25 xREFS base of object references in shadow frame (ideally, we'll get rid of this later). + x26 wPROFILE jit profile hotness countdown x16 ip scratch reg x17 ip2 scratch reg (used by macros) @@ -92,15 +93,17 @@ codes. /* During bringup, we'll use the shadow frame model instead of xFP */ /* single-purpose registers, given names for clarity */ -#define xPC x20 -#define xFP x21 -#define xSELF x22 -#define xINST x23 -#define wINST w23 -#define xIBASE x24 -#define xREFS x25 -#define ip x16 -#define ip2 x17 +#define xPC x20 +#define xFP x21 +#define xSELF x22 +#define xINST x23 +#define wINST w23 +#define xIBASE x24 +#define xREFS x25 +#define wPROFILE w26 +#define xPROFILE x26 +#define ip x16 +#define ip2 x17 /* * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs. So, @@ -114,7 +117,7 @@ codes. #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET) #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET) #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET) -#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET) +#define OFF_FP_SHADOWFRAME OFF_FP(0) /* * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects. Must diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S index 7e2f6a9c11..6381e94fb5 100644 --- a/runtime/interpreter/mterp/arm64/op_goto.S +++ b/runtime/interpreter/mterp/arm64/op_goto.S @@ -5,21 +5,5 @@ * double to get a byte offset. 
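The new MterpCommonTakenBranchNoFlags handler in arm64/footer.S above is the core of this change. As an aid to review, the following C++ paraphrase sketches its control flow; it is illustrative only, and apart from the real mterp.cc helpers it stands in for (MterpAddHotnessBatch, MterpMaybeDoOnStackReplacement), every identifier is a placeholder for the surrounding assembly rather than an actual runtime function.

```cpp
// Sketch of the taken-branch handling above; placeholder names, not runtime code.
#include <cstdint>

constexpr int16_t kJitCheckForOSR = -1;       // mirrors JIT_CHECK_OSR
int16_t ReportHotnessBatch();                 // ~ MterpAddHotnessBatch + fresh countdown
void TryOnStackReplacement(int32_t offset);   // ~ MterpMaybeDoOnStackReplacement
void AdvanceAndDispatch(int32_t offset);      // ~ FETCH_ADVANCE_INST_RB + GOTO_OPCODE
void CheckSuspend();                          // ~ THREAD_SUSPEND/CHECKPOINT_REQUEST handling

void TakenBranch(int32_t offset, int16_t& profile) {  // 'profile' mirrors wPROFILE
  if (offset > 0) {
    // Forward branches are never counted; only probe for a pending OSR request.
    if (profile == kJitCheckForOSR) TryOnStackReplacement(offset);
    AdvanceAndDispatch(offset);
    return;
  }
  // Taken backward branch.
  if (profile >= 0) {                         // hotness counting active
    if (--profile == 0) {
      profile = ReportHotnessBatch();         // counted down to zero: report the batch
      if (profile == kJitCheckForOSR) TryOnStackReplacement(offset);
    }
  } else if (profile == kJitCheckForOSR) {    // counting off, but OSR checks requested
    TryOnStackReplacement(offset);
  }
  CheckSuspend();                             // suspend requests are honored only on this path
  AdvanceAndDispatch(offset);
}
```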
*/ /* goto +AA */ - /* tuning: use sbfx for 6t2+ targets */ - lsl w0, wINST, #16 // w0<- AAxx0000 - asr wINST, w0, #24 // wINST<- ssssssAA (sign-extended) -#if MTERP_PROFILE_BRANCHES - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in wINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] // Preload flags for MterpCheckSuspendAndContinue - adds w1, wINST, wINST // Convert dalvik offset to byte offset, setting flags - FETCH_ADVANCE_INST_RB w1 // load wINST and advance xPC - // If backwards branch refresh rIBASE - b.mi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction + sbfx wINST, wINST, #8, #8 // wINST<- ssssssAA (sign-extended) + b MterpCommonTakenBranchNoFlags diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S index b2b9924409..fb9a80a3c1 100644 --- a/runtime/interpreter/mterp/arm64/op_goto_16.S +++ b/runtime/interpreter/mterp/arm64/op_goto_16.S @@ -6,17 +6,4 @@ */ /* goto/16 +AAAA */ FETCH_S wINST, 1 // wINST<- ssssAAAA (sign-extended) -#if MTERP_PROFILE_BRANCHES - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w1, wINST, wINST // w1<- byte offset, flags set - FETCH_ADVANCE_INST_RB w1 // update rPC, load rINST - b.mi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip // extract opcode from rINST - GOTO_OPCODE ip // jump to next instruction + b MterpCommonTakenBranchNoFlags diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S index b785857b9b..b13cb41bc7 100644 --- a/runtime/interpreter/mterp/arm64/op_goto_32.S +++ b/runtime/interpreter/mterp/arm64/op_goto_32.S @@ -13,17 +13,4 @@ FETCH w0, 1 // w0<- aaaa (lo) FETCH w1, 2 // w1<- AAAA (hi) orr wINST, w0, w1, lsl #16 // wINST<- AAAAaaaa -#if MTERP_PROFILE_BRANCHES - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w1, wINST, wINST // w1<- byte offset - FETCH_ADVANCE_INST_RB w1 // update rPC, load xINST - b.le MterpCheckSuspendAndContinue - GET_INST_OPCODE ip // extract opcode from xINST - GOTO_OPCODE ip // jump to next instruction + b MterpCommonTakenBranchNoFlags diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S index e8b4f04dfe..1456f1a650 100644 --- a/runtime/interpreter/mterp/arm64/op_packed_switch.S +++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S @@ -17,17 +17,4 @@ add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2 bl $func // w0<- code-unit branch offset sbfm xINST, x0, 0, 31 -#if MTERP_PROFILE_BRANCHES - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - mov x2, xINST - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w1, wINST, wINST // w1<- byte offset; clear V - FETCH_ADVANCE_INST_RB w1 // update rPC, load wINST - b.le MterpCheckSuspendAndContinue - GET_INST_OPCODE ip // extract opcode from wINST - 
GOTO_OPCODE ip // jump to next instruction + b MterpCommonTakenBranchNoFlags diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S index 3f1e1b180f..b303e6a13f 100644 --- a/runtime/interpreter/mterp/arm64/zcmp.S +++ b/runtime/interpreter/mterp/arm64/zcmp.S @@ -1,29 +1,17 @@ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Branch offset if not taken + FETCH_S wINST, 1 // w1<- branch offset, in code units cmp w2, #0 // compare (vA, 0) - csel wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg -#if MTERP_PROFILE_BRANCHES - // TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in wINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes & set flags - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.${condition} MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index 10b19c5f4f..1da1181503 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -21,6 +21,7 @@ #include "entrypoints/entrypoint_utils-inl.h" #include "mterp.h" #include "jit/jit.h" +#include "jit/jit_instrumentation.h" #include "debugger.h" namespace art { @@ -432,7 +433,7 @@ extern "C" bool MterpHandleException(Thread* self, ShadowFrame* shadow_frame) } extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); if (inst->Opcode(inst_data) == Instruction::MOVE_EXCEPTION) { @@ -444,7 +445,7 @@ extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame) } extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { UNUSED(self); const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); @@ -452,7 +453,7 @@ extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_ } extern "C" void MterpLogArrayIndexException(Thread* self, ShadowFrame* shadow_frame) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { UNUSED(self); const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); @@ -460,7 +461,7 @@ extern "C" void MterpLogArrayIndexException(Thread* self, ShadowFrame* shadow_fr } extern "C" void 
MterpLogNegativeArraySizeException(Thread* self, ShadowFrame* shadow_frame) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { UNUSED(self); const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); @@ -468,7 +469,7 @@ extern "C" void MterpLogNegativeArraySizeException(Thread* self, ShadowFrame* sh } extern "C" void MterpLogNoSuchMethodException(Thread* self, ShadowFrame* shadow_frame) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { UNUSED(self); const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); @@ -476,7 +477,7 @@ extern "C" void MterpLogNoSuchMethodException(Thread* self, ShadowFrame* shadow_ } extern "C" void MterpLogExceptionThrownException(Thread* self, ShadowFrame* shadow_frame) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { UNUSED(self); const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); @@ -484,7 +485,7 @@ extern "C" void MterpLogExceptionThrownException(Thread* self, ShadowFrame* shad } extern "C" void MterpLogNullObjectException(Thread* self, ShadowFrame* shadow_frame) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { UNUSED(self); const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); @@ -492,7 +493,7 @@ extern "C" void MterpLogNullObjectException(Thread* self, ShadowFrame* shadow_fr } extern "C" void MterpLogFallback(Thread* self, ShadowFrame* shadow_frame) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { UNUSED(self); const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); @@ -501,7 +502,7 @@ extern "C" void MterpLogFallback(Thread* self, ShadowFrame* shadow_frame) } extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { UNUSED(self); const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); @@ -509,7 +510,7 @@ extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t off } extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { UNUSED(self); const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr()); uint16_t inst_data = inst->Fetch16(0); @@ -521,7 +522,7 @@ extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, } extern "C" bool MterpSuspendCheck(Thread* self) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { self->AllowThreadSuspension(); return MterpShouldSwitchInterpreters(); } @@ -617,7 +618,7 @@ extern "C" int artSetObjInstanceFromMterp(uint32_t field_idx, mirror::Object* ob } extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t index) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { if (UNLIKELY(arr == nullptr)) { ThrowNullPointerExceptionFromInterpreter(); return nullptr; @@ -631,7 +632,7 @@ extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t i } extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset) - 
SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { if (UNLIKELY(obj == nullptr)) { ThrowNullPointerExceptionFromInterpreter(); return nullptr; @@ -639,13 +640,90 @@ extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset)); } +/* + * Create a hotness_countdown based on the current method hotness_count and profiling + * mode. In short, determine how many hotness events we hit before reporting back + * to the full instrumentation via MterpAddHotnessBatch. Called once on entry to the method, + * and regenerated following batch updates. + */ +extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame) + SHARED_REQUIRES(Locks::mutator_lock_) { + uint16_t hotness_count = method->GetCounter(); + int32_t countdown_value = jit::kJitHotnessDisabled; + jit::Jit* jit = Runtime::Current()->GetJit(); + if (jit != nullptr) { + jit::JitInstrumentationCache* cache = jit->GetInstrumentationCache(); + int32_t warm_threshold = cache->WarmMethodThreshold(); + int32_t hot_threshold = cache->HotMethodThreshold(); + int32_t osr_threshold = cache->OSRMethodThreshold(); + if (hotness_count < warm_threshold) { + countdown_value = warm_threshold - hotness_count; + } else if (hotness_count < hot_threshold) { + countdown_value = hot_threshold - hotness_count; + } else if (hotness_count < osr_threshold) { + countdown_value = osr_threshold - hotness_count; + } else { + countdown_value = jit::kJitCheckForOSR; + } + } + /* + * The actual hotness threshold may exceed the range of our int16_t countdown value. This is + * not a problem, though. We can just break it down into smaller chunks. + */ + countdown_value = std::min(countdown_value, + static_cast<int32_t>(std::numeric_limits<int16_t>::max())); + shadow_frame->SetCachedHotnessCountdown(countdown_value); + shadow_frame->SetHotnessCountdown(countdown_value); + return countdown_value; +} + +/* + * Report a batch of hotness events to the instrumentation and then return the new + * countdown value to the next time we should report. + */ +extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method, + ShadowFrame* shadow_frame, + Thread* self) + SHARED_REQUIRES(Locks::mutator_lock_) { + jit::Jit* jit = Runtime::Current()->GetJit(); + if (jit != nullptr) { + int16_t count = shadow_frame->GetCachedHotnessCountdown() - shadow_frame->GetHotnessCountdown(); + jit->GetInstrumentationCache()->AddSamples(self, method, count); + } + return MterpSetUpHotnessCountdown(method, shadow_frame); +} + +// TUNING: Unused by arm/arm64. Remove when x86/x86_64/mips/mips64 mterps support batch updates. 
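To make the interplay between MterpSetUpHotnessCountdown and MterpAddHotnessBatch above concrete, here is a hypothetical walk-through. The threshold values are invented for illustration; the real ones come from the JitInstrumentationCache accessors used in the code.

```cpp
// Hypothetical walk-through only; thresholds are made up for the example.
// Assume WarmMethodThreshold() == 5000, HotMethodThreshold() == 10000,
// OSRMethodThreshold() == 60000, and the method's counter starts at 0.
//
// Entry:  countdown = 5000 - 0 = 5000, stored via SetCachedHotnessCountdown() /
//         SetHotnessCountdown() and kept live in wPROFILE / rPROFILE.
// After 5000 taken backward branches the register reaches 0; the assembly writes it
// back to the shadow frame and calls MterpAddHotnessBatch, which reports
//         count = GetCachedHotnessCountdown() - GetHotnessCountdown() = 5000 - 0 = 5000
// samples and recomputes the countdown (10000 - 5000 = 5000 if the counter advanced
// by the batch).
// Once the counter passes OSRMethodThreshold(), the countdown becomes
// kJitCheckForOSR (-1) and branches probe MterpMaybeDoOnStackReplacement instead.
// A gap wider than int16_t max is simply clamped to 32767 and consumed in batches.
```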
extern "C" bool MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { ArtMethod* method = shadow_frame->GetMethod(); JValue* result = shadow_frame->GetResultRegister(); uint32_t dex_pc = shadow_frame->GetDexPC(); - const auto* const instrumentation = Runtime::Current()->GetInstrumentation(); - instrumentation->Branch(self, method, dex_pc, offset); + jit::Jit* jit = Runtime::Current()->GetJit(); + if ((jit != nullptr) && (offset <= 0)) { + jit->GetInstrumentationCache()->AddSamples(self, method, 1); + } + int16_t countdown_value = MterpSetUpHotnessCountdown(method, shadow_frame); + if (countdown_value == jit::kJitCheckForOSR) { + return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result); + } else { + return false; + } +} + +extern "C" bool MterpMaybeDoOnStackReplacement(Thread* self, + ShadowFrame* shadow_frame, + int32_t offset) + SHARED_REQUIRES(Locks::mutator_lock_) { + ArtMethod* method = shadow_frame->GetMethod(); + JValue* result = shadow_frame->GetResultRegister(); + uint32_t dex_pc = shadow_frame->GetDexPC(); + jit::Jit* jit = Runtime::Current()->GetJit(); + if (offset <= 0) { + // Keep updating hotness in case a compilation request was dropped. Eventually it will retry. + jit->GetInstrumentationCache()->AddSamples(self, method, 1); + } + // Assumes caller has already determined that an OSR check is appropriate. return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result); } diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S index 092474d544..a38a87be35 100644 --- a/runtime/interpreter/mterp/out/mterp_arm.S +++ b/runtime/interpreter/mterp/out/mterp_arm.S @@ -79,7 +79,8 @@ The following registers have fixed assignments: r6 rSELF self (Thread) pointer r7 rINST first 16-bit code unit of current instruction r8 rIBASE interpreted instruction base pointer, used for computed goto - r11 rREFS base of object references in shadow frame (ideally, we'll get rid of this later). + r10 rPROFILE branch profiling countdown + r11 rREFS base of object references in shadow frame (ideally, we'll get rid of this later). Macros are provided for common operations. Each macro MUST emit only one instruction to make instruction-counting easier. They MUST NOT alter @@ -97,12 +98,13 @@ unspecified registers or condition codes. /* During bringup, we'll use the shadow frame model instead of rFP */ /* single-purpose registers, given names for clarity */ -#define rPC r4 -#define rFP r5 -#define rSELF r6 -#define rINST r7 -#define rIBASE r8 -#define rREFS r11 +#define rPC r4 +#define rFP r5 +#define rSELF r6 +#define rINST r7 +#define rIBASE r8 +#define rPROFILE r10 +#define rREFS r11 /* * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs. So, @@ -116,7 +118,7 @@ unspecified registers or condition codes. #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET) #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET) #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET) -#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET) +#define OFF_FP_SHADOWFRAME OFF_FP(0) /* * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects. Must @@ -329,10 +331,8 @@ unspecified registers or condition codes. 
ExecuteMterpImpl: .fnstart - .save {r4-r10,fp,lr} - stmfd sp!, {r4-r10,fp,lr} @ save 9 regs - .pad #4 - sub sp, sp, #4 @ align 64 + .save {r3-r10,fp,lr} + stmfd sp!, {r3-r10,fp,lr} @ save 10 regs, (r3 just to align 64) /* Remember the return register */ str r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET] @@ -353,6 +353,12 @@ ExecuteMterpImpl: /* Starting ibase */ ldr rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] + /* Set up for backwards branches & osr profiling */ + ldr r0, [rFP, #OFF_FP_METHOD] + add r1, rFP, #OFF_FP_SHADOWFRAME + bl MterpSetUpHotnessCountdown + mov rPROFILE, r0 @ Starting hotness countdown to rPROFILE + /* start executing the instruction at rPC */ FETCH_INST @ load rINST from rPC GET_INST_OPCODE ip @ extract opcode from rINST @@ -1103,35 +1109,8 @@ artMterpAsmInstructionStart = .L_op_nop * double to get a byte offset. */ /* goto +AA */ - /* tuning: use sbfx for 6t2+ targets */ -#if MTERP_PROFILE_BRANCHES - mov r0, rINST, lsl #16 @ r0<- AAxx0000 - movs rINST, r0, asr #24 @ rINST<- ssssssAA (sign-extended) - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r2, rINST, rINST @ r2<- byte offset, set flags - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - @ If backwards branch refresh rIBASE - bmi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#else - mov r0, rINST, lsl #16 @ r0<- AAxx0000 - movs rINST, r0, asr #24 @ rINST<- ssssssAA (sign-extended) - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r2, rINST, rINST @ r2<- byte offset, set flags - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - @ If backwards branch refresh rIBASE - bmi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#endif + sbfx rINST, rINST, #8, #8 @ rINST<- ssssssAA (sign-extended) + b MterpCommonTakenBranchNoFlags /* ------------------------------ */ .balign 128 @@ -1144,30 +1123,8 @@ artMterpAsmInstructionStart = .L_op_nop * double to get a byte offset. */ /* goto/16 +AAAA */ -#if MTERP_PROFILE_BRANCHES FETCH_S rINST, 1 @ rINST<- ssssAAAA (sign-extended) - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset, flags set - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#else - FETCH_S rINST, 1 @ rINST<- ssssAAAA (sign-extended) - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset, flags set - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#endif + b MterpCommonTakenBranchNoFlags /* ------------------------------ */ .balign 128 @@ -1185,34 +1142,10 @@ artMterpAsmInstructionStart = .L_op_nop * offset to byte offset. 
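The ARM port follows the same scheme as arm64: r10 is repurposed as rPROFILE (r3 is pushed only to keep the frame 8-byte aligned), the countdown is primed in the prologue above, and unreported decrements are flushed at MterpDone further down. A rough C++ sketch of that per-invocation lifecycle, assuming the usual ART types are in scope and using the helpers introduced in mterp.cc; illustrative only.

```cpp
// Sketch of the countdown's per-invocation lifecycle; not actual runtime code.
extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame);
extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method, ShadowFrame* shadow_frame,
                                        Thread* self);

int InterpretSketch(ArtMethod* method, ShadowFrame* shadow_frame, Thread* self) {
  // Prologue: prime rPROFILE once per invocation.
  int16_t profile = static_cast<int16_t>(MterpSetUpHotnessCountdown(method, shadow_frame));
  int result = 0;
  // ... dispatch loop; taken backward branches decrement 'profile' (see the common footer) ...
  if (profile > 0) {                                   // MterpDone: decrements not yet reported?
    shadow_frame->SetHotnessCountdown(profile);        // write the live value back, then report
    MterpAddHotnessBatch(method, shadow_frame, self);  // the difference as one batch to the JIT
  }
  return result;                                       // profile <= 0: disabled or OSR-check mode
}
```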
*/ /* goto/32 +AAAAAAAA */ -#if MTERP_PROFILE_BRANCHES FETCH r0, 1 @ r0<- aaaa (lo) - FETCH r1, 2 @ r1<- AAAA (hi) - orr rINST, r0, r1, lsl #16 @ rINST<- AAAAaaaa - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#else - FETCH r0, 1 @ r0<- aaaa (lo) - FETCH r1, 2 @ r1<- AAAA (hi) - orr rINST, r0, r1, lsl #16 @ rINST<- AAAAaaaa - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#endif + FETCH r3, 2 @ r1<- AAAA (hi) + orrs rINST, r0, r3, lsl #16 @ rINST<- AAAAaaaa + b MterpCommonTakenBranch /* ------------------------------ */ .balign 128 @@ -1228,29 +1161,6 @@ artMterpAsmInstructionStart = .L_op_nop * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ -#if MTERP_PROFILE_BRANCHES - FETCH r0, 1 @ r0<- bbbb (lo) - FETCH r1, 2 @ r1<- BBBB (hi) - mov r3, rINST, lsr #8 @ r3<- AA - orr r0, r0, r1, lsl #16 @ r0<- BBBBbbbb - GET_VREG r1, r3 @ r1<- vAA - add r0, rPC, r0, lsl #1 @ r0<- PC + BBBBbbbb*2 - bl MterpDoPackedSwitch @ r0<- code-unit branch offset - mov rINST, r0 - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset; clear V - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#else FETCH r0, 1 @ r0<- bbbb (lo) FETCH r1, 2 @ r1<- BBBB (hi) mov r3, rINST, lsr #8 @ r3<- AA @@ -1258,14 +1168,8 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG r1, r3 @ r1<- vAA add r0, rPC, r0, lsl #1 @ r0<- PC + BBBBbbbb*2 bl MterpDoPackedSwitch @ r0<- code-unit branch offset - mov rINST, r0 - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset; clear V - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#endif + movs rINST, r0 + b MterpCommonTakenBranch /* ------------------------------ */ .balign 128 @@ -1282,29 +1186,6 @@ artMterpAsmInstructionStart = .L_op_nop * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ -#if MTERP_PROFILE_BRANCHES - FETCH r0, 1 @ r0<- bbbb (lo) - FETCH r1, 2 @ r1<- BBBB (hi) - mov r3, rINST, lsr #8 @ r3<- AA - orr r0, r0, r1, lsl #16 @ r0<- BBBBbbbb - GET_VREG r1, r3 @ r1<- vAA - add r0, rPC, r0, lsl #1 @ r0<- PC + BBBBbbbb*2 - bl MterpDoSparseSwitch @ r0<- code-unit branch offset - mov rINST, r0 - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset; clear V - FETCH_ADVANCE_INST_RB r1 @ update rPC, 
load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#else FETCH r0, 1 @ r0<- bbbb (lo) FETCH r1, 2 @ r1<- BBBB (hi) mov r3, rINST, lsr #8 @ r3<- AA @@ -1312,14 +1193,8 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG r1, r3 @ r1<- vAA add r0, rPC, r0, lsl #1 @ r0<- PC + BBBBbbbb*2 bl MterpDoSparseSwitch @ r0<- code-unit branch offset - mov rINST, r0 - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - adds r1, rINST, rINST @ r1<- byte offset; clear V - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - ble MterpCheckSuspendAndContinue - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction -#endif + movs rINST, r0 + b MterpCommonTakenBranch /* ------------------------------ */ @@ -1485,22 +1360,6 @@ artMterpAsmInstructionStart = .L_op_nop /* * Compare two 64-bit values. Puts 0, 1, or -1 into the destination * register based on the results of the comparison. - * - * We load the full values with LDM, but in practice many values could - * be resolved by only looking at the high word. This could be made - * faster or slower by splitting the LDM into a pair of LDRs. - * - * If we just wanted to set condition flags, we could do this: - * subs ip, r0, r2 - * sbcs ip, r1, r3 - * subeqs ip, r0, r2 - * Leaving { <0, 0, >0 } in ip. However, we have to set it to a specific - * integer value, which we can do with 2 conditional mov/mvn instructions - * (set 1, set -1; if they're equal we already have 0 in ip), giving - * us a constant 5-cycle path plus a branch at the end to the - * instruction epilogue code. The multi-compare approach below needs - * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch - * in the worst case (the 64-bit values are equal). */ /* cmp-long vAA, vBB, vCC */ FETCH r0, 1 @ r0<- CCBB @@ -1511,13 +1370,16 @@ artMterpAsmInstructionStart = .L_op_nop VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[CC] ldmia r2, {r0-r1} @ r0/r1<- vBB/vBB+1 ldmia r3, {r2-r3} @ r2/r3<- vCC/vCC+1 - cmp r1, r3 @ compare (vBB+1, vCC+1) - blt .Lop_cmp_long_less @ signed compare on high part - bgt .Lop_cmp_long_greater - subs r1, r0, r2 @ r1<- r0 - r2 - bhi .Lop_cmp_long_greater @ unsigned compare on low part - bne .Lop_cmp_long_less - b .Lop_cmp_long_finish @ equal; r1 already holds 0 + cmp r0, r2 + sbcs ip, r1, r3 @ Sets correct CCs for checking LT (but not EQ/NE) + mov ip, #0 + mvnlt ip, #0 @ -1 + cmpeq r0, r2 @ For correct EQ/NE, we may need to repeat the first CMP + orrne ip, #1 + FETCH_ADVANCE_INST 2 @ advance rPC, load rINST + SET_VREG ip, r9 @ vAA<- ip + GET_INST_OPCODE ip @ extract opcode from rINST + GOTO_OPCODE ip @ jump to next instruction /* ------------------------------ */ .balign 128 @@ -1525,9 +1387,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_eq.S */ /* File: arm/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. 
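Stepping back to the cmp-long rewrite just above: the out-of-line .Lop_cmp_long_less / _greater / _finish continuations (deleted later in this file) are no longer needed because the new inline sequence derives the result straight from the flags. What it computes, as an illustrative C++ equivalent:

```cpp
// Illustrative equivalent of the new inline cmp-long sequence; not runtime code.
#include <cstdint>

int32_t CmpLong(int64_t vbb, int64_t vcc) {
  // cmp (low) + sbcs (high): a full 64-bit subtract whose signed "less than"
  // condition is valid even though the low-word result is discarded.
  if (vbb < vcc) return -1;   // mvnlt ip, #0
  // cmpeq (low again): the high words matched once the borrow was applied,
  // so the low words decide equal vs. greater.
  if (vbb != vcc) return 1;   // orrne ip, #1
  return 0;                   // ip left at 0
}
```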
* * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1535,24 +1396,13 @@ artMterpAsmInstructionStart = .L_op_nop mov r1, rINST, lsr #12 @ r1<- B ubfx r0, rINST, #8, #4 @ r0<- A GET_VREG r3, r1 @ r3<- vB - GET_VREG r2, r0 @ r2<- vA + GET_VREG r0, r0 @ r0<- vA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - cmp r2, r3 @ compare (vA, vB) - movne rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r2, rINST, rINST @ convert to bytes, check sign - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, r3 @ compare (vA, vB) + beq MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1563,9 +1413,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_ne.S */ /* File: arm/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1573,24 +1422,13 @@ artMterpAsmInstructionStart = .L_op_nop mov r1, rINST, lsr #12 @ r1<- B ubfx r0, rINST, #8, #4 @ r0<- A GET_VREG r3, r1 @ r3<- vB - GET_VREG r2, r0 @ r2<- vA + GET_VREG r0, r0 @ r0<- vA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - cmp r2, r3 @ compare (vA, vB) - moveq rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r2, rINST, rINST @ convert to bytes, check sign - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, r3 @ compare (vA, vB) + bne MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1601,9 +1439,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_lt.S */ /* File: arm/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. 
* * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1611,24 +1448,13 @@ artMterpAsmInstructionStart = .L_op_nop mov r1, rINST, lsr #12 @ r1<- B ubfx r0, rINST, #8, #4 @ r0<- A GET_VREG r3, r1 @ r3<- vB - GET_VREG r2, r0 @ r2<- vA + GET_VREG r0, r0 @ r0<- vA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - cmp r2, r3 @ compare (vA, vB) - movge rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r2, rINST, rINST @ convert to bytes, check sign - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, r3 @ compare (vA, vB) + blt MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1639,9 +1465,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_ge.S */ /* File: arm/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1649,24 +1474,13 @@ artMterpAsmInstructionStart = .L_op_nop mov r1, rINST, lsr #12 @ r1<- B ubfx r0, rINST, #8, #4 @ r0<- A GET_VREG r3, r1 @ r3<- vB - GET_VREG r2, r0 @ r2<- vA + GET_VREG r0, r0 @ r0<- vA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - cmp r2, r3 @ compare (vA, vB) - movlt rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r2, rINST, rINST @ convert to bytes, check sign - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, r3 @ compare (vA, vB) + bge MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1677,9 +1491,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_gt.S */ /* File: arm/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. 
* * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1687,24 +1500,13 @@ artMterpAsmInstructionStart = .L_op_nop mov r1, rINST, lsr #12 @ r1<- B ubfx r0, rINST, #8, #4 @ r0<- A GET_VREG r3, r1 @ r3<- vB - GET_VREG r2, r0 @ r2<- vA + GET_VREG r0, r0 @ r0<- vA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - cmp r2, r3 @ compare (vA, vB) - movle rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r2, rINST, rINST @ convert to bytes, check sign - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, r3 @ compare (vA, vB) + bgt MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1715,9 +1517,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_le.S */ /* File: arm/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1725,24 +1526,13 @@ artMterpAsmInstructionStart = .L_op_nop mov r1, rINST, lsr #12 @ r1<- B ubfx r0, rINST, #8, #4 @ r0<- A GET_VREG r3, r1 @ r3<- vB - GET_VREG r2, r0 @ r2<- vA + GET_VREG r0, r0 @ r0<- vA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - cmp r2, r3 @ compare (vA, vB) - movgt rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r2, rINST, rINST @ convert to bytes, check sign - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, r3 @ compare (vA, vB) + ble MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1753,32 +1543,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_eqz.S */ /* File: arm/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ mov r0, rINST, lsr #8 @ r0<- AA - GET_VREG r2, r0 @ r2<- vAA + GET_VREG r0, r0 @ r0<- vAA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - cmp r2, #0 @ compare (vA, 0) - movne rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. 
- EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r1, rINST, rINST @ convert to bytes & set flags - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, #0 @ compare (vA, 0) + beq MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1789,32 +1567,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_nez.S */ /* File: arm/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ mov r0, rINST, lsr #8 @ r0<- AA - GET_VREG r2, r0 @ r2<- vAA + GET_VREG r0, r0 @ r0<- vAA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - cmp r2, #0 @ compare (vA, 0) - moveq rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r1, rINST, rINST @ convert to bytes & set flags - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, #0 @ compare (vA, 0) + bne MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1825,32 +1591,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_ltz.S */ /* File: arm/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ mov r0, rINST, lsr #8 @ r0<- AA - GET_VREG r2, r0 @ r2<- vAA + GET_VREG r0, r0 @ r0<- vAA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - cmp r2, #0 @ compare (vA, 0) - movge rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r1, rINST, rINST @ convert to bytes & set flags - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, #0 @ compare (vA, 0) + blt MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? 
+ beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1861,32 +1615,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_gez.S */ /* File: arm/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ mov r0, rINST, lsr #8 @ r0<- AA - GET_VREG r2, r0 @ r2<- vAA + GET_VREG r0, r0 @ r0<- vAA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - cmp r2, #0 @ compare (vA, 0) - movlt rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r1, rINST, rINST @ convert to bytes & set flags - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, #0 @ compare (vA, 0) + bge MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1897,32 +1639,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_gtz.S */ /* File: arm/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ mov r0, rINST, lsr #8 @ r0<- AA - GET_VREG r2, r0 @ r2<- vAA + GET_VREG r0, r0 @ r0<- vAA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - cmp r2, #0 @ compare (vA, 0) - movle rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r1, rINST, rINST @ convert to bytes & set flags - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, #0 @ compare (vA, 0) + bgt MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -1933,32 +1663,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm/op_if_lez.S */ /* File: arm/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. 
* * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ mov r0, rINST, lsr #8 @ r0<- AA - GET_VREG r2, r0 @ r2<- vAA + GET_VREG r0, r0 @ r0<- vAA FETCH_S rINST, 1 @ rINST<- branch offset, in code units - ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] - cmp r2, #0 @ compare (vA, 0) - movgt rINST, #2 -#if MTERP_PROFILE_BRANCHES - @ TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov r0, rSELF - add r1, rFP, #OFF_FP_SHADOWFRAME - mov r2, rINST - bl MterpProfileBranch @ (self, shadow_frame, offset) - cmp r0, #0 - bne MterpOnStackReplacement @ Note: offset must be in rINST -#endif - adds r1, rINST, rINST @ convert to bytes & set flags - FETCH_ADVANCE_INST_RB r1 @ update rPC, load rINST - bmi MterpCheckSuspendAndContinue + cmp r0, #0 @ compare (vA, 0) + ble MterpCommonTakenBranchNoFlags + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + beq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -4711,15 +4429,15 @@ constvalop_long_to_double: VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[CC] ldmia r2, {r0-r1} @ r0/r1<- vBB/vBB+1 ldmia r3, {r2-r3} @ r2/r3<- vCC/vCC+1 - mul ip, r2, r1 @ ip<- ZxW - umull r9, r10, r2, r0 @ r9/r10 <- ZxX - mla r2, r0, r3, ip @ r2<- YxX + (ZxW) + mul ip, r2, r1 @ ip<- ZxW + umull r1, lr, r2, r0 @ r1/lr <- ZxX + mla r2, r0, r3, ip @ r2<- YxX + (ZxW) mov r0, rINST, lsr #8 @ r0<- AA - add r10, r2, r10 @ r10<- r10 + low(ZxW + (YxX)) + add r2, r2, lr @ r2<- lr + low(ZxW + (YxX)) VREG_INDEX_TO_ADDR r0, r0 @ r0<- &fp[AA] FETCH_ADVANCE_INST 2 @ advance rPC, load rINST GET_INST_OPCODE ip @ extract opcode from rINST - stmia r0, {r9-r10} @ vAA/vAA+1<- r9/r10 + stmia r0, {r1-r2 } @ vAA/vAA+1<- r1/r2 GOTO_OPCODE ip @ jump to next instruction /* ------------------------------ */ @@ -5877,14 +5595,14 @@ constvalop_long_to_double: VREG_INDEX_TO_ADDR rINST, r9 @ rINST<- &fp[A] ldmia r1, {r2-r3} @ r2/r3<- vBB/vBB+1 ldmia rINST, {r0-r1} @ r0/r1<- vAA/vAA+1 - mul ip, r2, r1 @ ip<- ZxW - umull r9, r10, r2, r0 @ r9/r10 <- ZxX - mla r2, r0, r3, ip @ r2<- YxX + (ZxW) + mul ip, r2, r1 @ ip<- ZxW + umull r1, lr, r2, r0 @ r1/lr <- ZxX + mla r2, r0, r3, ip @ r2<- YxX + (ZxW) mov r0, rINST @ r0<- &fp[A] (free up rINST) FETCH_ADVANCE_INST 1 @ advance rPC, load rINST - add r10, r2, r10 @ r10<- r10 + low(ZxW + (YxX)) + add r2, r2, lr @ r2<- r2 + low(ZxW + (YxX)) GET_INST_OPCODE ip @ extract opcode from rINST - stmia r0, {r9-r10} @ vAA/vAA+1<- r9/r10 + stmia r0, {r1-r2} @ vAA/vAA+1<- r1/r2 GOTO_OPCODE ip @ jump to next instruction /* ------------------------------ */ @@ -7616,27 +7334,6 @@ artMterpAsmInstructionEnd: .balign 4 artMterpAsmSisterStart: -/* continuation for op_cmp_long */ - -.Lop_cmp_long_less: - mvn r1, #0 @ r1<- -1 - @ Want to cond code the next mov so we can avoid branch, but don't see it; - @ instead, we just replicate the tail end. - FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - SET_VREG r1, r9 @ vAA<- r1 - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction - -.Lop_cmp_long_greater: - mov r1, #1 @ r1<- 1 - @ fall through to _finish - -.Lop_cmp_long_finish: - FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - SET_VREG r1, r9 @ vAA<- r1 - GET_INST_OPCODE ip @ extract opcode from rINST - GOTO_OPCODE ip @ jump to next instruction - /* continuation for op_float_to_long */ /* * Convert the float in r0 to a long in r0/r1. 
@@ -12207,21 +11904,117 @@ MterpException: /* NOTE: no fallthrough */ /* - * Check for suspend check request. Assumes rINST already loaded, rPC advanced and - * still needs to get the opcode and branch to it, and flags are in lr. + * Common handling for branches with support for Jit profiling. + * On entry: + * rINST <= signed offset + * rPROFILE <= signed hotness countdown (expanded to 32 bits) + * condition bits <= set to establish sign of offset (use "NoFlags" entry if not) + * + * We have quite a few different cases for branch profiling, OSR detection and + * suspend check support here. + * + * Taken backward branches: + * If profiling active, do hotness countdown and report if we hit zero. + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * Is there a pending suspend request? If so, suspend. + * + * Taken forward branches and not-taken backward branches: + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * + * Our most common case is expected to be a taken backward branch with active jit profiling, + * but no full OSR check and no pending suspend request. + * Next most common case is not-taken branch with no full OSR check. + * + */ +MterpCommonTakenBranchNoFlags: + cmp rINST, #0 +MterpCommonTakenBranch: + bgt .L_forward_branch @ don't add forward branches to hotness +/* + * We need to subtract 1 from positive values and we should not see 0 here, + * so we may use the result of the comparison with -1. */ -MterpCheckSuspendAndContinue: - ldr rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh rIBASE +#if JIT_CHECK_OSR != -1 +# error "JIT_CHECK_OSR must be -1." +#endif + cmp rPROFILE, #JIT_CHECK_OSR + beq .L_osr_check + subgts rPROFILE, #1 + beq .L_add_batch @ counted down to zero - report +.L_resume_backward_branch: + ldr lr, [rSELF, #THREAD_FLAGS_OFFSET] + REFRESH_IBASE + add r2, rINST, rINST @ r2<- byte offset + FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST ands lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) - bne 1f + bne .L_suspend_request_pending GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction -1: + +.L_suspend_request_pending: EXPORT_PC mov r0, rSELF bl MterpSuspendCheck @ (self) cmp r0, #0 bne MterpFallback + REFRESH_IBASE @ might have changed during suspend + GET_INST_OPCODE ip @ extract opcode from rINST + GOTO_OPCODE ip @ jump to next instruction + +.L_no_count_backwards: + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? + bne .L_resume_backward_branch +.L_osr_check: + mov r0, rSELF + add r1, rFP, #OFF_FP_SHADOWFRAME + mov r2, rINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement @ (self, shadow_frame, offset) + cmp r0, #0 + bne MterpOnStackReplacement + b .L_resume_backward_branch + +.L_forward_branch: + cmp rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry? 
+ beq .L_check_osr_forward +.L_resume_forward_branch: + add r2, rINST, rINST @ r2<- byte offset + FETCH_ADVANCE_INST_RB r2 @ update rPC, load rINST + GET_INST_OPCODE ip @ extract opcode from rINST + GOTO_OPCODE ip @ jump to next instruction + +.L_check_osr_forward: + mov r0, rSELF + add r1, rFP, #OFF_FP_SHADOWFRAME + mov r2, rINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement @ (self, shadow_frame, offset) + cmp r0, #0 + bne MterpOnStackReplacement + b .L_resume_forward_branch + +.L_add_batch: + add r1, rFP, #OFF_FP_SHADOWFRAME + strh rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + ldr r0, [rFP, #OFF_FP_METHOD] + mov r2, rSELF + bl MterpAddHotnessBatch @ (method, shadow_frame, self) + mov rPROFILE, r0 @ restore new hotness countdown to rPROFILE + b .L_no_count_backwards + +/* + * Entered from the conditional branch handlers when OSR check request active on + * not-taken path. All Dalvik not-taken conditional branch offsets are 2. + */ +.L_check_not_taken_osr: + mov r0, rSELF + add r1, rFP, #OFF_FP_SHADOWFRAME + mov r2, #2 + EXPORT_PC + bl MterpMaybeDoOnStackReplacement @ (self, shadow_frame, offset) + cmp r0, #0 + bne MterpOnStackReplacement + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -12269,9 +12062,27 @@ MterpReturn: str r1, [r2, #4] mov r0, #1 @ signal return to caller. MterpDone: - add sp, sp, #4 @ un-align 64 - ldmfd sp!, {r4-r10,fp,pc} @ restore 9 regs and return +/* + * At this point, we expect rPROFILE to be non-zero. If negative, hotness is disabled or we're + * checking for OSR. If greater than zero, we might have unreported hotness to register + * (the difference between the ending rPROFILE and the cached hotness counter). rPROFILE + * should only reach zero immediately after a hotness decrement, and is then reset to either + * a negative special state or the new non-zero countdown value. + */ + cmp rPROFILE, #0 + bgt MterpProfileActive @ if > 0, we may have some counts to report. + ldmfd sp!, {r3-r10,fp,pc} @ restore 10 regs and return +MterpProfileActive: + mov rINST, r0 @ stash return value + /* Report cached hotness counts */ + ldr r0, [rFP, #OFF_FP_METHOD] + add r1, rFP, #OFF_FP_SHADOWFRAME + mov r2, rSELF + strh rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + bl MterpAddHotnessBatch @ (method, shadow_frame, self) + mov r0, rINST @ restore return value + ldmfd sp!, {r3-r10,fp,pc} @ restore 10 regs and return .fnend .size ExecuteMterpImpl, .-ExecuteMterpImpl diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S index 6ae59d857f..55797e676f 100644 --- a/runtime/interpreter/mterp/out/mterp_arm64.S +++ b/runtime/interpreter/mterp/out/mterp_arm64.S @@ -81,6 +81,7 @@ The following registers have fixed assignments: x23 xINST first 16-bit code unit of current instruction x24 xIBASE interpreted instruction base pointer, used for computed goto x25 xREFS base of object references in shadow frame (ideally, we'll get rid of this later). + x26 wPROFILE jit profile hotness countdown x16 ip scratch reg x17 ip2 scratch reg (used by macros) @@ -99,15 +100,17 @@ codes. 
/* During bringup, we'll use the shadow frame model instead of xFP */ /* single-purpose registers, given names for clarity */ -#define xPC x20 -#define xFP x21 -#define xSELF x22 -#define xINST x23 -#define wINST w23 -#define xIBASE x24 -#define xREFS x25 -#define ip x16 -#define ip2 x17 +#define xPC x20 +#define xFP x21 +#define xSELF x22 +#define xINST x23 +#define wINST w23 +#define xIBASE x24 +#define xREFS x25 +#define wPROFILE w26 +#define xPROFILE x26 +#define ip x16 +#define ip2 x17 /* * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs. So, @@ -121,7 +124,7 @@ codes. #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET) #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET) #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET) -#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET) +#define OFF_FP_SHADOWFRAME OFF_FP(0) /* * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects. Must @@ -323,11 +326,12 @@ codes. ExecuteMterpImpl: .cfi_startproc - stp xIBASE, xREFS, [sp, #-64]! - stp xSELF, xINST, [sp, #16] - stp xPC, xFP, [sp, #32] - stp fp, lr, [sp, #48] - add fp, sp, #48 + stp xPROFILE, x27, [sp, #-80]! + stp xIBASE, xREFS, [sp, #16] + stp xSELF, xINST, [sp, #32] + stp xPC, xFP, [sp, #48] + stp fp, lr, [sp, #64] + add fp, sp, #64 /* Remember the return register */ str x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET] @@ -348,6 +352,12 @@ ExecuteMterpImpl: /* Starting ibase */ ldr xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] + /* Set up for backwards branches & osr profiling */ + ldr x0, [xFP, #OFF_FP_METHOD] + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpSetUpHotnessCountdown + mov wPROFILE, w0 // Starting hotness countdown to xPROFILE + /* start executing the instruction at rPC */ FETCH_INST // load wINST from rPC GET_INST_OPCODE ip // extract opcode from wINST @@ -1081,24 +1091,8 @@ artMterpAsmInstructionStart = .L_op_nop * double to get a byte offset. 
*/ /* goto +AA */ - /* tuning: use sbfx for 6t2+ targets */ - lsl w0, wINST, #16 // w0<- AAxx0000 - asr wINST, w0, #24 // wINST<- ssssssAA (sign-extended) -#if MTERP_PROFILE_BRANCHES - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in wINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] // Preload flags for MterpCheckSuspendAndContinue - adds w1, wINST, wINST // Convert dalvik offset to byte offset, setting flags - FETCH_ADVANCE_INST_RB w1 // load wINST and advance xPC - // If backwards branch refresh rIBASE - b.mi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction + sbfx wINST, wINST, #8, #8 // wINST<- ssssssAA (sign-extended) + b MterpCommonTakenBranchNoFlags /* ------------------------------ */ .balign 128 @@ -1112,20 +1106,7 @@ artMterpAsmInstructionStart = .L_op_nop */ /* goto/16 +AAAA */ FETCH_S wINST, 1 // wINST<- ssssAAAA (sign-extended) -#if MTERP_PROFILE_BRANCHES - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w1, wINST, wINST // w1<- byte offset, flags set - FETCH_ADVANCE_INST_RB w1 // update rPC, load rINST - b.mi MterpCheckSuspendAndContinue - GET_INST_OPCODE ip // extract opcode from rINST - GOTO_OPCODE ip // jump to next instruction + b MterpCommonTakenBranchNoFlags /* ------------------------------ */ .balign 128 @@ -1146,20 +1127,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w0, 1 // w0<- aaaa (lo) FETCH w1, 2 // w1<- AAAA (hi) orr wINST, w0, w1, lsl #16 // wINST<- AAAAaaaa -#if MTERP_PROFILE_BRANCHES - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w1, wINST, wINST // w1<- byte offset - FETCH_ADVANCE_INST_RB w1 // update rPC, load xINST - b.le MterpCheckSuspendAndContinue - GET_INST_OPCODE ip // extract opcode from xINST - GOTO_OPCODE ip // jump to next instruction + b MterpCommonTakenBranchNoFlags /* ------------------------------ */ .balign 128 @@ -1183,20 +1151,7 @@ artMterpAsmInstructionStart = .L_op_nop add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2 bl MterpDoPackedSwitch // w0<- code-unit branch offset sbfm xINST, x0, 0, 31 -#if MTERP_PROFILE_BRANCHES - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - mov x2, xINST - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w1, wINST, wINST // w1<- byte offset; clear V - FETCH_ADVANCE_INST_RB w1 // update rPC, load wINST - b.le MterpCheckSuspendAndContinue - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction + b MterpCommonTakenBranchNoFlags /* ------------------------------ */ .balign 128 @@ -1221,20 +1176,7 @@ artMterpAsmInstructionStart = .L_op_nop add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2 bl MterpDoSparseSwitch // w0<- code-unit branch offset sbfm xINST, x0, 0, 31 -#if MTERP_PROFILE_BRANCHES - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - mov x2, xINST - bl MterpProfileBranch // (self, 
shadow_frame, offset) - cbnz w0, MterpOnStackReplacement -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w1, wINST, wINST // w1<- byte offset; clear V - FETCH_ADVANCE_INST_RB w1 // update rPC, load wINST - b.le MterpCheckSuspendAndContinue - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction + b MterpCommonTakenBranchNoFlags /* ------------------------------ */ @@ -1365,9 +1307,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_eq.S */ /* File: arm64/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1376,23 +1317,12 @@ artMterpAsmInstructionStart = .L_op_nop ubfx w0, wINST, #8, #4 // w0<- A GET_VREG w3, w1 // w3<- vB GET_VREG w2, w0 // w2<- vA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Offset if branch not taken + FETCH_S wINST, 1 // wINST<- branch offset, in code units cmp w2, w3 // compare (vA, vB) - csel wINST, w1, w0, eq // Branch if true, stashing result in callee save reg. -#if MTERP_PROFILE_BRANCHES - // TUINING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 // Sign extend branch offset - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes, check sign - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.eq MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1403,9 +1333,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_ne.S */ /* File: arm64/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1414,23 +1343,12 @@ artMterpAsmInstructionStart = .L_op_nop ubfx w0, wINST, #8, #4 // w0<- A GET_VREG w3, w1 // w3<- vB GET_VREG w2, w0 // w2<- vA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Offset if branch not taken + FETCH_S wINST, 1 // wINST<- branch offset, in code units cmp w2, w3 // compare (vA, vB) - csel wINST, w1, w0, ne // Branch if true, stashing result in callee save reg. -#if MTERP_PROFILE_BRANCHES - // TUINING: once measurements are complete, remove #if and hand-schedule. 
- EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 // Sign extend branch offset - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes, check sign - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.ne MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1441,9 +1359,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_lt.S */ /* File: arm64/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1452,23 +1369,12 @@ artMterpAsmInstructionStart = .L_op_nop ubfx w0, wINST, #8, #4 // w0<- A GET_VREG w3, w1 // w3<- vB GET_VREG w2, w0 // w2<- vA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Offset if branch not taken + FETCH_S wINST, 1 // wINST<- branch offset, in code units cmp w2, w3 // compare (vA, vB) - csel wINST, w1, w0, lt // Branch if true, stashing result in callee save reg. -#if MTERP_PROFILE_BRANCHES - // TUINING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 // Sign extend branch offset - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes, check sign - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.lt MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1479,9 +1385,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_ge.S */ /* File: arm64/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1490,23 +1395,12 @@ artMterpAsmInstructionStart = .L_op_nop ubfx w0, wINST, #8, #4 // w0<- A GET_VREG w3, w1 // w3<- vB GET_VREG w2, w0 // w2<- vA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Offset if branch not taken + FETCH_S wINST, 1 // wINST<- branch offset, in code units cmp w2, w3 // compare (vA, vB) - csel wINST, w1, w0, ge // Branch if true, stashing result in callee save reg. -#if MTERP_PROFILE_BRANCHES - // TUINING: once measurements are complete, remove #if and hand-schedule. 
- EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 // Sign extend branch offset - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes, check sign - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.ge MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1517,9 +1411,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_gt.S */ /* File: arm64/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1528,23 +1421,12 @@ artMterpAsmInstructionStart = .L_op_nop ubfx w0, wINST, #8, #4 // w0<- A GET_VREG w3, w1 // w3<- vB GET_VREG w2, w0 // w2<- vA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Offset if branch not taken + FETCH_S wINST, 1 // wINST<- branch offset, in code units cmp w2, w3 // compare (vA, vB) - csel wINST, w1, w0, gt // Branch if true, stashing result in callee save reg. -#if MTERP_PROFILE_BRANCHES - // TUINING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 // Sign extend branch offset - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes, check sign - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.gt MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1555,9 +1437,8 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_le.S */ /* File: arm64/bincmp.S */ /* - * Generic two-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic two-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le */ @@ -1566,23 +1447,12 @@ artMterpAsmInstructionStart = .L_op_nop ubfx w0, wINST, #8, #4 // w0<- A GET_VREG w3, w1 // w3<- vB GET_VREG w2, w0 // w2<- vA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Offset if branch not taken + FETCH_S wINST, 1 // wINST<- branch offset, in code units cmp w2, w3 // compare (vA, vB) - csel wINST, w1, w0, le // Branch if true, stashing result in callee save reg. -#if MTERP_PROFILE_BRANCHES - // TUINING: once measurements are complete, remove #if and hand-schedule. 
- EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 // Sign extend branch offset - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in xINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes, check sign - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.le MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1593,32 +1463,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_eqz.S */ /* File: arm64/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Branch offset if not taken + FETCH_S wINST, 1 // w1<- branch offset, in code units cmp w2, #0 // compare (vA, 0) - csel wINST, w1, w0, eq // Branch if true, stashing result in callee save reg -#if MTERP_PROFILE_BRANCHES - // TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in wINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes & set flags - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.eq MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1629,32 +1487,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_nez.S */ /* File: arm64/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Branch offset if not taken + FETCH_S wINST, 1 // w1<- branch offset, in code units cmp w2, #0 // compare (vA, 0) - csel wINST, w1, w0, ne // Branch if true, stashing result in callee save reg -#if MTERP_PROFILE_BRANCHES - // TUNING: once measurements are complete, remove #if and hand-schedule. 
- EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in wINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes & set flags - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.ne MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1665,32 +1511,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_ltz.S */ /* File: arm64/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Branch offset if not taken + FETCH_S wINST, 1 // w1<- branch offset, in code units cmp w2, #0 // compare (vA, 0) - csel wINST, w1, w0, lt // Branch if true, stashing result in callee save reg -#if MTERP_PROFILE_BRANCHES - // TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in wINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes & set flags - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.lt MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1701,32 +1535,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_gez.S */ /* File: arm64/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Branch offset if not taken + FETCH_S wINST, 1 // w1<- branch offset, in code units cmp w2, #0 // compare (vA, 0) - csel wINST, w1, w0, ge // Branch if true, stashing result in callee save reg -#if MTERP_PROFILE_BRANCHES - // TUNING: once measurements are complete, remove #if and hand-schedule. 
- EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in wINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes & set flags - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.ge MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1737,32 +1559,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_gtz.S */ /* File: arm64/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Branch offset if not taken + FETCH_S wINST, 1 // w1<- branch offset, in code units cmp w2, #0 // compare (vA, 0) - csel wINST, w1, w0, gt // Branch if true, stashing result in callee save reg -#if MTERP_PROFILE_BRANCHES - // TUNING: once measurements are complete, remove #if and hand-schedule. - EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in wINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes & set flags - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.gt MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -1773,32 +1583,20 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_if_lez.S */ /* File: arm64/zcmp.S */ /* - * Generic one-operand compare-and-branch operation. Provide a "revcmp" - * fragment that specifies the *reverse* comparison to perform, e.g. - * for "if-le" you would use "gt". + * Generic one-operand compare-and-branch operation. Provide a "condition" + * fragment that specifies the comparison to perform. * * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez */ /* if-cmp vAA, +BBBB */ lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA - FETCH_S w1, 1 // w1<- branch offset, in code units - mov w0, #2 // Branch offset if not taken + FETCH_S wINST, 1 // w1<- branch offset, in code units cmp w2, #0 // compare (vA, 0) - csel wINST, w1, w0, le // Branch if true, stashing result in callee save reg -#if MTERP_PROFILE_BRANCHES - // TUNING: once measurements are complete, remove #if and hand-schedule. 
- EXPORT_PC - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 - bl MterpProfileBranch // (self, shadow_frame, offset) - cbnz w0, MterpOnStackReplacement // Note: offset must be in wINST -#endif - ldr w7, [xSELF, #THREAD_FLAGS_OFFSET] - adds w2, wINST, wINST // convert to bytes & set flags - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - b.mi MterpCheckSuspendAndContinue + b.le MterpCommonTakenBranchNoFlags + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.eq .L_check_not_taken_osr + FETCH_ADVANCE_INST 2 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -11596,6 +11394,107 @@ MterpException: GET_INST_OPCODE ip GOTO_OPCODE ip /* NOTE: no fallthrough */ +/* + * Common handling for branches with support for Jit profiling. + * On entry: + * wINST <= signed offset + * wPROFILE <= signed hotness countdown (expanded to 32 bits) + * condition bits <= set to establish sign of offset (use "NoFlags" entry if not) + * + * We have quite a few different cases for branch profiling, OSR detection and + * suspend check support here. + * + * Taken backward branches: + * If profiling active, do hotness countdown and report if we hit zero. + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * Is there a pending suspend request? If so, suspend. + * + * Taken forward branches and not-taken backward branches: + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * + * Our most common case is expected to be a taken backward branch with active jit profiling, + * but no full OSR check and no pending suspend request. + * Next most common case is not-taken branch with no full OSR check. + * + */ +MterpCommonTakenBranchNoFlags: + cmp wINST, #0 + b.gt .L_forward_branch // don't add forward branches to hotness + tbnz wPROFILE, #31, .L_no_count_backwards // go if negative + subs wPROFILE, wPROFILE, #1 // countdown + b.eq .L_add_batch // counted down to zero - report +.L_resume_backward_branch: + ldr lr, [xSELF, #THREAD_FLAGS_OFFSET] + add w2, wINST, wINST // w2<- byte offset + FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST + REFRESH_IBASE + ands lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) + b.ne .L_suspend_request_pending + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +.L_suspend_request_pending: + EXPORT_PC + mov x0, xSELF + bl MterpSuspendCheck // (self) + cbnz x0, MterpFallback + REFRESH_IBASE // might have changed during suspend + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +.L_no_count_backwards: + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.ne .L_resume_backward_branch + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, xINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) + cbnz x0, MterpOnStackReplacement + b .L_resume_backward_branch + +.L_forward_branch: + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? 
+ b.eq .L_check_osr_forward +.L_resume_forward_branch: + add w2, wINST, wINST // w2<- byte offset + FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +.L_check_osr_forward: + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, xINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) + cbnz x0, MterpOnStackReplacement + b .L_resume_forward_branch + +.L_add_batch: + add x1, xFP, #OFF_FP_SHADOWFRAME + strh wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + ldr x0, [xFP, #OFF_FP_METHOD] + mov x2, xSELF + bl MterpAddHotnessBatch // (method, shadow_frame, self) + mov wPROFILE, w0 // restore new hotness countdown to wPROFILE + b .L_no_count_backwards + +/* + * Entered from the conditional branch handlers when OSR check request active on + * not-taken path. All Dalvik not-taken conditional branch offsets are 2. + */ +.L_check_not_taken_osr: + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, #2 + EXPORT_PC + bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) + cbnz x0, MterpOnStackReplacement + FETCH_ADVANCE_INST 2 + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + /* * Check for suspend check request. Assumes wINST already loaded, xPC advanced and @@ -11664,10 +11563,36 @@ MterpReturn: check2: mov x0, #1 // signal return to caller. MterpDone: - ldp fp, lr, [sp, #48] - ldp xPC, xFP, [sp, #32] - ldp xSELF, xINST, [sp, #16] - ldp xIBASE, xREFS, [sp], #64 +/* + * At this point, we expect wPROFILE to be non-zero. If negative, hotness is disabled or we're + * checking for OSR. If greater than zero, we might have unreported hotness to register + * (the difference between the ending wPROFILE and the cached hotness counter). wPROFILE + * should only reach zero immediately after a hotness decrement, and is then reset to either + * a negative special state or the new non-zero countdown value. + */ + cmp wPROFILE, #0 + bgt MterpProfileActive // if > 0, we may have some counts to report. + ldp fp, lr, [sp, #64] + ldp xPC, xFP, [sp, #48] + ldp xSELF, xINST, [sp, #32] + ldp xIBASE, xREFS, [sp, #16] + ldp xPROFILE, x27, [sp], #80 + ret + +MterpProfileActive: + mov xINST, x0 // stash return value + /* Report cached hotness counts */ + ldr x0, [xFP, #OFF_FP_METHOD] + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, xSELF + strh wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + bl MterpAddHotnessBatch // (method, shadow_frame, self) + mov x0, xINST // restore return value + ldp fp, lr, [sp, #64] + ldp xPC, xFP, [sp, #48] + ldp xSELF, xINST, [sp, #32] + ldp xIBASE, xREFS, [sp, #16] + ldp xPROFILE, x27, [sp], #80 ret .cfi_endproc diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc index 4615ec9aa4..80ffedcaa0 100644 --- a/runtime/interpreter/unstarted_runtime.cc +++ b/runtime/interpreter/unstarted_runtime.cc @@ -524,9 +524,7 @@ void UnstartedRuntime::UnstartedThreadLocalGet( } } -void UnstartedRuntime::UnstartedMathCeil( - Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) { - double in = shadow_frame->GetVRegDouble(arg_offset); +static double ComputeCeil(double in) { double out; // Special cases: // 1) NaN, infinity, +0, -0 -> out := in. All are guaranteed by cmath. 
@@ -536,7 +534,21 @@ void UnstartedRuntime::UnstartedMathCeil( } else { out = ceil(in); } - result->SetD(out); + return out; +} + +void UnstartedRuntime::UnstartedMathCeil( + Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) { + double in = shadow_frame->GetVRegDouble(arg_offset); + result->SetD(ComputeCeil(in)); +} + +void UnstartedRuntime::UnstartedMathFloor( + Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) { + double in = shadow_frame->GetVRegDouble(arg_offset); + // From the JavaDocs: + // "Note that the value of Math.ceil(x) is exactly the value of -Math.floor(-x)." + result->SetD(-ComputeCeil(-in)); } void UnstartedRuntime::UnstartedObjectHashCode( diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h index a3ed5581f0..3312701135 100644 --- a/runtime/interpreter/unstarted_runtime_list.h +++ b/runtime/interpreter/unstarted_runtime_list.h @@ -36,6 +36,7 @@ V(SystemGetSecurityManager, "java.lang.SecurityManager java.lang.System.getSecurityManager()") \ V(ThreadLocalGet, "java.lang.Object java.lang.ThreadLocal.get()") \ V(MathCeil, "double java.lang.Math.ceil(double)") \ + V(MathFloor, "double java.lang.Math.floor(double)") \ V(ObjectHashCode, "int java.lang.Object.hashCode()") \ V(DoubleDoubleToRawLongBits, "long java.lang.Double.doubleToRawLongBits(double)") \ V(DexCacheGetDexNative, "com.android.dex.Dex java.lang.DexCache.getDexNative()") \ diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index 73aaf0496e..4db9f71a90 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -208,7 +208,7 @@ bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) { return false; } bool success = jit_compile_method_(jit_compiler_handle_, method_to_compile, self, osr); - code_cache_->DoneCompiling(method_to_compile, self); + code_cache_->DoneCompiling(method_to_compile, self, osr); return success; } @@ -319,11 +319,6 @@ bool Jit::MaybeDoOnStackReplacement(Thread* thread, return false; } - if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { - VLOG(jit) << "OSR not supported on this platform: " << kRuntimeISA; - return false; - } - if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) { // Don't attempt to do an OSR if we are close to the stack limit. 
Since // the interpreter frames are still on stack, OSR has the potential diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index 37ff6a5dd6..820ae6acab 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -927,12 +927,12 @@ bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr return false; } - if (info->IsMethodBeingCompiled()) { + if (info->IsMethodBeingCompiled(osr)) { VLOG(jit) << PrettyMethod(method) << " is already being compiled"; return false; } - info->SetIsMethodBeingCompiled(true); + info->SetIsMethodBeingCompiled(true, osr); return true; } @@ -952,10 +952,10 @@ void JitCodeCache::DoneCompilerUse(ArtMethod* method, Thread* self) { info->DecrementInlineUse(); } -void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED) { +void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED, bool osr) { ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*)); - DCHECK(info->IsMethodBeingCompiled()); - info->SetIsMethodBeingCompiled(false); + DCHECK(info->IsMethodBeingCompiled(osr)); + info->SetIsMethodBeingCompiled(false, osr); } size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) { diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 6faa8f15b6..9f18c700d4 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -80,7 +80,7 @@ class JitCodeCache { SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_); - void DoneCompiling(ArtMethod* method, Thread* self) + void DoneCompiling(ArtMethod* method, Thread* self, bool osr) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_); diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc index d751e5aae9..d2180c7ce2 100644 --- a/runtime/jit/jit_instrumentation.cc +++ b/runtime/jit/jit_instrumentation.cc @@ -80,9 +80,9 @@ class JitCompileTask FINAL : public Task { DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask); }; -JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold, - size_t warm_method_threshold, - size_t osr_method_threshold) +JitInstrumentationCache::JitInstrumentationCache(uint16_t hot_method_threshold, + uint16_t warm_method_threshold, + uint16_t osr_method_threshold) : hot_method_threshold_(hot_method_threshold), warm_method_threshold_(warm_method_threshold), osr_method_threshold_(osr_method_threshold), @@ -130,44 +130,61 @@ void JitInstrumentationCache::DeleteThreadPool(Thread* self) { } } -void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) { +void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, uint16_t count) { // Since we don't have on-stack replacement, some methods can remain in the interpreter longer - // than we want resulting in samples even after the method is compiled. - if (method->IsClassInitializer() || method->IsNative()) { + // than we want resulting in samples even after the method is compiled. Also, if the + // jit is no longer interested in hotness samples because we're shutting down, just return. + if (method->IsClassInitializer() || method->IsNative() || (thread_pool_ == nullptr)) { + if (thread_pool_ == nullptr) { + // Should only see this when shutting down. 
+ DCHECK(Runtime::Current()->IsShuttingDown(self)); + } return; } DCHECK(thread_pool_ != nullptr); + DCHECK_GT(warm_method_threshold_, 0); + DCHECK_GT(hot_method_threshold_, warm_method_threshold_); + DCHECK_GT(osr_method_threshold_, hot_method_threshold_); + + int32_t starting_count = method->GetCounter(); + int32_t new_count = starting_count + count; // int32 here to avoid wrap-around; + if (starting_count < warm_method_threshold_) { + if (new_count >= warm_method_threshold_) { + bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false); + if (success) { + VLOG(jit) << "Start profiling " << PrettyMethod(method); + } - uint16_t sample_count = method->IncrementCounter(); - if (sample_count == warm_method_threshold_) { - bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false); - if (success) { - VLOG(jit) << "Start profiling " << PrettyMethod(method); - } + if (thread_pool_ == nullptr) { + // Calling ProfilingInfo::Create might put us in a suspended state, which could + // lead to the thread pool being deleted when we are shutting down. + DCHECK(Runtime::Current()->IsShuttingDown(self)); + return; + } - if (thread_pool_ == nullptr) { - // Calling ProfilingInfo::Create might put us in a suspended state, which could - // lead to the thread pool being deleted when we are shutting down. - DCHECK(Runtime::Current()->IsShuttingDown(self)); - return; + if (!success) { + // We failed allocating. Instead of doing the collection on the Java thread, we push + // an allocation to a compiler thread, that will do the collection. + thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile)); + } } - - if (!success) { - // We failed allocating. Instead of doing the collection on the Java thread, we push - // an allocation to a compiler thread, that will do the collection. - thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile)); + // Avoid jumping more than one state at a time. + new_count = std::min(new_count, hot_method_threshold_ - 1); + } else if (starting_count < hot_method_threshold_) { + if (new_count >= hot_method_threshold_) { + DCHECK(thread_pool_ != nullptr); + thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile)); + } + // Avoid jumping more than one state at a time. 
+ new_count = std::min(new_count, osr_method_threshold_ - 1); + } else if (starting_count < osr_method_threshold_) { + if (new_count >= osr_method_threshold_) { + DCHECK(thread_pool_ != nullptr); + thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr)); } } - - if (sample_count == hot_method_threshold_) { - DCHECK(thread_pool_ != nullptr); - thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile)); - } - - if (sample_count == osr_method_threshold_) { - DCHECK(thread_pool_ != nullptr); - thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr)); - } + // Update hotness counter + method->SetCounter(new_count); } JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache) diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h index d1c5c44a07..7ffd4ebb84 100644 --- a/runtime/jit/jit_instrumentation.h +++ b/runtime/jit/jit_instrumentation.h @@ -40,6 +40,8 @@ union JValue; class Thread; namespace jit { +static constexpr int16_t kJitCheckForOSR = -1; +static constexpr int16_t kJitHotnessDisabled = -2; class JitInstrumentationCache; @@ -84,7 +86,6 @@ class JitInstrumentationListener : public instrumentation::InstrumentationListen static constexpr uint32_t kJitEvents = instrumentation::Instrumentation::kMethodEntered | - instrumentation::Instrumentation::kBranch | instrumentation::Instrumentation::kInvokeVirtualOrInterface; private: @@ -96,25 +97,33 @@ class JitInstrumentationListener : public instrumentation::InstrumentationListen // Keeps track of which methods are hot. class JitInstrumentationCache { public: - JitInstrumentationCache(size_t hot_method_threshold, - size_t warm_method_threshold, - size_t osr_method_threshold); - void AddSamples(Thread* self, ArtMethod* method, size_t samples) + JitInstrumentationCache(uint16_t hot_method_threshold, + uint16_t warm_method_threshold, + uint16_t osr_method_threshold); + void AddSamples(Thread* self, ArtMethod* method, uint16_t samples) SHARED_REQUIRES(Locks::mutator_lock_); void CreateThreadPool(); void DeleteThreadPool(Thread* self); + size_t OSRMethodThreshold() const { + return osr_method_threshold_; + } + size_t HotMethodThreshold() const { return hot_method_threshold_; } + size_t WarmMethodThreshold() const { + return warm_method_threshold_; + } + // Wait until there is no more pending compilation tasks. void WaitForCompilationToFinish(Thread* self); private: - size_t hot_method_threshold_; - size_t warm_method_threshold_; - size_t osr_method_threshold_; + uint16_t hot_method_threshold_; + uint16_t warm_method_threshold_; + uint16_t osr_method_threshold_; JitInstrumentationListener listener_; std::unique_ptr<ThreadPool> thread_pool_; diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h index 55d627ab48..3a71bbaec1 100644 --- a/runtime/jit/profiling_info.h +++ b/runtime/jit/profiling_info.h @@ -119,12 +119,18 @@ class ProfilingInfo { InlineCache* GetInlineCache(uint32_t dex_pc); - bool IsMethodBeingCompiled() const { - return is_method_being_compiled_; + bool IsMethodBeingCompiled(bool osr) const { + return osr + ? 
is_osr_method_being_compiled_ + : is_method_being_compiled_; } - void SetIsMethodBeingCompiled(bool value) { - is_method_being_compiled_ = value; + void SetIsMethodBeingCompiled(bool value, bool osr) { + if (osr) { + is_osr_method_being_compiled_ = value; + } else { + is_method_being_compiled_ = value; + } } void SetSavedEntryPoint(const void* entry_point) { @@ -155,7 +161,8 @@ class ProfilingInfo { } bool IsInUseByCompiler() const { - return IsMethodBeingCompiled() || (current_inline_uses_ > 0); + return IsMethodBeingCompiled(/*osr*/ true) || IsMethodBeingCompiled(/*osr*/ false) || + (current_inline_uses_ > 0); } private: @@ -181,6 +188,7 @@ class ProfilingInfo { // is implicitly guarded by the JIT code cache lock. // TODO: Make the JIT code cache lock global. bool is_method_being_compiled_; + bool is_osr_method_being_compiled_; // When the compiler inlines the method associated to this ProfilingInfo, // it updates this counter so that the GC does not try to clear the inline caches. diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index 32e95e7459..78e372ad02 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -495,7 +495,7 @@ bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) { return true; } - if (file.GetOatHeader().GetImageFileLocationOatChecksum() != image_info->oat_checksum) { + if (file.GetOatHeader().GetImageFileLocationOatChecksum() != GetCombinedImageChecksum()) { VLOG(oat) << "Oat image checksum does not match image checksum."; return true; } @@ -931,8 +931,7 @@ const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() { cached_image_info_.patch_delta = image_header.GetPatchDelta(); } else { std::unique_ptr<ImageHeader> image_header( - gc::space::ImageSpace::ReadImageHeaderOrDie( - cached_image_info_.location.c_str(), isa_)); + gc::space::ImageSpace::ReadImageHeaderOrDie(cached_image_info_.location.c_str(), isa_)); cached_image_info_.oat_checksum = image_header->GetOatChecksum(); cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>( image_header->GetOatDataBegin()); @@ -940,10 +939,39 @@ const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() { } } image_info_load_succeeded_ = (!image_spaces.empty()); + + combined_image_checksum_ = CalculateCombinedImageChecksum(isa_); } return image_info_load_succeeded_ ? &cached_image_info_ : nullptr; } +// TODO: Use something better than xor. 
+uint32_t OatFileAssistant::CalculateCombinedImageChecksum(InstructionSet isa) { + uint32_t checksum = 0; + std::vector<gc::space::ImageSpace*> image_spaces = + Runtime::Current()->GetHeap()->GetBootImageSpaces(); + if (isa == kRuntimeISA) { + for (gc::space::ImageSpace* image_space : image_spaces) { + checksum ^= image_space->GetImageHeader().GetOatChecksum(); + } + } else { + for (gc::space::ImageSpace* image_space : image_spaces) { + std::string location = image_space->GetImageLocation(); + std::unique_ptr<ImageHeader> image_header( + gc::space::ImageSpace::ReadImageHeaderOrDie(location.c_str(), isa)); + checksum ^= image_header->GetOatChecksum(); + } + } + return checksum; +} + +uint32_t OatFileAssistant::GetCombinedImageChecksum() { + if (!image_info_load_attempted_) { + GetImageInfo(); + } + return combined_image_checksum_; +} + gc::space::ImageSpace* OatFileAssistant::OpenImageSpace(const OatFile* oat_file) { DCHECK(oat_file != nullptr); std::string art_file = ArtFileName(oat_file); diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h index 17f72febea..d3228deac7 100644 --- a/runtime/oat_file_assistant.h +++ b/runtime/oat_file_assistant.h @@ -279,6 +279,8 @@ class OatFileAssistant { static bool DexFilenameToOdexFilename(const std::string& location, InstructionSet isa, std::string* odex_filename, std::string* error_msg); + static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA); + private: struct ImageInfo { uint32_t oat_checksum = 0; @@ -352,6 +354,8 @@ class OatFileAssistant { // The caller shouldn't clean up or free the returned pointer. const ImageInfo* GetImageInfo(); + uint32_t GetCombinedImageChecksum(); + // To implement Lock(), we lock a dummy file where the oat file would go // (adding ".flock" to the target file name) and retain the lock for the // remaining lifetime of the OatFileAssistant object. @@ -423,6 +427,7 @@ class OatFileAssistant { bool image_info_load_attempted_ = false; bool image_info_load_succeeded_ = false; ImageInfo cached_image_info_; + uint32_t combined_image_checksum_ = 0; // For debugging only. // If this flag is set, the oat or odex file has been released to the user diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc index bddfa4f21a..f50d1cb748 100644 --- a/runtime/oat_file_assistant_test.cc +++ b/runtime/oat_file_assistant_test.cc @@ -239,7 +239,8 @@ class OatFileAssistantTest : public CommonRuntimeTest { ASSERT_TRUE(!image_spaces.empty() && image_spaces[0] != nullptr); const ImageHeader& image_header = image_spaces[0]->GetImageHeader(); const OatHeader& oat_header = odex_file->GetOatHeader(); - EXPECT_EQ(image_header.GetOatChecksum(), oat_header.GetImageFileLocationOatChecksum()); + uint32_t combined_checksum = OatFileAssistant::CalculateCombinedImageChecksum(); + EXPECT_EQ(combined_checksum, oat_header.GetImageFileLocationOatChecksum()); EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()), oat_header.GetImageFileLocationOatDataBegin()); EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta()); diff --git a/runtime/stack.h b/runtime/stack.h index 51f7d6368b..7301184a9e 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -187,6 +187,22 @@ class ShadowFrame { return (dex_pc_ptr_ == nullptr) ? 
dex_pc_ : dex_pc_ptr_ - code_item_->insns_; } + int16_t GetCachedHotnessCountdown() const { + return cached_hotness_countdown_; + } + + void SetCachedHotnessCountdown(int16_t cached_hotness_countdown) { + cached_hotness_countdown_ = cached_hotness_countdown; + } + + int16_t GetHotnessCountdown() const { + return hotness_countdown_; + } + + void SetHotnessCountdown(int16_t hotness_countdown) { + hotness_countdown_ = hotness_countdown; + } + void SetDexPC(uint32_t dex_pc) { dex_pc_ = dex_pc; dex_pc_ptr_ = nullptr; @@ -397,6 +413,14 @@ class ShadowFrame { return OFFSETOF_MEMBER(ShadowFrame, code_item_); } + static size_t CachedHotnessCountdownOffset() { + return OFFSETOF_MEMBER(ShadowFrame, cached_hotness_countdown_); + } + + static size_t HotnessCountdownOffset() { + return OFFSETOF_MEMBER(ShadowFrame, hotness_countdown_); + } + // Create ShadowFrame for interpreter using provided memory. static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs, ShadowFrame* link, @@ -406,7 +430,7 @@ class ShadowFrame { return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true); } - uint16_t* GetDexPCPtr() { + const uint16_t* GetDexPCPtr() { return dex_pc_ptr_; } @@ -443,11 +467,13 @@ class ShadowFrame { ShadowFrame* link_; ArtMethod* method_; JValue* result_register_; - uint16_t* dex_pc_ptr_; + const uint16_t* dex_pc_ptr_; const DexFile::CodeItem* code_item_; LockCountData lock_count_data_; // This may contain GC roots when lock counting is active. const uint32_t number_of_vregs_; uint32_t dex_pc_; + int16_t cached_hotness_countdown_; + int16_t hotness_countdown_; // This is a two-part array: // - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4 diff --git a/test/031-class-attributes/expected.txt b/test/031-class-attributes/expected.txt index 72656ae916..de99872b52 100644 --- a/test/031-class-attributes/expected.txt +++ b/test/031-class-attributes/expected.txt @@ -84,7 +84,7 @@ abstract final [LClassAttrs$PackagePrivateInnerInterface; enclosingCon: null enclosingMeth: null modifiers: 1 - package: package otherpackage, Unknown, version 0.0 + package: package otherpackage declaredClasses: [0] member classes: [0] isAnnotation: false diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index d13d990700..28a99de099 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -358,7 +358,7 @@ if [ "$RELOCATE" = "y" ]; then # in 512 byte blocks and set it as the ulimit. This should be more than enough # room. if [ ! `uname` = "Darwin" ]; then # TODO: Darwin doesn't support "du -B..." - ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework | tail -1 | cut -f1) || exit 1 + ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework 2>/dev/null | tail -1 | cut -f1) || exit 1 fi fi else @@ -381,14 +381,16 @@ fi dex2oat_cmdline="true" mkdir_cmdline="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA" -app_image="--app-image-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.art | cut -d/ -f 2- | sed "s:/:@:g")" +# Pick a base that will force the app image to get relocated. 
+app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art" if [ "$PREBUILD" = "y" ]; then + mkdir_cmdline="${mkdir_cmdline} && mkdir -p ${DEX_LOCATION}/oat/$ISA" dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \ $COMPILE_FLAGS \ --boot-image=${BOOT_IMAGE} \ --dex-file=$DEX_LOCATION/$TEST_NAME.jar \ - --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") \ + --oat-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.odex \ ${app_image} \ --instruction-set=$ISA" if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt index 2533ce284e..38b6ea60f0 100644 --- a/tools/libcore_failures.txt +++ b/tools/libcore_failures.txt @@ -267,6 +267,24 @@ "libcore.util.NativeAllocationRegistryTest#testNullArguments"] }, { + description: "libnativehelper_compat_libc++.so not found by dlopen on ARM64", + result: EXEC_FAILED, + modes: [device], + bug: 28082914, + names: ["libcore.java.lang.ThreadTest#testContextClassLoaderIsInherited", + "libcore.java.lang.ThreadTest#testContextClassLoaderIsNotNull", + "libcore.java.lang.ThreadTest#testGetAllStackTracesIncludesAllGroups", + "libcore.java.lang.ThreadTest#testGetStackTrace", + "libcore.java.lang.ThreadTest#testJavaContextClassLoader", + "libcore.java.lang.ThreadTest#testLeakingStartedThreads", + "libcore.java.lang.ThreadTest#testLeakingUnstartedThreads", + "libcore.java.lang.ThreadTest#testNativeThreadNames", + "libcore.java.lang.ThreadTest#testThreadInterrupted", + "libcore.java.lang.ThreadTest#testThreadSleep", + "libcore.java.lang.ThreadTest#testThreadSleepIllegalArguments", + "libcore.java.lang.ThreadTest#testThreadWakeup"] +}, +{ description: "Only work with --mode=activity", result: EXEC_FAILED, names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ] |
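Returning to the jit_instrumentation.cc changes above: AddSamples now walks a warm < hot < OSR threshold ladder and clamps the updated count just below the next threshold, so one batch of samples advances a method by at most one state. A rough self-contained sketch of that control flow, with hypothetical stand-ins (RequestProfilingInfo, EnqueueCompile, EnqueueOsrCompile) for ProfilingInfo::Create and the JitCompileTask queue:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    // Hypothetical hooks standing in for ProfilingInfo::Create and the JIT
    // thread pool; the real runtime posts JitCompileTask work items instead.
    static void RequestProfilingInfo() { std::cout << "start profiling\n"; }
    static void EnqueueCompile()       { std::cout << "enqueue compile\n"; }
    static void EnqueueOsrCompile()    { std::cout << "enqueue osr compile\n"; }

    // Sketch of the clamped hotness state machine: warm < hot < osr, and the
    // counter is capped just below the next threshold so a single call can
    // cross at most one threshold.
    static int32_t AddSamplesSketch(int32_t starting_count, uint16_t count,
                                    uint16_t warm, uint16_t hot, uint16_t osr) {
      int32_t new_count = starting_count + count;  // 32-bit to avoid wrap-around
      if (starting_count < warm) {
        if (new_count >= warm) {
          RequestProfilingInfo();
        }
        new_count = std::min<int32_t>(new_count, hot - 1);
      } else if (starting_count < hot) {
        if (new_count >= hot) {
          EnqueueCompile();
        }
        new_count = std::min<int32_t>(new_count, osr - 1);
      } else if (starting_count < osr) {
        if (new_count >= osr) {
          EnqueueOsrCompile();
        }
      }
      return new_count;  // caller writes this back as the method's hotness counter
    }

    int main() {
      int32_t counter = 0;
      counter = AddSamplesSketch(counter, 10000, 500, 1000, 2000);  // warms up, capped at 999
      counter = AddSamplesSketch(counter, 10000, 500, 1000, 2000);  // goes hot, capped at 1999
      std::cout << "counter = " << counter << "\n";
      return 0;
    }

In the sketch, each threshold is crossed by at most one call, so the profiling, compile, and OSR-compile requests each fire a single time.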