110 files changed, 7008 insertions, 1162 deletions
diff --git a/Android.mk b/Android.mk index fcf70ff2eb..0d0003abb0 100644 --- a/Android.mk +++ b/Android.mk @@ -122,6 +122,16 @@ include $(art_path)/build/Android.gtest.mk include $(art_path)/test/Android.run-test.mk include $(art_path)/benchmark/Android.mk +TEST_ART_ADB_ROOT_AND_REMOUNT := \ + (adb root && \ + adb wait-for-device remount && \ + ((adb shell touch /system/testfile && \ + (adb shell rm /system/testfile || true)) || \ + (adb disable-verity && \ + adb reboot && \ + adb wait-for-device root && \ + adb wait-for-device remount))) + # Sync test files to the target, depends upon all things that must be pushed to the target. .PHONY: test-art-target-sync # Check if we need to sync. In case ART_TEST_ANDROID_ROOT is not empty, @@ -130,12 +140,11 @@ include $(art_path)/benchmark/Android.mk ifneq ($(ART_TEST_NO_SYNC),true) ifeq ($(ART_TEST_ANDROID_ROOT),) test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS) - adb root - adb wait-for-device remount + $(TEST_ART_ADB_ROOT_AND_REMOUNT) adb sync else test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS) - adb root + $(TEST_ART_ADB_ROOT_AND_REMOUNT) adb wait-for-device push $(ANDROID_PRODUCT_OUT)/system $(ART_TEST_ANDROID_ROOT) adb push $(ANDROID_PRODUCT_OUT)/data /data endif @@ -374,8 +383,7 @@ oat-target: $(ART_TARGET_DEPENDENCIES) $(DEFAULT_DEX_PREOPT_INSTALLED_IMAGE) $(O .PHONY: oat-target-sync oat-target-sync: oat-target - adb root - adb wait-for-device remount + $(TEST_ART_ADB_ROOT_AND_REMOUNT) adb sync ######################################################################## diff --git a/compiler/Android.mk b/compiler/Android.mk index 42ddfd83ab..564bd7e7bb 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -81,6 +81,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/load_store_elimination.cc \ optimizing/locations.cc \ optimizing/nodes.cc \ + optimizing/nodes_arm64.cc \ optimizing/optimization.cc \ optimizing/optimizing_compiler.cc \ optimizing/parallel_move_resolver.cc \ @@ -219,7 +220,8 @@ LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips := \ utils/mips/assembler_mips.h LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips64 := \ - $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips) + $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips) \ + utils/mips64/assembler_mips64.h LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86 := LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86_64 := \ diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 7b0e5af246..1b57b7d1d2 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -128,6 +128,7 @@ class CommonCompilerTest : public CommonRuntimeTest { #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \ if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) { \ switch (GetInstructionSet()) { \ + case kArm64: \ case kThumb2: \ case kX86: \ case kX86_64: \ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index e42a73723b..d67087edd9 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1114,25 +1114,23 @@ bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { } bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) { - if (IsBootImage() && - IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) { - { - ScopedObjectAccess soa(Thread::Current()); - mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache( - 
soa.Self(), dex_file, false); - mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx); - if (resolved_class == nullptr) { - // Erroneous class. - stats_->TypeNotInDexCache(); - return false; - } - } + bool result = false; + if ((IsBootImage() && + IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) || + Runtime::Current()->UseJit()) { + ScopedObjectAccess soa(Thread::Current()); + mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache( + soa.Self(), dex_file, false); + mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx); + result = (resolved_class != nullptr); + } + + if (result) { stats_->TypeInDexCache(); - return true; } else { stats_->TypeNotInDexCache(); - return false; } + return result; } bool CompilerDriver::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index dae785b688..d90d6100b9 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -482,6 +482,10 @@ class CompilerDriver { return &compiled_method_storage_; } + // Can we assume that the klass is loaded? + bool CanAssumeClassIsLoaded(mirror::Class* klass) + SHARED_REQUIRES(Locks::mutator_lock_); + private: // Return whether the declaring class of `resolved_member` is // available to `referrer_class` for read or write access using two @@ -516,10 +520,6 @@ class CompilerDriver { bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); - // Can we assume that the klass is loaded? - bool CanAssumeClassIsLoaded(mirror::Class* klass) - SHARED_REQUIRES(Locks::mutator_lock_); - // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics. // The only external contract is that unresolved method has flags 0 and resolved non-0. enum { diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 3d9e7e7cda..341742e4dc 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -330,10 +330,20 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { } void ImageWriter::PrepareDexCacheArraySlots() { + // Prepare dex cache array starts based on the ordering specified in the CompilerDriver. + uint32_t size = 0u; + for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) { + dex_cache_array_starts_.Put(dex_file, size); + DexCacheArraysLayout layout(target_ptr_size_, dex_file); + size += layout.Size(); + } + // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned() + // when AssignImageBinSlot() assigns their indexes out or order. 
+ bin_slot_sizes_[kBinDexCacheArray] = size; + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); Thread* const self = Thread::Current(); ReaderMutexLock mu(self, *class_linker->DexLock()); - uint32_t size = 0u; for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root)); @@ -341,22 +351,18 @@ void ImageWriter::PrepareDexCacheArraySlots() { continue; } const DexFile* dex_file = dex_cache->GetDexFile(); - dex_cache_array_starts_.Put(dex_file, size); DexCacheArraysLayout layout(target_ptr_size_, dex_file); DCHECK(layout.Valid()); + uint32_t start = dex_cache_array_starts_.Get(dex_file); DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), size + layout.TypesOffset()); + AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), start + layout.TypesOffset()); DCHECK_EQ(dex_file->NumMethodIds() != 0u, dex_cache->GetResolvedMethods() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset()); + AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), start + layout.MethodsOffset()); DCHECK_EQ(dex_file->NumFieldIds() != 0u, dex_cache->GetResolvedFields() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), size + layout.FieldsOffset()); + AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), start + layout.FieldsOffset()); DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetStrings(), size + layout.StringsOffset()); - size += layout.Size(); + AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset()); } - // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned() - // when AssignImageBinSlot() assigns their indexes out or order. - bin_slot_sizes_[kBinDexCacheArray] = size; } void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) { @@ -586,6 +592,17 @@ bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) { } bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) { + bool early_exit = false; + std::unordered_set<mirror::Class*> visited; + return ContainsBootClassLoaderNonImageClassInternal(klass, &early_exit, &visited); +} + +bool ImageWriter::ContainsBootClassLoaderNonImageClassInternal( + mirror::Class* klass, + bool* early_exit, + std::unordered_set<mirror::Class*>* visited) { + DCHECK(early_exit != nullptr); + DCHECK(visited != nullptr); if (klass == nullptr) { return false; } @@ -594,14 +611,22 @@ bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) { // Already computed, return the found value. return found->second; } - // Place holder value to prevent infinite recursion. - prune_class_memo_.emplace(klass, false); + // Circular dependencies, return false but do not store the result in the memoization table. + if (visited->find(klass) != visited->end()) { + *early_exit = true; + return false; + } + visited->emplace(klass); bool result = IsBootClassLoaderNonImageClass(klass); + bool my_early_exit = false; // Only for ourselves, ignore caller. if (!result) { // Check interfaces since these wont be visited through VisitReferences.) 
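The PrepareDexCacheArraySlots() hunk above moves the start-offset computation out of the class-linker iteration and instead derives it from the dex-file order the CompilerDriver specifies for the oat file, accumulating each file's DexCacheArraysLayout size into a running total. The following is a minimal sketch of that running-sum pattern only; DexFileInfo is an invented stand-in for the real DexFile / DexCacheArraysLayout types.

#include <cstdint>
#include <map>
#include <vector>

struct DexFileInfo {
  uint32_t arrays_size;  // stands in for DexCacheArraysLayout(target_ptr_size_, dex_file).Size()
};

std::map<const DexFileInfo*, uint32_t> ComputeStarts(const std::vector<const DexFileInfo*>& files) {
  std::map<const DexFileInfo*, uint32_t> starts;
  uint32_t size = 0u;
  for (const DexFileInfo* file : files) {
    starts[file] = size;         // this file's arrays begin at the current total
    size += file->arrays_size;   // append this file's arrays to the bin
  }
  // The final total is also the bin size recorded in bin_slot_sizes_[kBinDexCacheArray] above.
  return starts;
}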
mirror::IfTable* if_table = klass->GetIfTable(); for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) { - result = result || ContainsBootClassLoaderNonImageClass(if_table->GetInterface(i)); + result = result || ContainsBootClassLoaderNonImageClassInternal( + if_table->GetInterface(i), + &my_early_exit, + visited); } } // Check static fields and their classes. @@ -615,16 +640,38 @@ bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) { mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset); if (ref != nullptr) { if (ref->IsClass()) { - result = result || ContainsBootClassLoaderNonImageClass(ref->AsClass()); + result = result || + ContainsBootClassLoaderNonImageClassInternal( + ref->AsClass(), + &my_early_exit, + visited); } - result = result || ContainsBootClassLoaderNonImageClass(ref->GetClass()); + result = result || + ContainsBootClassLoaderNonImageClassInternal( + ref->GetClass(), + &my_early_exit, + visited); } field_offset = MemberOffset(field_offset.Uint32Value() + sizeof(mirror::HeapReference<mirror::Object>)); } } - result = result || ContainsBootClassLoaderNonImageClass(klass->GetSuperClass()); - prune_class_memo_[klass] = result; + result = result || + ContainsBootClassLoaderNonImageClassInternal( + klass->GetSuperClass(), + &my_early_exit, + visited); + // Erase the element we stored earlier since we are exiting the function. + auto it = visited->find(klass); + DCHECK(it != visited->end()); + visited->erase(it); + // Only store result if it is true or none of the calls early exited due to circular + // dependencies. If visited is empty then we are the root caller, in this case the cycle was in + // a child call and we can remember the result. + if (result == true || !my_early_exit || visited->empty()) { + prune_class_memo_[klass] = result; + } + *early_exit |= my_early_exit; return result; } diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 22cb91a56d..889cd10dc4 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -343,6 +343,12 @@ class ImageWriter FINAL { bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); + // early_exit is true if we had a cyclic dependency anywhere down the chain. + bool ContainsBootClassLoaderNonImageClassInternal(mirror::Class* klass, + bool* early_exit, + std::unordered_set<mirror::Class*>* visited) + SHARED_REQUIRES(Locks::mutator_lock_); + static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type); uintptr_t NativeOffsetInImage(void* obj); diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index f985745e7a..f0cafc847f 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -61,40 +61,6 @@ static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne(); } -// Returns an instruction with the opposite boolean value from 'cond'. 
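The ImageWriter change above replaces the old placeholder memo entry with an explicit visited set plus an early_exit flag, so that a result computed while a cycle was cut short is not cached as a false negative. Below is a self-contained sketch of that pattern; Node and is_bad are illustrative stand-ins for mirror::Class and IsBootClassLoaderNonImageClass(), and the edges stand in for super classes, interfaces and static-field references.

#include <unordered_map>
#include <unordered_set>
#include <vector>

struct Node {
  bool is_bad = false;
  std::vector<Node*> deps;
};

class Pruner {
 public:
  bool ContainsBad(Node* n) {
    bool early_exit = false;
    std::unordered_set<Node*> visited;
    return ContainsBadInternal(n, &early_exit, &visited);
  }

 private:
  bool ContainsBadInternal(Node* n, bool* early_exit, std::unordered_set<Node*>* visited) {
    if (n == nullptr) return false;
    auto found = memo_.find(n);
    if (found != memo_.end()) return found->second;  // already computed
    if (visited->count(n) != 0) {                    // cycle: cut it, but tell the caller
      *early_exit = true;
      return false;
    }
    visited->insert(n);
    bool result = n->is_bad;
    bool my_early_exit = false;                      // early exits in this subtree only
    for (Node* dep : n->deps) {
      result = result || ContainsBadInternal(dep, &my_early_exit, visited);
    }
    visited->erase(n);
    // A result tainted by a cut cycle may be a false negative, so only cache it when it
    // is true, when no cycle was cut below us, or when we are back at the root (visited
    // is empty, so the cycle closed entirely inside this walk).
    if (result || !my_early_exit || visited->empty()) {
      memo_[n] = result;
    }
    *early_exit |= my_early_exit;
    return result;
  }

  std::unordered_map<Node*, bool> memo_;
};

int main() {
  Node a, b, bad;
  bad.is_bad = true;
  a.deps = {&b};
  b.deps = {&a, &bad};  // cycle a <-> b, plus an edge to a bad node
  Pruner pruner;
  return pruner.ContainsBad(&a) ? 0 : 1;
}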
-static HInstruction* GetOppositeCondition(HInstruction* cond) { - HGraph* graph = cond->GetBlock()->GetGraph(); - ArenaAllocator* allocator = graph->GetArena(); - - if (cond->IsCondition()) { - HInstruction* lhs = cond->InputAt(0); - HInstruction* rhs = cond->InputAt(1); - switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* - case kCondEQ: return new (allocator) HEqual(lhs, rhs); - case kCondNE: return new (allocator) HNotEqual(lhs, rhs); - case kCondLT: return new (allocator) HLessThan(lhs, rhs); - case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs); - case kCondGT: return new (allocator) HGreaterThan(lhs, rhs); - case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs); - case kCondB: return new (allocator) HBelow(lhs, rhs); - case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs); - case kCondA: return new (allocator) HAbove(lhs, rhs); - case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs); - } - } else if (cond->IsIntConstant()) { - HIntConstant* int_const = cond->AsIntConstant(); - if (int_const->IsZero()) { - return graph->GetIntConstant(1); - } else { - DCHECK(int_const->IsOne()); - return graph->GetIntConstant(0); - } - } - // General case when 'cond' is another instruction of type boolean, - // as verified by SSAChecker. - return new (allocator) HBooleanNot(cond); -} - void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { DCHECK(block->EndsWithIf()); @@ -126,10 +92,7 @@ void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { HInstruction* replacement; if (NegatesCondition(true_value, false_value)) { - replacement = GetOppositeCondition(if_condition); - if (replacement->GetBlock() == nullptr) { - block->InsertInstructionBefore(replacement, if_instruction); - } + replacement = graph_->InsertOppositeCondition(if_condition, if_instruction); } else if (PreservesCondition(true_value, false_value)) { replacement = if_condition; } else { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 3257de1858..d7754e8ea9 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -876,12 +876,96 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, clinit_check); } +bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) { + bool finalizable; + bool can_throw = NeedsAccessCheck(type_index, &finalizable); + + // Only the non-resolved entrypoint handles the finalizable class case. If we + // need access checks, then we haven't resolved the method and the class may + // again be finalizable. + QuickEntrypointEnum entrypoint = (finalizable || can_throw) + ? kQuickAllocObject + : kQuickAllocObjectInitialized; + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<3> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache(hs.NewHandle( + dex_compilation_unit_->GetClassLinker()->FindDexCache( + soa.Self(), *dex_compilation_unit_->GetDexFile()))); + Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); + const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( + outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); + + if (outer_dex_cache.Get() != dex_cache.Get()) { + // We currently do not support inlining allocations across dex files. 
+ return false; + } + + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + type_index, + outer_dex_file, + IsOutermostCompilingClass(type_index), + dex_pc, + /*needs_access_check*/ can_throw, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index)); + + current_block_->AddInstruction(load_class); + HInstruction* cls = load_class; + if (!IsInitialized(resolved_class)) { + cls = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(cls); + } + + current_block_->AddInstruction(new (arena_) HNewInstance( + cls, + graph_->GetCurrentMethod(), + dex_pc, + type_index, + *dex_compilation_unit_->GetDexFile(), + can_throw, + finalizable, + entrypoint)); + return true; +} + +static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class) + SHARED_REQUIRES(Locks::mutator_lock_) { + return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class); +} + +bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const { + if (cls.Get() == nullptr) { + return false; + } + + // `CanAssumeClassIsLoaded` will return true if we're JITting, or will + // check whether the class is in an image for the AOT compilation. + if (cls->IsInitialized() && + compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) { + return true; + } + + if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) { + return true; + } + + // TODO: We should walk over the inlined methods, but we don't pass + // that information to the builder. + if (IsSubClass(GetCompilingClass(), cls.Get())) { + return true; + } + + return false; +} + HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( uint32_t dex_pc, uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<5> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); @@ -896,6 +980,7 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); + Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass())); // The index at which the method's class is stored in the DexCache's type array. uint32_t storage_index = DexFile::kDexNoIndex; @@ -913,41 +998,21 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( HClinitCheck* clinit_check = nullptr; - if (!outer_class->IsInterface() - && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) { - // If the outer class is the declaring class or a subclass - // of the declaring class, no class initialization is needed - // before the static method call. - // Note that in case of inlining, we do not need to add clinit checks - // to calls that satisfy this subclass check with any inlined methods. This - // will be detected by the optimization passes. + if (IsInitialized(resolved_method_class)) { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; } else if (storage_index != DexFile::kDexNoIndex) { - // If the method's class type index is available, check - // whether we should add an explicit class initialization - // check for its declaring class before the static method call. 
- - // TODO: find out why this check is needed. - bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = - resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - - if (is_initialized) { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; - } else { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; - HLoadClass* load_class = new (arena_) HLoadClass( - graph_->GetCurrentMethod(), - storage_index, - *dex_compilation_unit_->GetDexFile(), - is_outer_class, - dex_pc, - /*needs_access_check*/ false); - current_block_->AddInstruction(load_class); - clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); - current_block_->AddInstruction(clinit_check); - } + *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + storage_index, + outer_dex_file, + is_outer_class, + dex_pc, + /*needs_access_check*/ false, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index)); + current_block_->AddInstruction(load_class); + clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(clinit_check); } return clinit_check; } @@ -1272,7 +1337,7 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint16_t field_index = instruction.VRegB_21c(); ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<5> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); @@ -1318,26 +1383,26 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, } } - // TODO: find out why this check is needed. - bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - + bool is_in_cache = + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index); HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(), storage_index, - *dex_compilation_unit_->GetDexFile(), + outer_dex_file, is_outer_class, dex_pc, - /*needs_access_check*/ false); + /*needs_access_check*/ false, + is_in_cache); current_block_->AddInstruction(constant); HInstruction* cls = constant; - if (!is_initialized && !is_outer_class) { + + Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass())); + if (!IsInitialized(klass)) { cls = new (arena_) HClinitCheck(constant, dex_pc); current_block_->AddInstruction(cls); } - uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); + uint16_t class_def_index = klass->GetDexClassDefIndex(); if (is_put) { // We need to keep the class alive before loading the value. 
Temporaries temps(graph_); @@ -1601,19 +1666,20 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, ScopedObjectAccess soa(Thread::Current()); StackHandleScope<2> hs(soa.Self()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); Handle<mirror::DexCache> dex_cache(hs.NewHandle( - dex_compilation_unit_->GetClassLinker()->FindDexCache( - soa.Self(), *dex_compilation_unit_->GetDexFile()))); + dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file))); Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc); HLoadClass* cls = new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + dex_file, IsOutermostCompilingClass(type_index), dex_pc, - !can_access); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index)); current_block_->AddInstruction(cls); // The class needs a temporary before being used by the type check. @@ -2509,20 +2575,9 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(fake_string); UpdateLocal(register_index, fake_string, dex_pc); } else { - bool finalizable; - bool can_throw = NeedsAccessCheck(type_index, &finalizable); - QuickEntrypointEnum entrypoint = can_throw - ? kQuickAllocObjectWithAccessCheck - : kQuickAllocObject; - - current_block_->AddInstruction(new (arena_) HNewInstance( - graph_->GetCurrentMethod(), - dex_pc, - type_index, - *dex_compilation_unit_->GetDexFile(), - can_throw, - finalizable, - entrypoint)); + if (!BuildNewInstance(type_index, dex_pc)) { + return false; + } UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc); } break; @@ -2750,10 +2805,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + *dex_file_, IsOutermostCompilingClass(type_index), dex_pc, - !can_access)); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index))); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc); break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index f857ef0e12..5ada93f684 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -308,6 +308,13 @@ class HGraphBuilder : public ValueObject { uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement); + // Build a HNewInstance instruction. + bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc); + + // Return whether the compiler can assume `cls` is initialized. 
+ bool IsInitialized(Handle<mirror::Class> cls) const + SHARED_REQUIRES(Locks::mutator_lock_); + ArenaAllocator* const arena_; // A list of the size of the dex code holding block information for diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index cb6bed08ec..a98d9c68b7 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -77,6 +77,7 @@ class NullCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -101,6 +102,7 @@ class DivZeroCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -123,6 +125,7 @@ class SuspendCheckSlowPathARM : public SlowPathCode { SaveLiveRegisters(codegen, instruction_->GetLocations()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ b(GetReturnLabel()); @@ -179,6 +182,7 @@ class BoundsCheckSlowPathARM : public SlowPathCode { Primitive::kPrimInt); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -214,6 +218,11 @@ class LoadClassSlowPathARM : public SlowPathCode { ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. 
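Many of the ARM slow paths and visitors above gain a CheckEntrypointTypes<kQuick..., ReturnType, Args...>() call immediately after each InvokeRuntime, documenting and statically checking the signature the call site assumes for that entrypoint. The snippet below only illustrates how such a compile-time check can be structured; every name in it is invented here and is not ART's actual definition, which derives the expected signatures from its entrypoint list.

#include <cstdint>
#include <type_traits>

enum QuickEntrypointEnumSketch { kSketchThrowDivZero, kSketchIdivmod };

template <QuickEntrypointEnumSketch kEntrypoint>
struct EntrypointSig;  // expected signature, one specialization per entrypoint

template <> struct EntrypointSig<kSketchThrowDivZero> { using type = void(); };
template <> struct EntrypointSig<kSketchIdivmod>      { using type = int32_t(int32_t, int32_t); };

template <QuickEntrypointEnumSketch kEntrypoint, typename Ret, typename... Args>
void CheckEntrypointTypesSketch() {
  static_assert(std::is_same<typename EntrypointSig<kEntrypoint>::type, Ret(Args...)>::value,
                "call site disagrees with the entrypoint's signature");
}

int main() {
  // Mirrors the calls added above, e.g. after invoking pIdivmod:
  CheckEntrypointTypesSketch<kSketchIdivmod, int32_t, int32_t, int32_t>();
  // CheckEntrypointTypesSketch<kSketchIdivmod, void, void>();  // would fail to compile
  return 0;
}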
Location out = locations->Out(); @@ -260,6 +269,7 @@ class LoadStringSlowPathARM : public SlowPathCode { __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); RestoreLiveRegisters(codegen, locations); @@ -351,6 +361,7 @@ class DeoptimizationSlowPathARM : public SlowPathCode { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } @@ -393,6 +404,7 @@ class ArraySetSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -2410,6 +2422,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2418,6 +2431,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -2463,6 +2477,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); break; case Primitive::kPrimDouble: @@ -2985,6 +3000,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R0, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } @@ -2999,6 +3015,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); break; } @@ -3127,22 +3144,26 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { DCHECK_EQ(R1, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } case Primitive::kPrimLong: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); break; } case Primitive::kPrimFloat: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } @@ -3361,7 +3382,19 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { __ mov(o_l, ShifterOperand(high)); __ LoadImmediate(o_h, 0); } - } else { // shift_value < 32 + } else if (shift_value == 1) { + if (op->IsShl()) { + __ Lsls(o_l, low, 1); + __ adc(o_h, 
high, ShifterOperand(high)); + } else if (op->IsShr()) { + __ Asrs(o_h, high, 1); + __ Rrx(o_l, low); + } else { + __ Lsrs(o_h, high, 1); + __ Rrx(o_l, low); + } + } else { + DCHECK(2 <= shift_value && shift_value < 32) << shift_value; if (op->IsShl()) { __ Lsl(o_h, high, shift_value); __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value)); @@ -3413,20 +3446,19 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(R0)); } void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { @@ -3448,6 +3480,7 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) { @@ -4320,7 +4353,7 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { if (needs_write_barrier) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for read barrier too. 
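The shift_value == 1 special case added above performs a 64-bit shift on a 32-bit register pair through the carry flag: Lsls plus adc for shl, and Asrs or Lsrs followed by Rrx for the right shifts. The plain C++ model below is a sketch of the semantics those two-instruction sequences compute, not of the generated code; lo/hi model the register pair and carry models the ARM carry flag.

#include <cassert>
#include <cstdint>

struct Pair {
  uint32_t lo;
  uint32_t hi;
};

Pair Shl1(Pair in) {
  uint32_t carry = in.lo >> 31;            // Lsls o_l, low, #1 : the bit shifted out goes to C
  return { in.lo << 1u,
           in.hi + in.hi + carry };        // adc o_h, high, high : (high << 1) | C
}

Pair AShr1(Pair in) {                      // arithmetic shift right by 1
  uint32_t carry = in.hi & 1u;             // Asrs o_h, high, #1 : the bit shifted out goes to C
  uint32_t hi = static_cast<uint32_t>(static_cast<int32_t>(in.hi) >> 1);
  return { (in.lo >> 1u) | (carry << 31),  // Rrx o_l, low : rotate right through C
           hi };
}

int main() {
  uint64_t v = 0x80000001ffffffffULL;
  Pair p{static_cast<uint32_t>(v), static_cast<uint32_t>(v >> 32)};
  Pair s = Shl1(p);
  assert(((static_cast<uint64_t>(s.hi) << 32) | s.lo) == (v << 1));
  Pair r = AShr1(p);
  assert(((static_cast<uint64_t>(r.hi) << 32) | r.lo) ==
         static_cast<uint64_t>(static_cast<int64_t>(v) >> 1));
  return 0;
}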
+ locations->AddTemp(Location::RequiresRegister()); } } @@ -4937,6 +4970,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -4958,7 +4992,6 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); } } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ LoadFromOffset(kLoadWord, @@ -4977,14 +5010,19 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadWord, out, out, cache_offset); } - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5097,6 +5135,7 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) { void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { @@ -5537,6 +5576,11 @@ void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2776b7d6c9..ac16268834 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -42,6 +42,9 @@ using namespace vixl; // NOLINT(build/namespaces) namespace art { +template<class MirrorType> +class GcRoot; + namespace arm64 { using helpers::CPURegisterFrom; @@ -431,15 +434,6 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. 
- Register obj = InputRegisterAt(instruction_, 0); - Register temp = WRegisterFrom(locations->GetTemp(0)); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ Ldr(temp, HeapOperand(obj, class_offset)); - arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -454,11 +448,11 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, + const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, - const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -494,6 +488,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } @@ -571,6 +566,271 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ Ldr(out, HeapOperand(out, class_offset); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. + DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + + // Note: In the case of a HArrayGet instruction, when the base + // address is a HArm64IntermediateAddress instruction, it does not + // point to the array object itself, but to an offset within this + // object. However, the read barrier entry point needs the array + // object address to be passed as first argument. 
So we + // temporarily set back `obj_` to that address, and restore its + // initial value later. + if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Sub(obj_reg, obj_reg, offset_); + } + + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = RegisterFrom(index_, Primitive::kPrimInt); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg())); + if (codegen->IsCoreCalleeSaveRegister(index_.reg())) { + // We are about to change the value of `index_reg` (see the + // calls to vixl::MacroAssembler::Lsl and + // vixl::MacroAssembler::Mov below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Mov(free_reg.W(), index_reg); + index_reg = free_reg; + index = LocationFrom(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). 
+ __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ Add(index_reg, index_reg, Operand(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + LocationFrom(calling_convention.GetRegisterAt(0)), + type, + nullptr); + parallel_move.AddMove(obj_, + LocationFrom(calling_convention.GetRegisterAt(1)), + type, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + LocationFrom(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_); + } + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + + // Restore the value of `obj_` when it corresponds to a + // HArm64IntermediateAddress instruction. + if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Add(obj_reg, obj_reg, offset_); + } + + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(XRegisterFrom(ref_).code()); + size_t obj = static_cast<int>(XRegisterFrom(obj_).code()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return Register(VIXLRegCodeFromART(i), kXRegSize); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on ARM64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). 
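The two ARM64 slow paths above funnel into the pReadBarrierSlow and pReadBarrierForRootSlow entrypoints, whose job is to hand back the up-to-date reference after the collector may have moved the object. The toy model below only illustrates that contract; FakeCollector and its forwarding map are pure invention and not ART's collector.

#include <cstdint>
#include <unordered_map>

struct Obj {};

struct FakeCollector {
  std::unordered_map<const Obj*, Obj*> forwarding;

  // Contract of the heap-reference barrier: `ref` was just loaded from `obj` at `offset`;
  // return the up-to-date reference (a real barrier may also fix up the field at
  // (obj, offset), which is why both are passed down alongside the loaded value).
  Obj* ReadBarrierSlow(Obj* ref, Obj* obj, uint32_t offset) {
    (void)obj;
    (void)offset;
    auto it = forwarding.find(ref);
    return it != forwarding.end() ? it->second : ref;
  }
};

int main() {
  Obj holder, from, to;
  FakeCollector gc;
  gc.forwarding[&from] = &to;   // pretend `from` has been copied to `to`
  Obj* loaded = &from;          // the value the instrumented load produced
  Obj* fixed = gc.ReadBarrierSlow(loaded, &holder, /*offset=*/8u);
  return fixed == &to ? 0 : 1;
}

This is also why the slow path needs the original (ref, obj, offset) triple and why its constructor checks that the output does not alias the object register: once the load has clobbered that register, the (obj, offset) pair can no longer be reconstructed.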
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + // The argument of the ReadBarrierForRootSlow is not a managed + // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`; + // thus we need a 64-bit move here, and we cannot use + // + // arm64_codegen->MoveLocation( + // LocationFrom(calling_convention.GetRegisterAt(0)), + // root_, + // type); + // + // which would emit a 32-bit move, as `type` is a (32-bit wide) + // reference type (`Primitive::kPrimNot`). + __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_)); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { @@ -1401,13 +1661,25 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { } void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the load to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } @@ -1436,7 +1708,11 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W()); + LocationSummary* locations = instruction->GetLocations(); + Location base = locations->InAt(0); + Location out = locations->Out(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset); } } @@ -1613,6 +1889,82 @@ void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + DCHECK(instruction->GetType() == Primitive::kPrimInt || + instruction->GetType() == Primitive::kPrimLong); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + if (instruction->GetInstrKind() == HInstruction::kNeg) { + locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + Primitive::Type type = instruction->GetType(); + HInstruction::InstructionKind kind = instruction->GetInstrKind(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + Register out = OutputRegister(instruction); + Register left; + if (kind != HInstruction::kNeg) { + left = InputRegisterAt(instruction, 0); + } + // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the + // shifter operand operation, the IR generating `right_reg` (input to the type + // conversion) can have a different type from the current instruction's type, + // so we manually indicate the type. + Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); + int64_t shift_amount = (type == Primitive::kPrimInt) + ? static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue) + : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue); + + Operand right_operand(0); + + HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) { + right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); + } else { + right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount); + } + + // Logical binary operations do not support extension operations in the + // operand. 
Note that VIXL would still manage if it was passed by generating + // the extension as a separate instruction. + // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`. + DCHECK(!right_operand.IsExtendedRegister() || + (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor && + kind != HInstruction::kNeg)); + switch (kind) { + case HInstruction::kAdd: + __ Add(out, left, right_operand); + break; + case HInstruction::kAnd: + __ And(out, left, right_operand); + break; + case HInstruction::kNeg: + DCHECK(instruction->InputAt(0)->AsConstant()->IsZero()); + __ Neg(out, right_operand); + break; + case HInstruction::kOr: + __ Orr(out, left, right_operand); + break; + case HInstruction::kSub: + __ Sub(out, left, right_operand); + break; + case HInstruction::kXor: + __ Eor(out, left, right_operand); + break; + default: + LOG(FATAL) << "Unexpected operation kind: " << kind; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -1628,23 +1980,75 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( Operand(InputOperandAt(instruction, 1))); } +void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + Register res = OutputRegister(instr); + Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex); + Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex); + Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex); + + // Avoid emitting code that could trigger Cortex A53's erratum 835769. + // This fixup should be carried out for all multiply-accumulate instructions: + // madd, msub, smaddl, smsubl, umaddl and umsubl. + if (instr->GetType() == Primitive::kPrimLong && + codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { + MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); + vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize; + if (prev->IsLoadOrStore()) { + // Make sure we emit only exactly one nop. 
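The HArm64DataProcWithShifterOp support added above exists so that a shift or an extension feeding an add, sub, and, orr, eor or neg can be folded into the second operand of a single ARM64 instruction; the pass that actually performs the merging is not part of this excerpt, so treat the mapping below as illustrative. Two source-level shapes the node is meant to represent, with the intended single-instruction forms noted in the comments:

#include <cstdint>

// a + (b << 3)                 ->  add x0, x1, x2, lsl #3   (shift folded into the operand)
// a + static_cast<int64_t>(w)  ->  add x0, x1, w2, sxtw     (sign extension folded into the operand)
// Per the DCHECK above, the logical operations and neg only take shifted operands,
// not extended-register ones.
int64_t AddShifted(int64_t a, int64_t b) { return a + (b << 3); }
int64_t AddExtended(int64_t a, int32_t w) { return a + static_cast<int64_t>(w); }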
+ vixl::CodeBufferCheckScope scope(masm, + vixl::kInstructionSize, + vixl::CodeBufferCheckScope::kCheck, + vixl::CodeBufferCheckScope::kExactSize); + __ nop(); + } + } + + if (instr->GetOpKind() == HInstruction::kAdd) { + __ Madd(res, mul_left, mul_right, accumulator); + } else { + DCHECK(instr->GetOpKind() == HInstruction::kSub); + __ Msub(res, mul_left, mul_right, accumulator); + } +} + void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Primitive::Type type = instruction->GetType(); Register obj = InputRegisterAt(instruction, 0); - Location index = instruction->GetLocations()->InAt(1); - size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); + LocationSummary* locations = instruction->GetLocations(); + Location index = locations->InAt(1); + uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); MemOperand source = HeapOperand(obj); CPURegister dest = OutputCPURegister(instruction); @@ -1676,8 +2080,22 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { codegen_->Load(type, dest, source); codegen_->MaybeRecordImplicitNullCheck(instruction); - if (instruction->GetType() == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(dest.W()); + if (type == Primitive::kPrimNot) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + Location obj_loc = locations->InAt(0); + Location out = locations->Out(); + if (index.IsConstant()) { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + // Note: when `obj_loc` is a HArm64IntermediateAddress, it does + // not contain the base address of the array object, which is + // needed by the read barrier entry point. So the read barrier + // slow path will temporarily set back `obj_loc` to the right + // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode). 
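VisitArm64MultiplyAccumulate above is the code-generation half of a multiply-plus-add/sub fusion; the matching that builds HArm64MultiplyAccumulate nodes happens in a pass outside this excerpt, so the mapping below is illustrative only. It also shows where the Cortex-A53 erratum 835769 guard applies: a 64-bit multiply-accumulate issued right after a load or store can misbehave on affected cores, hence the single nop emitted between them above.

#include <cstdint>

// Expressions of this shape are the candidates for a single multiply-accumulate:
int64_t MulAdd(int64_t acc, int64_t l, int64_t r) { return acc + l * r; }  // madd xd, xl, xr, xacc
int64_t MulSub(int64_t acc, int64_t l, int64_t r) { return acc - l * r; }  // msub xd, xl, xr, xacc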
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index); + } } } @@ -1695,12 +2113,19 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + if (Primitive::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RequiresRegister()); @@ -1710,7 +2135,7 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); LocationSummary* locations = instruction->GetLocations(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -1724,7 +2149,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { BlockPoolsScope block_pools(masm); if (!needs_write_barrier) { - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); if (index.IsConstant()) { offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); destination = HeapOperand(array, offset); @@ -1774,7 +2199,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -1789,26 +2214,66 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - Register temp2 = temps.AcquireSameSizeAs(array); - __ Ldr(temp, HeapOperand(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, component_offset)); - __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ Cmp(temp, temp2); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - vixl::Label do_put; - __ B(eq, &do_put); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, super_offset)); - // No need to unpoison, we're comparing against null. 
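The poison/unpoison comments on both the old and the new type-check sequence here rely on heap reference poisoning being an injective transform on the 32-bit reference bits, which is why two still-poisoned references can be compared for equality directly. A rough stand-in, using negation as a representative involution and an assumed build flag:

    #include <cassert>
    #include <cstdint>

    constexpr bool kPoisonHeapReferences = true;  // build-time switch (assumed on here)

    // Poisoning must be injective so that equality of poisoned values implies
    // equality of the original references; negation is one such involution.
    uint32_t Poison(uint32_t ref) { return 0u - ref; }
    uint32_t Unpoison(uint32_t poisoned) { return 0u - poisoned; }

    uint32_t MaybeUnpoison(uint32_t ref) {
      return kPoisonHeapReferences ? Unpoison(ref) : ref;
    }

    int main() {
      uint32_t a = 0x12345670u;
      uint32_t b = 0x12345670u;
      assert(MaybeUnpoison(Poison(a)) == a);
      assert((Poison(a) == Poison(b)) == (a == b));  // compare-while-poisoned is sound
      return 0;
    }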
- __ Cbnz(temp, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ Mov(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ Ldr(temp, HeapOperand(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc); + // + // __ Cmp(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ B(slow_path->GetEntryLabel()); } else { - __ B(ne, slow_path->GetEntryLabel()); + Register temp2 = temps.AcquireSameSizeAs(array); + // /* HeapReference<Class> */ temp = array->klass_ + __ Ldr(temp, HeapOperand(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ Ldr(temp, HeapOperand(temp, component_offset)); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor `temp2`, as we are comparing two poisoned references. + __ Cmp(temp, temp2); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + vixl::Label do_put; + __ B(eq, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ Ldr(temp, HeapOperand(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ B(ne, slow_path->GetEntryLabel()); + } + temps.Release(temp2); } - temps.Release(temp2); } if (kPoisonHeapReferences) { @@ -1824,7 +2289,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } __ Str(source, destination); - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -2491,40 +2956,44 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The out register is used as a temporary, so it overlaps with the inputs. - // Note that TypeCheckSlowPathARM64 uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The "out" register is used as a temporary, so it overlaps with the inputs. + // Note that TypeCheckSlowPathARM64 uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // When read barriers are enabled, we need a temporary register for + // some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); + Location out_loc = locations->Out(); Register out = OutputRegister(instruction); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); @@ -2540,15 +3009,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cbz(obj, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ Ldr(target, HeapOperand(obj.W(), class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ Ldr(out, HeapOperand(obj.W(), class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -2559,13 +3022,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. 
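The kAbstractClassCheck loop emitted just below amounts to walking the super-class chain until either null (not an instance) or the target class (an instance) is reached; the object's own class is never compared, since it cannot equal an abstract class. Sketched with minimal stand-in types:

    #include <iostream>

    struct Class { const Class* super_class; };

    // Mirrors the loop below: load super_class, null means failure, a match
    // means success, anything else iterates.
    bool IsInstanceOfAbstractClass(const Class* klass, const Class* target) {
      do {
        klass = klass->super_class;
        if (klass == nullptr) {
          return false;
        }
      } while (klass != target);
      return true;
    }

    int main() {
      Class object{nullptr};
      Class abstract_base{&object};
      Class concrete{&abstract_base};
      std::cout << IsInstanceOfAbstractClass(&concrete, &abstract_base)
                << IsInstanceOfAbstractClass(&object, &abstract_base) << "\n";  // 10
      return 0;
    }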
vixl::Label loop, success; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Cmp(out, cls); @@ -2576,14 +3049,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop, success; __ Bind(&loop); __ Cmp(out, cls); __ B(eq, &success); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ Cbnz(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -2594,14 +3077,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. vixl::Label exact_check; __ Cmp(out, cls); __ B(eq, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ Ldr(out, HeapOperand(out, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); // If `out` is null, we use it for the result, and jump to `done`. 
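The kArrayObjectCheck sequence (exact match, otherwise a non-null, non-primitive component type) continues right below with the Ldrh of the primitive type field. As plain logic, with stand-in types where primitive_type == 0 plays the role of Primitive::kPrimNot:

    #include <cstdint>
    #include <iostream>

    struct Class {
      const Class* component_type;
      uint16_t primitive_type;
    };

    bool IsObjectArrayInstance(const Class* klass, const Class* target) {
      if (klass == target) {
        return true;                          // exact check
      }
      const Class* component = klass->component_type;
      if (component == nullptr) {
        return false;                         // not an array at all
      }
      return component->primitive_type == 0;  // reference component type
    }

    int main() {
      Class object_class{nullptr, 0};
      Class int_class{nullptr, 10};           // some primitive type tag
      Class object_array{&object_class, 0};
      Class int_array{&int_class, 0};
      Class not_an_array{nullptr, 0};
      std::cout << IsObjectArrayInstance(&object_array, &object_array)  // 1: exact
                << IsObjectArrayInstance(&int_array, &object_array)     // 0: primitive component
                << IsObjectArrayInstance(&not_an_array, &object_array)  // 0: no component type
                << "\n";
      return 0;
    }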
__ Cbz(out, &done); __ Ldrh(out, HeapOperand(out, primitive_offset)); @@ -2612,11 +3105,12 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ B(&done); break; } + case TypeCheckKind::kArrayCheck: { __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -2625,13 +3119,25 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved and interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ B(&done); } @@ -2657,58 +3163,62 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Note that TypeCheckSlowPathARM64 uses this register too. 
- locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM64 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); - Register temp; - if (!locations->WillCall()) { - temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0)); - } - + Location temp_loc = locations->GetTemp(0); + Register temp = WRegisterFrom(temp_loc); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCodeARM64* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCodeARM64* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); vixl::Label done; // Avoid null check if we know obj is not null. @@ -2716,76 +3226,159 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cbz(obj, &done); } - if (locations->WillCall()) { - __ Ldr(obj, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(obj); - } else { - __ Ldr(temp, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { __ Cmp(temp, cls); // Jump to slow path for throwing the exception or doing a // more involved array check. 
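In the kExactCheck/kArrayCheck form here, the fast path of the cast check reduces to: null always passes, an exact class match passes, and everything else branches to the type-check slow path (which either throws or performs the more involved array check). A stand-in with hypothetical types:

    #include <iostream>

    struct Class {};
    struct Object { const Class* klass; };

    // true  -> fall through to `done`
    // false -> the generated code would branch to the type-check slow path
    bool ExactCheckCastFastPathPasses(const Object* obj, const Class* target) {
      if (obj == nullptr) {
        return true;                 // Cbz(obj, &done)
      }
      return obj->klass == target;   // Cmp(temp, cls); B(ne, slow_path)
    }

    int main() {
      Class c;
      Object o{&c};
      std::cout << ExactCheckCastFastPathPasses(nullptr, &c)
                << ExactCheckCastFastPathPasses(&o, &c) << "\n";  // 11
      return 0;
    }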
- __ B(ne, slow_path->GetEntryLabel()); + __ B(ne, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - vixl::Label loop; + vixl::Label loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - // Jump to the slow path to throw the exception. - __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. + __ Cbnz(temp, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); __ Cmp(temp, cls); __ B(ne, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop; __ Bind(&loop); __ Cmp(temp, cls); __ B(eq, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ Cbnz(temp, &loop); - // Jump to the slow path to throw the exception. - __ B(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + vixl::Label check_non_primitive_component_type; __ Cmp(temp, cls); __ B(eq, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. 
+ Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ Ldr(temp, HeapOperand(temp, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. + __ Cbnz(temp, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ Ldrh(temp, HeapOperand(temp, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Cbz(temp, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved + // and interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ B(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { @@ -2828,10 +3421,11 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
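The interface dispatch below picks an IMT slot by reducing the method's index modulo the embedded table size, which is also why the TODO above notes that conflicting methods can miss an IncompatibleClassChangeError. A sketch of the slot arithmetic; the table size, pointer size, and base offset are assumptions, not values read from the ART headers:

    #include <cstdint>
    #include <iostream>

    constexpr uint32_t kAssumedImtSize = 64;       // slots in the embedded IMT
    constexpr uint32_t kAssumedPointerSize = 8;    // arm64
    constexpr uint32_t kAssumedImtBaseOffset = 0;  // offset of the embedded table in Class

    uint32_t ImtEntryOffset(uint32_t imt_index) {
      return kAssumedImtBaseOffset + (imt_index % kAssumedImtSize) * kAssumedPointerSize;
    }

    int main() {
      // Two different interface methods can land in the same slot (70 % 64 == 6 % 64).
      std::cout << ImtEntryOffset(70) << " " << ImtEntryOffset(6) << "\n";  // 48 48
      return 0;
    }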
- Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); + LocationSummary* locations = invoke->GetLocations(); + Register temp = XRegisterFrom(locations->GetTemp(0)); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value(); - Location receiver = invoke->GetLocations()->InAt(0); + Location receiver = locations->InAt(0); Offset class_offset = mirror::Object::ClassOffset(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); @@ -2843,14 +3437,22 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok scratch_scope.Exclude(ip1); __ Mov(ip1, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ Ldr(temp.W(), StackOperandFrom(receiver)); + // /* HeapReference<Class> */ temp = temp->klass_ __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetImtEntryAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -2972,7 +3574,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset)); } - // temp = current_method->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ Ldr(reg.X(), MemOperand(method_reg.X(), ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value())); @@ -3027,8 +3629,16 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te BlockPoolsScope block_pools(GetVIXLAssembler()); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). 
GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -3141,7 +3751,8 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, LocationFrom(calling_convention.GetRegisterAt(0)), - LocationFrom(vixl::x0)); + LocationFrom(vixl::x0), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { @@ -3151,30 +3762,56 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } + Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); Register current_method = InputRegisterAt(cls, 0); if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } } else { - DCHECK(cls->CanCallRuntime()); MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. 
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Cbz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Cbz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3220,12 +3857,35 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); + Location out_loc = load->GetLocations()->Out(); Register out = OutputRegister(load); Register current_method = InputRegisterAt(load, 0); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. 
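The replacement LoadString code that follows (like the LoadClass hunk above) selects between two GC-root load strategies: a plain load of the root when read barriers are off, or materializing the root's address and calling the root read barrier when they are on. A minimal model, with a stub standing in for the artReadBarrierForRootSlow runtime call:

    #include <iostream>

    struct Object {};
    struct GcRoot { Object* reference; };

    // Stand-in for the root read barrier entry point; the real runtime call
    // may return a to-space copy rather than the stored reference.
    Object* ReadBarrierForRootSlowStub(GcRoot* root) { return root->reference; }

    Object* LoadGcRoot(GcRoot* root, bool emit_read_barrier) {
      if (emit_read_barrier) {
        // out = &root; out = out->Read()  (via the slow path)
        return ReadBarrierForRootSlowStub(root);
      }
      // Plain load of the root.
      return root->reference;
    }

    int main() {
      Object str;
      GcRoot root{&str};
      std::cout << (LoadGcRoot(&root, true) == LoadGcRoot(&root, false)) << "\n";  // 1
      return 0;
    }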
+ + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + __ Cbz(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -3260,7 +3920,11 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -3349,8 +4013,6 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { locations->SetOut(LocationFrom(x0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); - CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, - void*, uint32_t, int32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { @@ -3372,17 +4034,12 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); - DCHECK(type_index.Is(w0)); - __ Mov(type_index, instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), @@ -3559,6 +4216,11 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { int32_t entry_offset = (type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } @@ -3803,9 +4465,7 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers int min_size = std::min(result_size, input_size); Register output = OutputRegister(conversion); Register source = InputRegisterAt(conversion, 0); - if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) { - __ Ubfx(output, source, 0, result_size * kBitsPerByte); - } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { // 'int' values are used directly as W registers, discarding the top // bits, so we don't need to sign-extend and can just perform a move. // We do not pass the `kDiscardForSameWReg` argument to force clearing the @@ -3814,9 +4474,11 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers // 32bit input value as a 64bit value assuming that the top 32 bits are // zero. __ Mov(output.W(), source.W()); - } else if ((result_type == Primitive::kPrimChar) || - ((input_type == Primitive::kPrimChar) && (result_size > input_size))) { - __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + __ Ubfx(output, + output.IsX() ? source.X() : source.W(), + 0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte); } else { __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); } @@ -3951,6 +4613,82 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst } } +void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. 
+ * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); + } +} + +void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 881afcc123..7950f078ad 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -424,6 +424,51 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. 
+ // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 801e203de5..9dc9167824 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -415,13 +415,11 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickInstanceofNonTrivial)); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -461,6 +459,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickDeoptimize)); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } @@ -2638,6 +2637,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2668,8 +2668,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (is_volatile && load_type == kLoadDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2692,21 +2691,34 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->Out().IsRegisterPair()); dst = locations->Out().AsRegisterPairLow<Register>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + if (obj == dst) { + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst, obj, offset); + } else { + __ LoadFromOffset(kLoadWord, dst, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + } } else { DCHECK(locations->Out().IsRegister()); dst = locations->Out().AsRegister<Register>(); + __ LoadFromOffset(load_type, dst, obj, offset); } - __ LoadFromOffset(load_type, dst, obj, 
field_info.GetFieldOffset().Uint32Value()); } else { DCHECK(locations->Out().IsFpuRegister()); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadSFromOffset(dst, obj, offset); } else { - __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadDFromOffset(dst, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } if (is_volatile) { @@ -2752,6 +2764,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); StoreOperandType store_type = kStoreByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2782,8 +2795,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (is_volatile && store_type == kStoreDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check. __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2806,21 +2818,28 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->InAt(1).IsRegisterPair()); src = locations->InAt(1).AsRegisterPairLow<Register>(); + Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>(); + __ StoreToOffset(kStoreWord, src, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize); } else { DCHECK(locations->InAt(1).IsRegister()); src = locations->InAt(1).AsRegister<Register>(); + __ StoreToOffset(store_type, src, obj, offset); } - __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value()); } else { DCHECK(locations->InAt(1).IsFpuRegister()); FRegister src = locations->InAt(1).AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreSToOffset(src, obj, offset); } else { - __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreDToOffset(src, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } // TODO: memory barriers? 
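The field get/set hunks above split 64-bit accesses into two 32-bit word operations; the order matters because the first access carries the implicit null check and, on the load side, because the low destination register may alias the base register. A small helper-style sketch of that ordering decision (illustrative, not ART code):

    #include <iostream>

    enum class FirstWord { kLow, kHigh };

    // For a 64-bit field load on MIPS32: load the high word first only when the
    // low half of the destination pair aliases the object (base) register, so
    // the base address survives for the second load.
    FirstWord PickFirstLoadedWord(int obj_reg, int dst_low_reg) {
      return (obj_reg == dst_low_reg) ? FirstWord::kHigh : FirstWord::kLow;
    }

    int main() {
      std::cout << (PickFirstLoadedWord(4, 4) == FirstWord::kHigh)
                << (PickFirstLoadedWord(4, 5) == FirstWord::kLow) << "\n";  // 11
      return 0;
    }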
@@ -3170,6 +3189,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { cls->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess)); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3181,21 +3201,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value()); __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3478,17 +3503,12 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - Register current_method_register = calling_convention.GetRegisterAt(1); - __ Lw(current_method_register, SP, kCurrentMethodStackOffset); - // Move an uint16_t value to a register. 
- __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, @@ -3705,7 +3725,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmodf)); - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { @@ -3713,7 +3733,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmod)); - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } default: diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 7b33075358..934f24bfb0 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -27,8 +27,8 @@ #include "mirror/class-inl.h" #include "offsets.h" #include "thread.h" -#include "utils/mips64/assembler_mips64.h" #include "utils/assembler.h" +#include "utils/mips64/assembler_mips64.h" #include "utils/stack_checks.h" namespace art { @@ -210,7 +210,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; } @@ -257,7 +257,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { type); RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } @@ -312,13 +312,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { - __ B(GetReturnLabel()); + __ Bc(GetReturnLabel()); } else { - __ B(mips64_codegen->GetLabelOf(successor_)); + __ Bc(mips64_codegen->GetLabelOf(successor_)); } } - Label* GetReturnLabel() { + Mips64Label* GetReturnLabel() { DCHECK(successor_ == nullptr); return &return_label_; } @@ -331,7 +331,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { HBasicBlock* const successor_; // If `successor_` is null, the label to branch to after the suspend check. 
- Label return_label_; + Mips64Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64); }; @@ -366,13 +366,11 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { instruction_, dex_pc, this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -380,7 +378,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } @@ -404,6 +402,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } @@ -441,6 +440,32 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value() void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + DCHECK_GE(new_position, old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + + // Adjust pc offsets for the disassembly information. + if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& it : *disasm_info_->GetInstructionIntervals()) { + it.second.start = __ GetAdjustedPosition(it.second.start); + it.second.end = __ GetAdjustedPosition(it.second.end); + } + for (auto& it : *disasm_info_->GetSlowPathIntervals()) { + it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start); + it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); + } + } + CodeGenerator::Finalize(allocator); } @@ -603,6 +628,7 @@ void CodeGeneratorMIPS64::GenerateFrameExit() { } __ Jr(RA); + __ Nop(); __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); @@ -939,7 +965,7 @@ Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const { } void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) { - Label done; + Mips64Label done; GpuRegister card = AT; GpuRegister temp = TMP; __ Beqzc(value, &done); @@ -1048,6 +1074,7 @@ void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset, // TODO: anything related to T9/GP/GOT/PIC/.so's? 
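A few hunks up in this file, CodeGeneratorMIPS64::Finalize() now runs branch fixup before emission and then remaps every recorded native pc offset (stack maps, disassembly intervals) through GetAdjustedPosition(). A simplified model of that remapping, assuming fixup only ever inserts bytes (consistent with the DCHECK_GE above):

    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    // Each entry: {original code offset, number of bytes inserted there by fixup}.
    using Insertions = std::vector<std::pair<uint32_t, uint32_t>>;

    uint32_t GetAdjustedPositionModel(uint32_t old_position, const Insertions& insertions) {
      uint32_t adjusted = old_position;
      for (const auto& ins : insertions) {
        if (ins.first <= old_position) {
          adjusted += ins.second;  // everything at or after the insertion point shifts down
        }
      }
      return adjusted;
    }

    int main() {
      Insertions fixup = {{8, 4}, {24, 8}};
      std::cout << GetAdjustedPositionModel(4, fixup) << " "    // 4: before any insertion
                << GetAdjustedPositionModel(16, fixup) << " "   // 20: shifted by the first
                << GetAdjustedPositionModel(32, fixup) << "\n"; // 44: shifted by both
      return 0;
    }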
__ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); __ Jalr(T9); + __ Nop(); RecordPcInfo(instruction, dex_pc, slow_path); } @@ -1079,7 +1106,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc __ Bind(slow_path->GetReturnLabel()); } else { __ Beqzc(TMP, codegen_->GetLabelOf(successor)); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); // slow_path will return to GetLabelOf(successor). } } @@ -1583,6 +1610,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } break; } @@ -1669,12 +1697,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) // length is limited by the maximum positive signed 32-bit integer. // Unsigned comparison of length and index checks for index < 0 // and for length <= index simultaneously. - // Mips R6 requires lhs != rhs for compact branches. - if (index == length) { - __ B(slow_path->GetEntryLabel()); - } else { - __ Bgeuc(index, length, slow_path->GetEntryLabel()); - } + __ Bgeuc(index, length, slow_path->GetEntryLabel()); } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { @@ -1796,6 +1819,19 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { : QUICK_ENTRY_POINT(pCmplDouble); } codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr); + if (in_type == Primitive::kPrimFloat) { + if (instruction->IsGtBias()) { + CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>(); + } else { + CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>(); + } + } else { + if (instruction->IsGtBias()) { + CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>(); + } else { + CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>(); + } + } break; } @@ -2264,7 +2300,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio if (value.IsConstant()) { int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); if (divisor == 0) { - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); } else { // A division by a non-null constant is valid. We don't need to perform // any check, so simply fall through. @@ -2316,7 +2352,7 @@ void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); } if (!codegen_->GoesToNextBlock(block, successor)) { - __ B(codegen_->GetLabelOf(successor)); + __ Bc(codegen_->GetLabelOf(successor)); } } @@ -2341,8 +2377,8 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target) { + Mips64Label* true_target, + Mips64Label* false_target) { HInstruction* cond = instruction->InputAt(condition_input_index); if (true_target == nullptr && false_target == nullptr) { @@ -2352,12 +2388,12 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // Constant condition, statically compared against 1. 
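Reviewer note: the explicit __ Nop() added after each __ Jalr(T9) (and after Jr(RA) in the frame exit) fills the branch delay slot. Unlike the R6 compact branches used elsewhere in this change (Bc, Beqzc, Jialc), these jumps still execute the instruction that follows them before control transfers. A toy emitter illustrating the rule (hypothetical helper, not ART's assembler API):

#include <string>
#include <vector>

// Minimal illustration: every register-indirect call is followed by an explicit
// nop so that no live instruction ends up in the delay slot.
void EmitCallViaRegisterSketch(std::vector<std::string>* code, const std::string& reg) {
  code->push_back("jalr " + reg);
  code->push_back("nop");  // delay slot filler, matching the added __ Nop() calls
}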
if (cond->AsIntConstant()->IsOne()) { if (true_target != nullptr) { - __ B(true_target); + __ Bc(true_target); } } else { DCHECK(cond->AsIntConstant()->IsZero()); if (false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } return; @@ -2397,7 +2433,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc } IfCondition if_cond; - Label* non_fallthrough_target; + Mips64Label* non_fallthrough_target; if (true_target == nullptr) { if_cond = condition->GetOppositeCondition(); non_fallthrough_target = false_target; @@ -2435,7 +2471,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero break; case kCondAE: - __ B(non_fallthrough_target); // always true + __ Bc(non_fallthrough_target); // always true break; } } else { @@ -2443,60 +2479,37 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc rhs_reg = TMP; __ LoadConst32(rhs_reg, rhs_imm); } - // It looks like we can get here with lhs == rhs. Should that be possible at all? - // Mips R6 requires lhs != rhs for compact branches. - if (lhs == rhs_reg) { - DCHECK(!use_imm); - switch (if_cond) { - case kCondEQ: - case kCondGE: - case kCondLE: - case kCondBE: - case kCondAE: - // if lhs == rhs for a positive condition, then it is a branch - __ B(non_fallthrough_target); - break; - case kCondNE: - case kCondLT: - case kCondGT: - case kCondB: - case kCondA: - // if lhs == rhs for a negative condition, then it is a NOP - break; - } - } else { - switch (if_cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, non_fallthrough_target); - break; - } + switch (if_cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondB: + __ Bltuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondAE: + __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondBE: + __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondA: + __ Bltuc(rhs_reg, lhs, non_fallthrough_target); + break; } } } @@ -2504,7 +2517,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // If neither branch falls through (case 3), the conditional branch to `true_target` // was already emitted (case 2) and we need to emit a jump to `false_target`. 
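Reviewer note: with the assembler now handling the lhs == rhs corner case itself, the removed special casing collapses into a direct table from HIR conditions to R6 compact branches, swapping operands where only the "less than" / "greater or equal" flavours exist. The mapping written out as a small self-contained function (the enum is redeclared locally just for the sketch):

enum IfConditionSketch {
  kCondEQ, kCondNE, kCondLT, kCondGE, kCondLE, kCondGT,  // signed
  kCondB, kCondAE, kCondBE, kCondA                       // unsigned
};

struct CompactBranchSketch {
  const char* mnemonic;
  bool swap_operands;  // emit branch(rhs, lhs, target) instead of (lhs, rhs, target)
};

// a <= b is emitted as b >= a, a > b as b < a, and likewise for the unsigned forms.
CompactBranchSketch SelectCompactBranch(IfConditionSketch cond) {
  switch (cond) {
    case kCondEQ: return {"beqc", false};
    case kCondNE: return {"bnec", false};
    case kCondLT: return {"bltc", false};
    case kCondGE: return {"bgec", false};
    case kCondLE: return {"bgec", true};
    case kCondGT: return {"bltc", true};
    case kCondB:  return {"bltuc", false};
    case kCondAE: return {"bgeuc", false};
    case kCondBE: return {"bgeuc", true};
    case kCondA:  return {"bltuc", true};
  }
  return {"", false};  // unreachable for valid inputs
}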
if (true_target != nullptr && false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } @@ -2518,9 +2531,9 @@ void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); - Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? nullptr : codegen_->GetLabelOf(true_successor); - Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } @@ -2695,7 +2708,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - Label done; + Mips64Label done; // Return 0 if `obj` is null. // TODO: Avoid this check if we know `obj` is not null. @@ -2790,6 +2803,7 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2924,13 +2938,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Jalr(&frame_entry_label_, T9); + __ Jialc(&frame_entry_label_, T9); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: // LR = invoke->GetDirectCodePtr(); __ LoadConst64(T9, invoke->GetDirectCodePtr()); // LR() __ Jalr(T9); + __ Nop(); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: @@ -2947,6 +2962,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kMips64WordSize).Int32Value()); // T9() __ Jalr(T9); + __ Nop(); break; } DCHECK(!IsLeafMethod()); @@ -2988,6 +3004,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -3016,6 +3033,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3027,22 +3045,26 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadUnsignedWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadDoubleword, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value()); __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); // TODO: We will need a read barrier here. 
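Reviewer note: the VisitLoadClass rewrite that continues below (and is mirrored for x86 and x86-64 later in this change) only creates the slow path when the class might be absent from the dex cache or still needs its class initialization check, and only emits the null test in the former case. The decision, pulled out as a sketch over plain booleans:

// Hypothetical helper; the booleans stand in for cls->IsInDexCache() and
// cls->MustGenerateClinitCheck().
enum class LoadClassPlanSketch { kNoSlowPath, kNullCheckOnly, kNullCheckAndClinit, kClinitOnly };

LoadClassPlanSketch PlanLoadClassSketch(bool is_in_dex_cache, bool must_generate_clinit_check) {
  if (is_in_dex_cache && !must_generate_clinit_check) {
    return LoadClassPlanSketch::kNoSlowPath;        // no runtime call can be needed
  }
  if (!is_in_dex_cache) {
    return must_generate_clinit_check ? LoadClassPlanSketch::kNullCheckAndClinit
                                      : LoadClassPlanSketch::kNullCheckOnly;
  }
  return LoadClassPlanSketch::kClinitOnly;          // cached, but initialization still pending
}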
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqzc(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqzc(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3132,7 +3154,11 @@ void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderMIPS64::VisitMul(HMul* mul) { @@ -3266,15 +3292,12 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - // Move an uint16_t value to a register. - __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex()); codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), @@ -3454,6 +3477,11 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { int32_t entry_offset = (type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } default: @@ -3763,6 +3791,11 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); @@ -3778,6 +3811,19 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + } + } else { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); @@ -3929,7 +3975,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); for (int32_t i = 0; i < num_entries; i++) { int32_t case_value = lower_bound + i; - Label* succ = codegen_->GetLabelOf(successors[i]); + Mips64Label* succ = codegen_->GetLabelOf(successors[i]); if (case_value == 0) { __ Beqzc(value_reg, succ); } else { @@ -3940,7 +3986,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins // And the default for any other value. 
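Reviewer note: VisitPackedSwitch lowers the switch as a linear chain of Beqzc/Beqc compares against lower_bound + i, followed by an unconditional Bc to the default block (see the line below) when it is not the fall-through successor. What that chain computes, in plain C++:

#include <cstdint>

// Index of the successor to branch to, or -1 for the default block.
int32_t PackedSwitchTargetSketch(int32_t value, int32_t lower_bound, int32_t num_entries) {
  for (int32_t i = 0; i < num_entries; ++i) {
    if (value == lower_bound + i) {
      return i;   // taken Beqzc/Beqc to successors[i]
    }
  }
  return -1;      // unconditional Bc to the default block (unless it falls through)
}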
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ B(codegen_->GetLabelOf(default_block)); + __ Bc(codegen_->GetLabelOf(default_block)); } } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index a078dd1819..85e3a4a3ce 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -158,12 +158,12 @@ class SlowPathCodeMIPS64 : public SlowPathCode { public: SlowPathCodeMIPS64() : entry_label_(), exit_label_() {} - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } + Mips64Label* GetEntryLabel() { return &entry_label_; } + Mips64Label* GetExitLabel() { return &exit_label_; } private: - Label entry_label_; - Label exit_label_; + Mips64Label entry_label_; + Mips64Label exit_label_; DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64); }; @@ -231,8 +231,8 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target); + Mips64Label* true_target, + Mips64Label* false_target); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -265,7 +265,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; } uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { - return GetLabelOf(block)->Position(); + return assembler_.GetLabelLocation(GetLabelOf(block)); } HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -298,12 +298,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator { return isa_features_; } - Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_, block); + Mips64Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Mips64Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_ = CommonInitializeLabels<Label>(); + block_labels_ = CommonInitializeLabels<Mips64Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -349,8 +349,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. - Label* block_labels_; // Indexed by block id. - Label frame_entry_label_; + Mips64Label* block_labels_; // Indexed by block id. 
+ Mips64Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; ParallelMoveResolverMIPS64 move_resolver_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index a87e8ede04..1fc09a81bc 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -67,6 +67,7 @@ class NullCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -93,6 +94,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -152,6 +154,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -177,6 +180,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -222,6 +226,7 @@ class LoadStringSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); RestoreLiveRegisters(codegen, locations); @@ -257,6 +262,11 @@ class LoadClassSlowPathX86 : public SlowPathCode { x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType), at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. 
Location out = locations->Out(); @@ -368,6 +378,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } @@ -410,6 +421,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -2460,6 +2472,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2468,6 +2481,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -3298,11 +3312,13 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); } else { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); } break; } @@ -3769,19 +3785,18 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3798,13 +3813,13 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); - // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -4856,7 +4871,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. // Ensure the card is in a byte register. 
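Reviewer note: the byte-register temp requested above exists because the write barrier stores a single byte into the card table entry covering the written object. A simplified sketch of card marking; the shift and the dirty value are assumptions for illustration, not taken from this change:

#include <cstdint>

// Assumptions for the sketch: 128-byte cards (shift of 7) and 0x70 as the
// "dirty" value; the real constants live in the runtime's card table.
constexpr uintptr_t kCardShiftSketch = 7;
constexpr uint8_t kCardDirtySketch = 0x70;

// Dirty the card covering `written_object` after a reference was stored into it.
void MarkGCCardSketch(uint8_t* card_table, uintptr_t written_object) {
  card_table[written_object >> kCardShiftSketch] = kCardDirtySketch;
}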
- locations->AddTemp(Location::RegisterLocation(ECX)); // Possibly used for read barrier too. + locations->AddTemp(Location::RegisterLocation(ECX)); } } @@ -5503,6 +5518,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5524,7 +5540,6 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(current_method, declaring_class_offset)); } } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ movl(out, Address(current_method, @@ -5541,15 +5556,22 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(out, cache_offset)); } - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5661,6 +5683,7 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { @@ -6150,6 +6173,11 @@ void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index dcc180804d..534ee1c5ab 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -65,6 +65,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -91,6 +92,7 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -149,6 +151,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -203,6 +206,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { instruction_, 
instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -240,6 +244,11 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } Location out = locations->Out(); // Move the class to the desired location. @@ -290,6 +299,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -386,6 +396,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { deoptimize, deoptimize->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -428,6 +439,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -3765,22 +3777,19 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), - instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3799,13 +3808,13 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), instruction->GetTypeIndex()); - // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -4500,8 +4509,6 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { // This first temporary register is possibly used for heap // reference poisoning and/or read barrier emission too. 
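Reviewer note: several comments here refer to heap reference poisoning, where compressed references are stored in an encoded form and decoded on load so that a stray raw pointer dereference faults. A sketch of the idea, assuming the encoding is simple negation of the 32-bit value (treat that as an assumption of this note, not something stated in the diff):

#include <cstdint>

// Assumed encoding: a poisoned reference is the negation of the 32-bit
// compressed reference, so null (0) is unchanged and other values become
// invalid as raw pointers.
uint32_t PoisonReferenceSketch(uint32_t ref)   { return 0u - ref; }
uint32_t UnpoisonReferenceSketch(uint32_t ref) { return 0u - ref; }
// Reference stores write the poisoned form and loads undo it, which is why the
// allocation entry points are documented above as taking care of poisoning
// their result before it is returned to compiled code.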
locations->AddTemp(Location::RequiresRegister()); - // This second temporary register is possibly used for read - // barrier emission too. locations->AddTemp(Location::RequiresRegister()); } } @@ -5129,6 +5136,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5150,7 +5158,6 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(current_method, declaring_class_offset)); } } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ movq(out, Address(current_method, @@ -5167,15 +5174,20 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(out, cache_offset)); } - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5278,6 +5290,7 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5772,6 +5785,11 @@ void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index e1a8c9cc0f..af8b8b562a 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ +#include "code_generator.h" #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" @@ -255,6 +256,67 @@ static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers, return true; } +static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kASR: return vixl::ASR; + case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL; + case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_SHIFT; + } +} + +static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind 
op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB; + case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH; + case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW; + case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB; + case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH; + case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_EXTEND; + } +} + +static inline bool CanFitInShifterOperand(HInstruction* instruction) { + if (instruction->IsTypeConversion()) { + HTypeConversion* conversion = instruction->AsTypeConversion(); + Primitive::Type result_type = conversion->GetResultType(); + Primitive::Type input_type = conversion->GetInputType(); + // We don't expect to see the same type as input and result. + return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) && + (result_type != input_type); + } else { + return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) || + (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) || + (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant()); + } +} + +static inline bool HasShifterOperand(HInstruction* instr) { + // `neg` instructions are an alias of `sub` using the zero register as the + // first register input. + bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() || + instr->IsOr() || instr->IsSub() || instr->IsXor(); + return res; +} + +static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { + DCHECK(HasShifterOperand(instruction)); + // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg` + // does *not* support extension. This is because the `extended register` form + // of the `sub` instruction interprets the left register with code 31 as the + // stack pointer and not the zero register. (So does the `immediate` form.) In + // the other form `shifted register, the register with code 31 is interpreted + // as the zero register. + return instruction->IsAdd() || instruction->IsSub(); +} + } // namespace helpers } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index d166d0061f..48bcd10b10 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -422,6 +422,19 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? 
"entry" : "exit"); } +#ifdef ART_ENABLE_CODEGEN_arm64 + void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { + StartAttributeStream("shift") << instruction->GetShiftAmount(); + } + } + + void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } +#endif + bool IsPass(const char* name) { return strcmp(pass_name_, name) == 0; } diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index c36de84064..4af111b784 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -377,9 +377,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { HInstruction* current = block->GetFirstInstruction(); while (current != nullptr) { - set->Kill(current->GetSideEffects()); // Save the next instruction in case `current` is removed from the graph. HInstruction* next = current->GetNext(); + // Do not kill the set with the side effects of the instruction just now: if + // the instruction is GVN'ed, we don't need to kill. if (current->CanBeMoved()) { if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { // For commutative ops, (x op y) will be treated the same as (y op x) @@ -395,8 +396,11 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { current->ReplaceWith(existing); current->GetBlock()->RemoveInstruction(current); } else { + set->Kill(current->GetSideEffects()); set->Add(current); } + } else { + set->Kill(current->GetSideEffects()); } current = next; } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 9ad2dd1c8e..2f3df7fc68 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -169,16 +169,6 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { // src instruction->ReplaceWith(input_other); instruction->GetBlock()->RemoveInstruction(instruction); - } else if (instruction->IsShl() && input_cst->IsOne()) { - // Replace Shl looking like - // SHL dst, src, 1 - // with - // ADD dst, src, src - HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(), - input_other, - input_other); - instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); - RecordSimplification(); } } } @@ -372,9 +362,8 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { block->RemoveInstruction(equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { - // Replace (bool_value == false) with !bool_value - block->ReplaceAndRemoveInstructionWith( - equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal)); + block->RemoveInstruction(equal); RecordSimplification(); } else { // Replace (bool_value == integer_not_zero_nor_one_constant) with false @@ -399,9 +388,8 @@ void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) { // We are comparing the boolean to a constant which is of type int and can // be any constant. 
if (input_const->AsIntConstant()->IsOne()) { - // Replace (bool_value != true) with !bool_value - block->ReplaceAndRemoveInstructionWith( - not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal)); + block->RemoveInstruction(not_equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { // Replace (bool_value != false) with bool_value diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index eb79f469eb..6a34b13320 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -16,11 +16,16 @@ #include "instruction_simplifier_arm64.h" +#include "common_arm64.h" #include "mirror/array-inl.h" namespace art { namespace arm64 { +using helpers::CanFitInShifterOperand; +using helpers::HasShifterOperand; +using helpers::ShifterOperandSupportsExtension; + void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access, HInstruction* array, HInstruction* index, @@ -62,6 +67,169 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio RecordSimplification(); } +bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge) { + DCHECK(HasShifterOperand(use)); + DCHECK(use->IsBinaryOperation() || use->IsNeg()); + DCHECK(CanFitInShifterOperand(bitfield_op)); + DCHECK(!bitfield_op->HasEnvironmentUses()); + + Primitive::Type type = use->GetType(); + if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) { + return false; + } + + HInstruction* left; + HInstruction* right; + if (use->IsBinaryOperation()) { + left = use->InputAt(0); + right = use->InputAt(1); + } else { + DCHECK(use->IsNeg()); + right = use->AsNeg()->InputAt(0); + left = GetGraph()->GetConstant(right->GetType(), 0); + } + DCHECK(left == bitfield_op || right == bitfield_op); + + if (left == right) { + // TODO: Handle special transformations in this situation? + // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`? + // Or should this be part of a separate transformation logic? + return false; + } + + bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative(); + HInstruction* other_input; + if (bitfield_op == right) { + other_input = left; + } else { + if (is_commutative) { + other_input = right; + } else { + return false; + } + } + + HArm64DataProcWithShifterOp::OpKind op_kind; + int shift_amount = 0; + HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount); + + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) && + !ShifterOperandSupportsExtension(use)) { + return false; + } + + if (do_merge) { + HArm64DataProcWithShifterOp* alu_with_op = + new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use, + other_input, + bitfield_op->InputAt(0), + op_kind, + shift_amount, + use->GetDexPc()); + use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op); + if (bitfield_op->GetUses().IsEmpty()) { + bitfield_op->GetBlock()->RemoveInstruction(bitfield_op); + } + RecordSimplification(); + } + + return true; +} + +// Merge a bitfield move instruction into its uses if it can be merged in all of them. 
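Reviewer note: TryMergeIntoShifterOperand folds a shift or an integral sign/zero extension into the arithmetic instruction that consumes it, which is what AArch64's shifted-register and extended-register operand forms provide; as noted above, extensions are only merged into add/sub because the extended form of sub reads register 31 as the stack pointer. The source-level shapes and the intended single instructions (the mnemonics in the comments are illustrative):

#include <cstdint>

// Before merging, a shift feeds an add as two instructions:
//     lsl w8, w1, #3
//     add w0, w0, w8
// After merging into the shifter operand it is a single instruction:
//     add w0, w0, w1, lsl #3
int32_t AddShiftedSketch(int32_t acc, int32_t x) {
  return acc + (x << 3);
}

// Extensions merge only into add/sub (HNeg is excluded, as explained above):
//     add x0, x0, w1, sxtw
int64_t AddExtendedSketch(int64_t acc, int32_t x) {
  return acc + static_cast<int64_t>(x);
}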
+bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) { + DCHECK(CanFitInShifterOperand(bitfield_op)); + + if (bitfield_op->HasEnvironmentUses()) { + return false; + } + + const HUseList<HInstruction*>& uses = bitfield_op->GetUses(); + + // Check whether we can merge the instruction in all its users' shifter operand. + for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + if (!HasShifterOperand(use)) { + return false; + } + if (!CanMergeIntoShifterOperand(use, bitfield_op)) { + return false; + } + } + + // Merge the instruction into its uses. + for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + bool merged = MergeIntoShifterOperand(use, bitfield_op); + DCHECK(merged); + } + + return true; +} + +bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns( + HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { + DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(input_binop->IsAdd() || input_binop->IsSub()); + DCHECK_NE(input_binop, input_other); + if (!input_binop->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // Try to interpret patterns like + // a * (b <+/-> 1) + // as + // (a * b) <+/-> a + HInstruction* input_a = input_other; + HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. + HInstruction::InstructionKind op_kind; + + if (input_binop->IsAdd()) { + if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { + // Interpret + // a * (b + 1) + // as + // (a * b) + a + input_b = input_binop->GetLeastConstantLeft(); + op_kind = HInstruction::kAdd; + } + } else { + DCHECK(input_binop->IsSub()); + if (input_binop->GetRight()->IsConstant() && + input_binop->GetRight()->AsConstant()->IsMinusOne()) { + // Interpret + // a * (b - (-1)) + // as + // a + (a * b) + input_b = input_binop->GetLeft(); + op_kind = HInstruction::kAdd; + } else if (input_binop->GetLeft()->IsConstant() && + input_binop->GetLeft()->AsConstant()->IsOne()) { + // Interpret + // a * (1 - b) + // as + // a - (a * b) + input_b = input_binop->GetRight(); + op_kind = HInstruction::kSub; + } + } + + if (input_b == nullptr) { + // We did not find a pattern we can optimize. + return false; + } + + HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate( + mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); + + mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); + input_binop->GetBlock()->RemoveInstruction(input_binop); + + return false; +} + void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { TryExtractArrayAccessAddress(instruction, instruction->GetArray(), @@ -76,5 +244,110 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { Primitive::ComponentSize(instruction->GetComponentType())); } +void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntOrLongType(type)) { + return; + } + + HInstruction* use = instruction->HasNonEnvironmentUses() + ? 
instruction->GetUses().GetFirst()->GetUser() + : nullptr; + + if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) { + // Replace code looking like + // MUL tmp, x, y + // SUB dst, acc, tmp + // with + // MULSUB dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HBinaryOperation* binop = use->AsBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // Be careful after GVN. This should not happen since the `HMul` has only + // one use. + DCHECK_NE(binop_left, binop_right); + if (binop_right == instruction) { + accumulator = binop_left; + } else if (use->IsAdd()) { + DCHECK_EQ(binop_left, instruction); + accumulator = binop_right; + } + + if (accumulator != nullptr) { + HArm64MultiplyAccumulate* mulacc = + new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type, + binop->GetKind(), + accumulator, + instruction->GetLeft(), + instruction->GetRight()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!instruction->HasUses()); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + } + + // Use multiply accumulate instruction for a few simple patterns. + // We prefer not applying the following transformations if the left and + // right inputs perform the same operation. + // We rely on GVN having squashed the inputs if appropriate. However the + // results are still correct even if that did not happen. + if (instruction->GetLeft() == instruction->GetRight()) { + return; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if ((right->IsAdd() || right->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) { + return; + } + if ((left->IsAdd() || left->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) { + return; + } +} + +void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) { + Primitive::Type result_type = instruction->GetResultType(); + Primitive::Type input_type = instruction->GetInputType(); + + if (input_type == result_type) { + // We let the arch-independent code handle this. 
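Reviewer note: VisitMul above turns a multiply feeding a single add or sub into HArm64MultiplyAccumulate, and TrySimpleMultiplyAccumulatePatterns additionally recognizes a * (b + 1) and a * (1 - b) by rewriting them around the same accumulate form. The recognized shapes at the source level (AArch64 madd/msub named in the comments for orientation):

#include <cstdint>

// mul feeding add  ->  madd dst, x, y, acc
int32_t MulAddSketch(int32_t acc, int32_t x, int32_t y) { return acc + x * y; }

// mul feeding sub  ->  msub dst, x, y, acc
int32_t MulSubSketch(int32_t acc, int32_t x, int32_t y) { return acc - x * y; }

// a * (b + 1) == (a * b) + a, so it becomes a madd with a as the accumulator.
int32_t MulOfPlusOneSketch(int32_t a, int32_t b) { return a * (b + 1); }

// a * (1 - b) == a - (a * b), the msub-shaped variant.
int32_t MulOfOneMinusBSketch(int32_t a, int32_t b) { return a * (1 - b); }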
+ return; + } + + if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 4b697dba0e..b7f490bb8c 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -39,9 +39,30 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { HInstruction* array, HInstruction* index, int access_size); + bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); + bool TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge); + bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + return TryMergeIntoShifterOperand(use, bitfield_op, false); + } + bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); + return TryMergeIntoShifterOperand(use, bitfield_op, true); + } + + bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, + HBinaryOperation* input_binop, + HInstruction* input_other); + // HInstruction visitors, sorted alphabetically. void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 059abf090d..b04dcceb05 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -143,7 +143,23 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathARM64 slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathARM64 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } #define __ masm-> @@ -818,9 +834,12 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_; - Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. - Register trg = RegisterFrom(locations->Out(), type); + Location base_loc = locations->InAt(1); + Register base = WRegisterFrom(base_loc); // Object pointer. 
+ Location offset_loc = locations->InAt(2); + Register offset = XRegisterFrom(offset_loc); // Long offset. + Location trg_loc = locations->Out(); + Register trg = RegisterFrom(trg_loc, type); bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease(); MemOperand mem_op(base.X(), offset); @@ -837,13 +856,18 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == Primitive::kPrimNot) { DCHECK(trg.IsW()); - codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); } } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1057,6 +1081,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (use_acquire_release) { __ Bind(&loop_head); __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1065,6 +1092,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Dmb(InnerShareable, BarrierWrites); __ Bind(&loop_head); __ Ldxr(tmp_value, MemOperand(tmp_ptr)); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1090,7 +1120,11 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic does not always work when heap // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it // off temporarily as a quick fix. + // // TODO(rpl): Fix it and turn it back on. + // + // TODO(rpl): Also, we should investigate whether we need a read + // barrier in the generated code. 
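Reviewer note: GenCas builds the usual load-exclusive / store-exclusive retry loop (Ldaxr/Cmp/Stlxr, or the Ldxr/Stxr variant with explicit barriers) for the UnsafeCAS intrinsics. Its observable behaviour is a strong compare-and-swap; the portable C++ equivalent of what the loop computes:

#include <atomic>
#include <cstdint>

// Atomically replace *addr with `new_value` iff it still contains `expected`,
// and report whether it did.
bool CompareAndSwapSketch(std::atomic<int32_t>* addr, int32_t expected, int32_t new_value) {
  // A strong CAS may not fail spuriously, so on LL/SC hardware the library
  // loops internally, just as the emitted code branches back to loop_head
  // when the store-exclusive fails.
  return addr->compare_exchange_strong(expected, new_value, std::memory_order_seq_cst);
}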
if (kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index a94e3a8c23..326844526e 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -138,6 +138,221 @@ bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) { #define __ assembler-> +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + + if (is64bit) { + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + __ Mfc1(out_lo, in); + __ Mfhc1(out_hi, in); + } else { + Register out = locations->Out().AsRegister<Register>(); + + __ Mfc1(out, in); + } +} + +// long java.lang.Double.doubleToRawLongBits(double) +void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} + +// int java.lang.Float.floatToRawIntBits(float) +void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + if (is64bit) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + + __ Mtc1(in_lo, out); + __ Mthc1(in_hi, out); + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + + __ Mtc1(in, out); + } +} + +// double java.lang.Double.longBitsToDouble(long) +void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Float.intBitsToFloat(int) +void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + 
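Reviewer note: the new Double.doubleToRawLongBits / Float.floatToRawIntBits / longBitsToDouble / intBitsToFloat intrinsics for MIPS are pure register moves between the FPU and the core registers (Mfc1/Mfhc1 and Mtc1/Mthc1); no numeric conversion is involved. The portable equivalent is a raw bit copy:

#include <cstdint>
#include <cstring>

// Float.floatToRawIntBits: copy the 32 bits unchanged (Mfc1 in the intrinsic).
int32_t FloatToRawIntBitsSketch(float f) {
  int32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return bits;
}

// Double.longBitsToDouble: the reverse 64-bit direction (Mtc1 + Mthc1 on MIPS32).
double LongBitsToDoubleSketch(int64_t bits) {
  double d;
  std::memcpy(&d, &bits, sizeof(d));
  return d;
}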
+static void GenReverseBytes(LocationSummary* locations, + Primitive::Type type, + MipsAssembler* assembler, + bool isR2OrNewer) { + DCHECK(type == Primitive::kPrimShort || + type == Primitive::kPrimInt || + type == Primitive::kPrimLong); + + if (type == Primitive::kPrimShort) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (isR2OrNewer) { + __ Wsbh(out, in); + __ Seh(out, out); + } else { + __ Sll(TMP, in, 24); + __ Sra(TMP, TMP, 16); + __ Sll(out, in, 16); + __ Srl(out, out, 24); + __ Or(out, out, TMP); + } + } else if (type == Primitive::kPrimInt) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (isR2OrNewer) { + __ Rotr(out, in, 16); + __ Wsbh(out, out); + } else { + // MIPS32r1 + // __ Rotr(out, in, 16); + __ Sll(TMP, in, 16); + __ Srl(out, in, 16); + __ Or(out, out, TMP); + // __ Wsbh(out, out); + __ LoadConst32(AT, 0x00FF00FF); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 8); + __ Srl(out, out, 8); + __ And(out, out, AT); + __ Or(out, out, TMP); + } + } else if (type == Primitive::kPrimLong) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + if (isR2OrNewer) { + __ Rotr(AT, in_hi, 16); + __ Rotr(TMP, in_lo, 16); + __ Wsbh(out_lo, AT); + __ Wsbh(out_hi, TMP); + } else { + // When calling CreateIntToIntLocations() we promised that the + // use of the out_lo/out_hi wouldn't overlap with the use of + // in_lo/in_hi. Be very careful not to write to out_lo/out_hi + // until we're completely done reading from in_lo/in_hi. + // __ Rotr(TMP, in_lo, 16); + __ Sll(TMP, in_lo, 16); + __ Srl(AT, in_lo, 16); + __ Or(TMP, TMP, AT); // Hold in TMP until it's safe + // to write to out_hi. + // __ Rotr(out_lo, in_hi, 16); + __ Sll(AT, in_hi, 16); + __ Srl(out_lo, in_hi, 16); // Here we are finally done reading + // from in_lo/in_hi so it's okay to + // write to out_lo/out_hi. 
+ __ Or(out_lo, out_lo, AT); + // __ Wsbh(out_hi, out_hi); + __ LoadConst32(AT, 0x00FF00FF); + __ And(out_hi, TMP, AT); + __ Sll(out_hi, out_hi, 8); + __ Srl(TMP, TMP, 8); + __ And(TMP, TMP, AT); + __ Or(out_hi, out_hi, TMP); + // __ Wsbh(out_lo, out_lo); + __ And(TMP, out_lo, AT); // AT already holds the correct mask value + __ Sll(TMP, TMP, 8); + __ Srl(out_lo, out_lo, 8); + __ And(out_lo, out_lo, AT); + __ Or(out_lo, out_lo, TMP); + } + } +} + +// int java.lang.Integer.reverseBytes(int) +void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimInt, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + +// long java.lang.Long.reverseBytes(long) +void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimLong, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + +// short java.lang.Short.reverseBytes(short) +void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimShort, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + // boolean java.lang.String.equals(Object anObject) void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -250,15 +465,8 @@ void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) -UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros) UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros) -UNIMPLEMENTED_INTRINSIC(FloatIntBitsToFloat) -UNIMPLEMENTED_INTRINSIC(DoubleLongBitsToDouble) -UNIMPLEMENTED_INTRINSIC(FloatFloatToRawIntBits) -UNIMPLEMENTED_INTRINSIC(DoubleDoubleToRawLongBits) UNIMPLEMENTED_INTRINSIC(MathAbsDouble) UNIMPLEMENTED_INTRINSIC(MathAbsFloat) UNIMPLEMENTED_INTRINSIC(MathAbsInt) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 3654159f83..ecee11dea6 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -115,7 +115,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, invoke_->GetLocations()); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; } @@ -806,7 +806,7 @@ static void GenRoundingMode(LocationSummary* locations, DCHECK_NE(in, out); - Label done; + Mips64Label done; // double floor/ceil(double in) { // if in.isNaN || in.isInfinite || in.isZero { @@ -1256,7 +1256,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); // result = tmp_value != 0; - Label loop_head, exit_loop; + Mips64Label 
loop_head, exit_loop; __ Daddu(TMP, base, offset); __ Sync(0); __ Bind(&loop_head); @@ -1418,10 +1418,10 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>(); GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>(); - Label loop; - Label end; - Label return_true; - Label return_false; + Mips64Label loop; + Mips64Label end; + Mips64Label return_true; + Mips64Label return_false; // Get offsets of count, value, and class fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); @@ -1485,7 +1485,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { // If loop does not result in returning false, we return true. __ Bind(&return_true); __ LoadConst64(out, 1); - __ B(&end); + __ Bc(&end); // Return false and exit the function. __ Bind(&return_false); @@ -1514,7 +1514,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // full slow-path down and branch unconditionally. slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); codegen->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 0a39ff31bf..890598d687 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2090,4 +2090,46 @@ void HInstruction::RemoveEnvironmentUsers() { env_uses_.Clear(); } +// Returns an instruction with the opposite boolean value from 'cond'. +HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) { + ArenaAllocator* allocator = GetArena(); + + if (cond->IsCondition() && + !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) { + // Can't reverse floating point conditions. We have to use HBooleanNot in that case. 
+ HInstruction* lhs = cond->InputAt(0); + HInstruction* rhs = cond->InputAt(1); + HInstruction* replacement = nullptr; + switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* + case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break; + case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break; + case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break; + case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break; + case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break; + case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break; + case kCondB: replacement = new (allocator) HBelow(lhs, rhs); break; + case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break; + case kCondA: replacement = new (allocator) HAbove(lhs, rhs); break; + case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break; + default: + LOG(FATAL) << "Unexpected condition"; + UNREACHABLE(); + } + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } else if (cond->IsIntConstant()) { + HIntConstant* int_const = cond->AsIntConstant(); + if (int_const->IsZero()) { + return GetIntConstant(1); + } else { + DCHECK(int_const->IsOne()); + return GetIntConstant(0); + } + } else { + HInstruction* replacement = new (allocator) HBooleanNot(cond); + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 4f894b07c7..d5110a7172 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -371,6 +371,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } + // Returns an instruction with the opposite boolean value from 'cond'. + // The instruction has been inserted into the graph, either as a constant, or + // before cursor. + HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor); + private: void FindBackEdges(ArenaBitVector* visited); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; @@ -1096,7 +1101,9 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - M(Arm64IntermediateAddress, Instruction) + M(Arm64DataProcWithShifterOp, Instruction) \ + M(Arm64IntermediateAddress, Instruction) \ + M(Arm64MultiplyAccumulate, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -1626,6 +1633,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { return holder_; } + + bool IsFromInlinedInvoke() const { + return GetParent() != nullptr; + } + private: // Record instructions' use entries of this environment for constant-time removal. // It should only be called by HInstruction when a new environment use is added. 
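Aside on the InsertOppositeCondition helper added above: the floating-point case cannot simply swap the comparison for its opposite because every ordered comparison involving NaN evaluates to false, so the "opposite" comparison is not the logical negation; that is why the code falls back to HBooleanNot there. A standalone C++ sketch, not ART code, demonstrating the underlying IEEE 754 behaviour:

#include <cassert>
#include <cmath>

// For integers, negating (a < b) is the same as (a >= b). With floating point
// this breaks down in the presence of NaN: both (a < b) and (a >= b) are false,
// so replacing a condition by its opposite does not negate it.
int main() {
  double a = std::nan("");
  double b = 1.0;
  assert(!(a < b));        // NaN compares false with everything, so !(a < b) is true.
  assert(!(a >= b));       // The "opposite" comparison is also false.
  // Hence !(a < b) != (a >= b) once NaN is involved, which is why a boolean
  // negation of the original condition is required instead.
  return 0;
}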
@@ -3238,7 +3250,7 @@ class HInvoke : public HInstruction { void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache); bool IsFromInlinedInvoke() const { - return GetEnvironment()->GetParent() != nullptr; + return GetEnvironment()->IsFromInlinedInvoke(); } bool CanThrow() const OVERRIDE { return true; } @@ -3652,9 +3664,10 @@ class HInvokeInterface : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); }; -class HNewInstance : public HExpression<1> { +class HNewInstance : public HExpression<2> { public: - HNewInstance(HCurrentMethod* current_method, + HNewInstance(HInstruction* cls, + HCurrentMethod* current_method, uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, @@ -3667,7 +3680,8 @@ class HNewInstance : public HExpression<1> { can_throw_(can_throw), finalizable_(finalizable), entrypoint_(entrypoint) { - SetRawInputAt(0, current_method); + SetRawInputAt(0, cls); + SetRawInputAt(1, current_method); } uint16_t GetTypeIndex() const { return type_index_; } @@ -3687,6 +3701,10 @@ class HNewInstance : public HExpression<1> { QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } + void SetEntrypoint(QuickEntrypointEnum entrypoint) { + entrypoint_ = entrypoint; + } + DECLARE_INSTRUCTION(NewInstance); private: @@ -3694,7 +3712,7 @@ class HNewInstance : public HExpression<1> { const DexFile& dex_file_; const bool can_throw_; const bool finalizable_; - const QuickEntrypointEnum entrypoint_; + QuickEntrypointEnum entrypoint_; DISALLOW_COPY_AND_ASSIGN(HNewInstance); }; @@ -4302,9 +4320,13 @@ class HPhi : public HInstruction { : HInstruction(SideEffects::None(), dex_pc), inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)), reg_number_(reg_number), - type_(type), - is_live_(false), + type_(ToPhiType(type)), + // Phis are constructed live and marked dead if conflicting or unused. + // Individual steps of SsaBuilder should assume that if a phi has been + // marked dead, it can be ignored and will be removed by SsaPhiElimination. + is_live_(true), can_be_null_(true) { + DCHECK_NE(type_, Primitive::kPrimVoid); } // Returns a type equivalent to the given `type`, but that a `HPhi` can hold. @@ -4775,13 +4797,15 @@ class HLoadClass : public HExpression<1> { const DexFile& dex_file, bool is_referrers_class, uint32_t dex_pc, - bool needs_access_check) + bool needs_access_check, + bool is_in_dex_cache) : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc), type_index_(type_index), dex_file_(dex_file), is_referrers_class_(is_referrers_class), generate_clinit_check_(false), needs_access_check_(needs_access_check), + is_in_dex_cache_(is_in_dex_cache), loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. @@ -4806,14 +4830,13 @@ class HLoadClass : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } bool NeedsEnvironment() const OVERRIDE { - // Will call runtime and load the class if the class is not loaded yet. - // TODO: finer grain decision. - return !is_referrers_class_; + return CanCallRuntime(); } bool MustGenerateClinitCheck() const { return generate_clinit_check_; } + void SetMustGenerateClinitCheck(bool generate_clinit_check) { // The entrypoint the code generator is going to call does not do // clinit of the class. 
@@ -4822,7 +4845,9 @@ class HLoadClass : public HExpression<1> { } bool CanCallRuntime() const { - return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_; + return MustGenerateClinitCheck() || + (!is_referrers_class_ && !is_in_dex_cache_) || + needs_access_check_; } bool NeedsAccessCheck() const { @@ -4830,8 +4855,6 @@ class HLoadClass : public HExpression<1> { } bool CanThrow() const OVERRIDE { - // May call runtime and and therefore can throw. - // TODO: finer grain decision. return CanCallRuntime(); } @@ -4853,6 +4876,8 @@ class HLoadClass : public HExpression<1> { return SideEffects::CanTriggerGC(); } + bool IsInDexCache() const { return is_in_dex_cache_; } + DECLARE_INSTRUCTION(LoadClass); private: @@ -4862,7 +4887,8 @@ class HLoadClass : public HExpression<1> { // Whether this instruction must generate the initialization check. // Used for code generation. bool generate_clinit_check_; - bool needs_access_check_; + const bool needs_access_check_; + const bool is_in_dex_cache_; ReferenceTypeInfo loaded_class_rti_; @@ -4927,6 +4953,7 @@ class HClinitCheck : public HExpression<1> { return true; } + bool CanThrow() const OVERRIDE { return true; } HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); } diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc new file mode 100644 index 0000000000..ac2f093847 --- /dev/null +++ b/compiler/optimizing/nodes_arm64.cc @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common_arm64.h" +#include "nodes.h" + +namespace art { + +using arm64::helpers::CanFitInShifterOperand; + +void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount) { + DCHECK(CanFitInShifterOperand(instruction)); + if (instruction->IsShl()) { + *op_kind = kLSL; + *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsShr()) { + *op_kind = kASR; + *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsUShr()) { + *op_kind = kLSR; + *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue(); + } else { + DCHECK(instruction->IsTypeConversion()); + Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType(); + Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType(); + int result_size = Primitive::ComponentSize(result_type); + int input_size = Primitive::ComponentSize(input_type); + int min_size = std::min(result_size, input_size); + // This follows the logic in + // `InstructionCodeGeneratorARM64::VisitTypeConversion()`. + if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + // There is actually nothing to do. The register will be used as a W + // register, discarding the top bits. This is represented by the default + // encoding 'LSL 0'. 
+ *op_kind = kLSL; + *shift_amount = 0; + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + *op_kind = kUXTH; + } else { + switch (min_size) { + case 1: *op_kind = kSXTB; break; + case 2: *op_kind = kSXTH; break; + case 4: *op_kind = kSXTW; break; + default: + LOG(FATAL) << "Unexpected min size " << min_size; + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) { + switch (op) { + case HArm64DataProcWithShifterOp::kLSL: return os << "LSL"; + case HArm64DataProcWithShifterOp::kLSR: return os << "LSR"; + case HArm64DataProcWithShifterOp::kASR: return os << "ASR"; + case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB"; + case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH"; + case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW"; + case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB"; + case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH"; + case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW"; + default: + LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op); + UNREACHABLE(); + } +} + +} // namespace art diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 885d3a29ee..e8439354af 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -19,6 +19,79 @@ namespace art { +class HArm64DataProcWithShifterOp : public HExpression<2> { + public: + enum OpKind { + kLSL, // Logical shift left. + kLSR, // Logical shift right. + kASR, // Arithmetic shift right. + kUXTB, // Unsigned extend byte. + kUXTH, // Unsigned extend half-word. + kUXTW, // Unsigned extend word. + kSXTB, // Signed extend byte. + kSXTH, // Signed extend half-word. + kSXTW, // Signed extend word. + + // Aliases. + kFirstShiftOp = kLSL, + kLastShiftOp = kASR, + kFirstExtensionOp = kUXTB, + kLastExtensionOp = kSXTW + }; + HArm64DataProcWithShifterOp(HInstruction* instr, + HInstruction* left, + HInstruction* right, + OpKind op, + // The shift argument is unused if the operation + // is an extension. + int shift = 0, + uint32_t dex_pc = kNoDexPc) + : HExpression(instr->GetType(), SideEffects::None(), dex_pc), + instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) { + DCHECK(!instr->HasSideEffects()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE { + HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp(); + return instr_kind_ == other->instr_kind_ && + op_kind_ == other->op_kind_ && + shift_amount_ == other->shift_amount_; + } + + static bool IsShiftOp(OpKind op_kind) { + return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp; + } + + static bool IsExtensionOp(OpKind op_kind) { + return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp; + } + + // Find the operation kind and shift amount from a bitfield move instruction. 
+ static void GetOpInfoFromInstruction(HInstruction* bitfield_op, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount); + + InstructionKind GetInstrKind() const { return instr_kind_; } + OpKind GetOpKind() const { return op_kind_; } + int GetShiftAmount() const { return shift_amount_; } + + DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp); + + private: + InstructionKind instr_kind_; + OpKind op_kind_; + int shift_amount_; + + friend std::ostream& operator<<(std::ostream& os, OpKind op); + + DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp); +}; + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op); + // This instruction computes an intermediate address pointing in the 'middle' of an object. The // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is // never used across anything that can trigger GC. @@ -42,6 +115,40 @@ class HArm64IntermediateAddress : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); }; +class HArm64MultiplyAccumulate : public HExpression<3> { + public: + HArm64MultiplyAccumulate(Primitive::Type type, + InstructionKind op, + HInstruction* accumulator, + HInstruction* mul_left, + HInstruction* mul_right, + uint32_t dex_pc = kNoDexPc) + : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + SetRawInputAt(kInputAccumulatorIndex, accumulator); + SetRawInputAt(kInputMulLeftIndex, mul_left); + SetRawInputAt(kInputMulRightIndex, mul_right); + } + + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_; + } + + InstructionKind GetOpKind() const { return op_kind_; } + + DECLARE_INSTRUCTION(Arm64MultiplyAccumulate); + + private: + // Indicates if this is a MADD or MSUB. + InstructionKind op_kind_; + + DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 34f1fe5949..2b0d522b31 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -25,6 +25,7 @@ #include "utils/assembler.h" #include "utils/arm/assembler_thumb2.h" #include "utils/mips/assembler_mips.h" +#include "utils/mips64/assembler_mips64.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -212,6 +213,34 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { Check(kMips, "kMips_adjust", expected_asm, expected_cfi); } +TEST_F(OptimizingCFITest, kMips64Adjust) { + // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. 
+ static constexpr size_t kNumNops = 1u + (1u << 15); + std::vector<uint8_t> expected_asm( + expected_asm_kMips64_adjust_head, + expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head)); + expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); + expected_asm.insert( + expected_asm.end(), + expected_asm_kMips64_adjust_tail, + expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kMips64_adjust, + expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust)); + SetUpFrame(kMips64); +#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())-> + mips64::Mips64Label target; + __ Beqc(mips64::A1, mips64::A2, &target); + // Push the target out of range of BEQC. + for (size_t i = 0; i != kNumNops; ++i) { + __ Nop(); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); +} + #endif // __ANDROID__ } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 4571ebf2d4..de857295c7 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -413,3 +413,57 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x0002007c: nop // 0x00020080: .cfi_restore_state // 0x00020080: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { + 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, + 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, + 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60, + 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, +}; +static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { + 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, + 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, + 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64_adjust[] = { + 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, + 0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: daddiu r29, r29, -40 +// 0x00000004: .cfi_def_cfa_offset: 40 +// 0x00000004: sd r31, +32(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r17, +24(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-16 +// 0x0000000c: sd r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-24 +// 0x00000010: sdc1 f25, +8(r29) +// 0x00000014: sdc1 f24, +0(r29) +// 0x00000018: daddiu r29, r29, -24 +// 0x0000001c: .cfi_def_cfa_offset: 64 +// 0x0000001c: sd r4, +0(r29) +// 0x00000020: bnec r5, r6, 0x0000002c ; +12 +// 0x00000024: auipc r1, 2 +// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080 +// 0x0000002c: nop +// ... 
+// 0x0002002c: nop +// 0x00020030: .cfi_remember_state +// 0x00020030: daddiu r29, r29, 24 +// 0x00020034: .cfi_def_cfa_offset: 40 +// 0x00020034: ldc1 f24, +0(r29) +// 0x00020038: ldc1 f25, +8(r29) +// 0x0002003c: ld r16, +16(r29) +// 0x00020040: .cfi_restore: r16 +// 0x00020040: ld r17, +24(r29) +// 0x00020044: .cfi_restore: r17 +// 0x00020044: ld r31, +32(r29) +// 0x00020048: .cfi_restore: r31 +// 0x00020048: daddiu r29, r29, 40 +// 0x0002004c: .cfi_def_cfa_offset: 0 +// 0x0002004c: jr r31 +// 0x00020050: nop +// 0x00020054: .cfi_restore_state +// 0x00020054: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 2204921c53..8440813a87 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -110,24 +110,23 @@ class PassScope; class PassObserver : public ValueObject { public: PassObserver(HGraph* graph, - const char* method_name, CodeGenerator* codegen, std::ostream* visualizer_output, CompilerDriver* compiler_driver) : graph_(graph), - method_name_(method_name), + cached_method_name_(), timing_logger_enabled_(compiler_driver->GetDumpPasses()), - timing_logger_(method_name, true, true), + timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), visualizer_(visualizer_output, graph, *codegen), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { - if (!IsVerboseMethod(compiler_driver, method_name)) { + if (!IsVerboseMethod(compiler_driver, GetMethodName())) { timing_logger_enabled_ = visualizer_enabled_ = false; } if (visualizer_enabled_) { - visualizer_.PrintHeader(method_name_); + visualizer_.PrintHeader(GetMethodName()); codegen->SetDisassemblyInformation(&disasm_info_); } } @@ -135,7 +134,7 @@ class PassObserver : public ValueObject { ~PassObserver() { if (timing_logger_enabled_) { - LOG(INFO) << "TIMINGS " << method_name_; + LOG(INFO) << "TIMINGS " << GetMethodName(); LOG(INFO) << Dumpable<TimingLogger>(timing_logger_); } } @@ -148,6 +147,14 @@ class PassObserver : public ValueObject { void SetGraphInBadState() { graph_in_bad_state_ = true; } + const char* GetMethodName() { + // PrettyMethod() is expensive, so we delay calling it until we actually have to. + if (cached_method_name_.empty()) { + cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile()); + } + return cached_method_name_.c_str(); + } + private: void StartPass(const char* pass_name) { // Dump graph first, then start timer. @@ -206,7 +213,8 @@ class PassObserver : public ValueObject { } HGraph* const graph_; - const char* method_name_; + + std::string cached_method_name_; bool timing_logger_enabled_; TimingLogger timing_logger_; @@ -383,10 +391,11 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == kX86_64; } -// Read barrier are supported only on ARM, x86 and x86-64 at the moment. +// Read barrier are supported on ARM, ARM64, x86 and x86-64 at the moment. 
// TODO: Add support for other architectures and remove this function static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { - return instruction_set == kThumb2 + return instruction_set == kArm64 + || instruction_set == kThumb2 || instruction_set == kX86 || instruction_set == kX86_64; } @@ -664,7 +673,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, jobject class_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { - std::string method_name = PrettyMethod(method_idx, dex_file); MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); @@ -728,7 +736,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()); PassObserver pass_observer(graph, - method_name.c_str(), codegen.get(), visualizer_output_.get(), compiler_driver); @@ -756,7 +763,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, interpreter_metadata, dex_cache); - VLOG(compiler) << "Building " << method_name; + VLOG(compiler) << "Building " << pass_observer.GetMethodName(); { PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer); @@ -766,13 +773,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, } } - VLOG(compiler) << "Optimizing " << method_name; + VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); if (run_optimizations_) { { PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); if (!graph->TryBuildingSsa()) { // We could not transform the graph to SSA, bailout. - LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; + LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName() + << ": it contains a non natural loop"; MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); pass_observer.SetGraphInBadState(); return nullptr; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f3d075caaa..d1770b75ab 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -48,22 +48,34 @@ void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { } void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { - // Try to find a static invoke from which this check originated. - HInvokeStaticOrDirect* invoke = nullptr; + // Try to find a static invoke or a new-instance from which this check originated. 
+ HInstruction* implicit_clinit = nullptr; for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) { HInstruction* user = it.Current()->GetUser(); - if (user->IsInvokeStaticOrDirect() && CanMoveClinitCheck(check, user)) { - invoke = user->AsInvokeStaticOrDirect(); - DCHECK(invoke->IsStaticWithExplicitClinitCheck()); - invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) && + CanMoveClinitCheck(check, user)) { + implicit_clinit = user; + if (user->IsInvokeStaticOrDirect()) { + DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()); + user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( + HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + } else { + DCHECK(user->IsNewInstance()); + // We delegate the initialization duty to the allocation. + if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) { + user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved); + } + } break; } } - // If we found a static invoke for merging, remove the check from all other static invokes. - if (invoke != nullptr) { + // If we found a static invoke or new-instance for merging, remove the check + // from dominated static invokes. + if (implicit_clinit != nullptr) { for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) { HInstruction* user = it.Current()->GetUser(); - DCHECK(invoke->StrictlyDominates(user)); // All other uses must be dominated. + // All other uses must be dominated. + DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user)); it.Advance(); // Advance before we remove the node, reference to the next node is preserved. if (user->IsInvokeStaticOrDirect()) { user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( @@ -77,8 +89,8 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { check->ReplaceWith(load_class); - if (invoke != nullptr) { - // Remove the check from the graph. It has been merged into the invoke. + if (implicit_clinit != nullptr) { + // Remove the check from the graph. It has been merged into the invoke or new-instance. check->GetBlock()->RemoveInstruction(check); // Check if we can merge the load class as well. if (can_merge_with_load_class && !load_class->HasUses()) { @@ -92,6 +104,29 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { } } +void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { + HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass(); + bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse(); + // Change the entrypoint to kQuickAllocObject if either: + // - the class is finalizable (only kQuickAllocObject handles finalizable classes), + // - the class needs access checks (we do not know if it's finalizable), + // - or the load class has only one use. + if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObject); + instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0); + // The allocation entry point that deals with access checks does not work with inlined + // methods, so we need to check whether this allocation comes from an inlined method. + if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) { + // We can remove the load class from the graph. 
If it needed access checks, we delegate + // the access check to the allocation. + if (load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck); + } + load_class->GetBlock()->RemoveInstruction(load_class); + } + } +} + void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index a70fb309df..9b2434250d 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,6 +40,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + void VisitNewInstance(HNewInstance* instruction) OVERRIDE; bool CanMoveClinitCheck(HInstruction* input, HInstruction* user); diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index c98f43e461..bde54ee977 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -63,7 +63,6 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); phi->ReplaceInput(equivalent, i); if (equivalent->IsPhi()) { - equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); } else if (equivalent == input) { // The input has changed its type. It can be an input of other phis, diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 5190eb3b26..9e6cfbe653 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -22,6 +22,13 @@ namespace art { +// Returns whether this is a loop header phi which was eagerly created but later +// found inconsistent due to the vreg being undefined in one of its predecessors. +// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination. +static bool IsUndefinedLoopHeaderPhi(HPhi* phi) { + return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size(); +} + /** * A debuggable application may require to reviving phis, to ensure their * associated DEX register is available to a debugger. This class implements @@ -165,17 +172,15 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); + if (IsUndefinedLoopHeaderPhi(phi)) { + DCHECK(phi->IsDead()); + continue; + } if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi to guarantee convergence of the algorithm. - // Note that the dead phi may already have a type if it is an equivalent - // generated for a typed LoadLocal. In that case we do not change the - // type because it could lead to an unsupported PrimNot/Float/Double -> - // PrimInt/Long transition and create same type equivalents. - if (phi->GetType() == Primitive::kPrimVoid) { - phi->SetType(phi->InputAt(0)->GetType()); - } + // Loop phis must have a type to guarantee convergence of the algorithm. 
+ DCHECK_NE(phi->GetType(), Primitive::kPrimVoid); AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of @@ -220,6 +225,27 @@ void DeadPhiHandling::Run() { ProcessWorklist(); } +void SsaBuilder::SetLoopHeaderPhiInputs() { + for (size_t i = loop_headers_.size(); i > 0; --i) { + HBasicBlock* block = loop_headers_[i - 1]; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + size_t vreg = phi->GetRegNumber(); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + HInstruction* value = ValueOfLocal(predecessor, vreg); + if (value == nullptr) { + // Vreg is undefined at this predecessor. Mark it dead and leave with + // fewer inputs than predecessors. SsaChecker will fail if not removed. + phi->SetDead(); + break; + } else { + phi->AddInput(value); + } + } + } + } +} + void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) { @@ -283,15 +309,7 @@ void SsaBuilder::BuildSsa() { } // 2) Set inputs of loop phis. - for (HBasicBlock* block : loop_headers_) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber()); - phi->AddInput(input); - } - } - } + SetLoopHeaderPhiInputs(); // 3) Mark dead phis. This will mark phis that are only used by environments: // at the DEX level, the type of these phis does not need to be consistent, but @@ -403,8 +421,13 @@ ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) { for (size_t i = 0; i < vregs; ++i) { // No point in creating the catch phi if it is already undefined at // the first throwing instruction. 
- if ((*current_locals_)[i] != nullptr) { - HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); + HInstruction* current_local_value = (*current_locals_)[i]; + if (current_local_value != nullptr) { + HPhi* phi = new (arena) HPhi( + arena, + i, + 0, + current_local_value->GetType()); block->AddPhi(phi); (*locals)[i] = phi; } @@ -451,7 +474,10 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local); if (incoming != nullptr) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + 0, + incoming->GetType()); block->AddPhi(phi); (*current_locals_)[local] = phi; } @@ -484,8 +510,12 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { } if (is_different) { + HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local); HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + block->GetPredecessors().size(), + first_input->GetType()); for (size_t i = 0; i < block->GetPredecessors().size(); i++) { HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local); phi->SetRawInputAt(i, pred_value); @@ -583,8 +613,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: phi->GetBlock()->InsertPhiAfter(new_phi, phi); return new_phi; } else { - DCHECK_EQ(next->GetType(), type); - return next->AsPhi(); + HPhi* next_phi = next->AsPhi(); + DCHECK_EQ(next_phi->GetType(), type); + if (next_phi->IsDead()) { + // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis) + // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This + // cannot revive undefined loop header phis because they cannot have uses. + DCHECK(!IsUndefinedLoopHeaderPhi(next_phi)); + next_phi->SetLive(); + } + return next_phi; } } @@ -638,7 +676,36 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1); + uint32_t reg_number = store->GetLocal()->GetRegNumber(); + HInstruction* stored_value = store->InputAt(1); + Primitive::Type stored_type = stored_value->GetType(); + DCHECK_NE(stored_type, Primitive::kPrimVoid); + + // Storing into vreg `reg_number` may implicitly invalidate the surrounding + // registers. Consider the following cases: + // (1) Storing a wide value must overwrite previous values in both `reg_number` + // and `reg_number+1`. We store `nullptr` in `reg_number+1`. + // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number` + // must invalidate it. We store `nullptr` in `reg_number-1`. + // Consequently, storing a wide value into the high vreg of another wide value + // will invalidate both `reg_number-1` and `reg_number+1`. + + if (reg_number != 0) { + HInstruction* local_low = (*current_locals_)[reg_number - 1]; + if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) { + // The vreg we are storing into was previously the high vreg of a pair. + // We need to invalidate its low vreg. + DCHECK((*current_locals_)[reg_number] == nullptr); + (*current_locals_)[reg_number - 1] = nullptr; + } + } + + (*current_locals_)[reg_number] = stored_value; + if (Primitive::Is64BitType(stored_type)) { + // We are storing a pair. Invalidate the instruction in the high vreg. 
+ (*current_locals_)[reg_number + 1] = nullptr; + } + store->GetBlock()->RemoveInstruction(store); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 79f1a28ac8..dcce5e4c2c 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -81,6 +81,7 @@ class SsaBuilder : public HGraphVisitor { static constexpr const char* kSsaBuilderPassName = "ssa_builder"; private: + void SetLoopHeaderPhiInputs(); void FixNullConstantType(); void EquivalentPhisCleanup(); diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 72f9ddd506..a3219dcc38 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -16,6 +16,8 @@ #include "ssa_phi_elimination.h" +#include "base/arena_containers.h" + namespace art { void SsaDeadPhiElimination::Run() { @@ -24,22 +26,36 @@ void SsaDeadPhiElimination::Run() { } void SsaDeadPhiElimination::MarkDeadPhis() { + // Phis are constructed live and should not be revived if previously marked + // dead. This algorithm temporarily breaks that invariant but we DCHECK that + // only phis which were initially live are revived. + ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter()); + // Add to the worklist phis referenced by non-phi instructions. for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); - // Set dead ahead of running through uses. The phi may have no use. - phi->SetDead(); + if (phi->IsDead()) { + continue; + } + + bool has_non_phi_use = false; for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - HUseListNode<HInstruction*>* current = use_it.Current(); - HInstruction* user = current->GetUser(); - if (!user->IsPhi()) { - worklist_.push_back(phi); - phi->SetLive(); + if (!use_it.Current()->GetUser()->IsPhi()) { + has_non_phi_use = true; break; } } + + if (has_non_phi_use) { + worklist_.push_back(phi); + } else { + phi->SetDead(); + if (kIsDebugBuild) { + initially_live.insert(phi); + } + } } } @@ -48,10 +64,13 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HPhi* phi = worklist_.back(); worklist_.pop_back(); for (HInputIterator it(phi); !it.Done(); it.Advance()) { - HInstruction* input = it.Current(); - if (input->IsPhi() && input->AsPhi()->IsDead()) { - worklist_.push_back(input->AsPhi()); - input->AsPhi()->SetLive(); + HPhi* input = it.Current()->AsPhi(); + if (input != nullptr && input->IsDead()) { + // Input is a dead phi. Revive it and add to the worklist. We make sure + // that the phi was not dead initially (see definition of `initially_live`). + DCHECK(ContainsElement(initially_live, input)); + input->SetLive(); + worklist_.push_back(input); } } } @@ -118,7 +137,6 @@ void SsaRedundantPhiElimination::Run() { } if (phi->InputCount() == 0) { - DCHECK(phi->IsCatchPhi()); DCHECK(phi->IsDead()); continue; } diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 68e39568bb..dead8fd9a8 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -342,9 +342,9 @@ bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) { return IsAbsoluteUint<12>(offset); case kLoadSWord: case kLoadDWord: - return IsAbsoluteUint<10>(offset); // VFP addressing mode. 
+ return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode. case kLoadWordPair: - return IsAbsoluteUint<10>(offset); + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -360,9 +360,9 @@ bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) { return IsAbsoluteUint<12>(offset); case kStoreSWord: case kStoreDWord: - return IsAbsoluteUint<10>(offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode. case kStoreWordPair: - return IsAbsoluteUint<10>(offset); + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 5233dcbbb0..ce3a87275d 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -389,8 +389,6 @@ class Arm32Assembler FINAL : public ArmAssembler { void EmitBranch(Condition cond, Label* label, bool link); static int32_t EncodeBranchOffset(int offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); - int32_t EncodeTstOffset(int offset, int32_t inst); - int DecodeTstOffset(int32_t inst); bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op); }; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 297cc54e29..7ad5b440e0 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -1349,7 +1349,8 @@ void Thumb2Assembler::Emit32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, int32_t encoding = 0; if (so.IsImmediate()) { // Check special cases. - if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) { + if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12)) && + /* Prefer T3 encoding to T4. */ !ShifterOperandCanAlwaysHold(so.GetImmediate())) { if (set_cc != kCcSet) { if (opcode == SUB) { thumb_opcode = 5U; @@ -3220,7 +3221,7 @@ void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm, void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) { CheckCondition(cond); - EmitShift(rd, rm, RRX, rm, cond, set_cc); + EmitShift(rd, rm, RRX, 0, cond, set_cc); } @@ -3469,6 +3470,73 @@ void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) } } +int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) { + switch (type) { + case kLoadSignedByte: + case kLoadSignedHalfword: + case kLoadUnsignedHalfword: + case kLoadUnsignedByte: + case kLoadWord: + // We can encode imm12 offset. + return 0xfffu; + case kLoadSWord: + case kLoadDWord: + case kLoadWordPair: + // We can encode imm8:'00' offset. + return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +int32_t Thumb2Assembler::GetAllowedStoreOffsetBits(StoreOperandType type) { + switch (type) { + case kStoreHalfword: + case kStoreByte: + case kStoreWord: + // We can encode imm12 offset. + return 0xfff; + case kStoreSWord: + case kStoreDWord: + case kStoreWordPair: + // We can encode imm8:'00' offset. 
+ return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +bool Thumb2Assembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store) { + int32_t other_bits = offset & ~allowed_offset_bits; + if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) { + *add_to_base = offset & ~allowed_offset_bits; + *offset_for_load_store = offset & allowed_offset_bits; + return true; + } + return false; +} + +int32_t Thumb2Assembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits, + Register temp, + Register base, + int32_t offset, + Condition cond) { + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + AddConstant(temp, base, add_to_base, cond, kCcKeep); + return offset_for_load; + } else { + LoadImmediate(temp, offset, cond); + add(temp, temp, ShifterOperand(base), cond, kCcKeep); + return 0; + } +} // Implementation note: this method must emit at most one instruction when // Address::CanHoldLoadOffsetThumb. @@ -3479,12 +3547,26 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(type, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; + // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks. + int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type); + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. + AddConstant(reg, base, add_to_base, cond, kCcKeep); + base = reg; + offset = offset_for_load; + } else { + Register temp = (reg == base) ? IP : reg; + LoadImmediate(temp, offset, cond); + // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD. + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. + add(reg, reg, ShifterOperand((reg == base) ? IP : base), cond, kCcKeep); + base = reg; + offset = 0; + } } - CHECK(Address::CanHoldLoadOffsetThumb(type, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(type, offset)); switch (type) { case kLoadSignedByte: ldrsb(reg, Address(base, offset), cond); @@ -3510,7 +3592,6 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type, } } - // Implementation note: this method must emit at most one instruction when // Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset. 
void Thumb2Assembler::LoadSFromOffset(SRegister reg, @@ -3519,12 +3600,10 @@ void Thumb2Assembler::LoadSFromOffset(SRegister reg, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadSWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)); vldrs(reg, Address(base, offset), cond); } @@ -3537,12 +3616,10 @@ void Thumb2Assembler::LoadDFromOffset(DRegister reg, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadDWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)); vldrd(reg, Address(base, offset), cond); } @@ -3573,12 +3650,12 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type, offset += kRegisterSize; } } - LoadImmediate(tmp_reg, offset, cond); - add(tmp_reg, tmp_reg, ShifterOperand(base), AL); + // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset() + // and in the "unsplittable" path get rid of the "add" by using the store indexed instead. + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset, cond); base = tmp_reg; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(type, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(type, offset)); switch (type) { case kStoreByte: strb(reg, Address(base, offset), cond); @@ -3611,12 +3688,10 @@ void Thumb2Assembler::StoreSToOffset(SRegister reg, Condition cond) { if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreSWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)); vstrs(reg, Address(base, offset), cond); } @@ -3629,12 +3704,10 @@ void Thumb2Assembler::StoreDToOffset(DRegister reg, Condition cond) { if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreDWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)); vstrd(reg, Address(base, offset), cond); } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index e18361300a..9aeece8e57 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -729,13 +729,23 @@ class Thumb2Assembler FINAL : public ArmAssembler { void EmitBranch(Condition cond, Label* label, bool link, bool x); static int32_t EncodeBranchOffset(int32_t offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); - int32_t EncodeTstOffset(int offset, int32_t inst); - int DecodeTstOffset(int32_t inst); void EmitShift(Register rd, Register rm, Shift 
shift, uint8_t amount, Condition cond = AL, SetCc set_cc = kCcDontCare); void EmitShift(Register rd, Register rn, Shift shift, Register rm, Condition cond = AL, SetCc set_cc = kCcDontCare); + static int32_t GetAllowedLoadOffsetBits(LoadOperandType type); + static int32_t GetAllowedStoreOffsetBits(StoreOperandType type); + bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store); + int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits, + Register temp, + Register base, + int32_t offset, + Condition cond); + // Whether the assembler can relocate branches. If false, unresolved branches will be // emitted on 32bits. bool can_relocate_branches_; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index cb4b20b5ba..7b32b0fd26 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -243,7 +243,7 @@ TEST_F(AssemblerThumb2Test, sub) { const char* expected = "subs r1, r0, #42\n" - "subw r1, r0, #42\n" + "sub.w r1, r0, #42\n" "subs r1, r0, r2, asr #31\n" "sub r1, r0, r2, asr #31\n"; DriverStr(expected, "sub"); @@ -257,7 +257,7 @@ TEST_F(AssemblerThumb2Test, add) { const char* expected = "adds r1, r0, #42\n" - "addw r1, r0, #42\n" + "add.w r1, r0, #42\n" "adds r1, r0, r2, asr #31\n" "add r1, r0, r2, asr #31\n"; DriverStr(expected, "add"); @@ -305,21 +305,18 @@ TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) { __ StoreToOffset(type, arm::IP, arm::R5, offset); const char* expected = - "mov ip, #4096\n" // LoadImmediate(ip, 4096) - "add ip, ip, sp\n" + "add.w ip, sp, #4096\n" // AddConstant(ip, sp, 4096) "str r0, [ip, #0]\n" - "str r5, [sp, #-4]!\n" // Push(r5) - "movw r5, #4100\n" // LoadImmediate(r5, 4096 + kRegisterSize) - "add r5, r5, sp\n" - "str ip, [r5, #0]\n" - "ldr r5, [sp], #4\n" // Pop(r5) - - "str r6, [sp, #-4]!\n" // Push(r6) - "mov r6, #4096\n" // LoadImmediate(r6, 4096) - "add r6, r6, r5\n" - "str ip, [r6, #0]\n" - "ldr r6, [sp], #4\n"; // Pop(r6) + "str r5, [sp, #-4]!\n" // Push(r5) + "add.w r5, sp, #4096\n" // AddConstant(r5, 4100 & ~0xfff) + "str ip, [r5, #4]\n" // StoreToOffset(type, ip, r5, 4100 & 0xfff) + "ldr r5, [sp], #4\n" // Pop(r5) + + "str r6, [sp, #-4]!\n" // Push(r6) + "add.w r6, r5, #4096\n" // AddConstant(r6, r5, 4096 & ~0xfff) + "str ip, [r6, #0]\n" // StoreToOffset(type, ip, r6, 4096 & 0xfff) + "ldr r6, [sp], #4\n"; // Pop(r6) DriverStr(expected, "StoreWordToNonThumbOffset"); } @@ -360,20 +357,17 @@ TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) { __ StoreToOffset(type, arm::R11, arm::R5, offset); const char* expected = - "mov ip, #1024\n" // LoadImmediate(ip, 1024) - "add ip, ip, sp\n" + "add.w ip, sp, #1024\n" // AddConstant(ip, sp, 1024) "strd r0, r1, [ip, #0]\n" "str r5, [sp, #-4]!\n" // Push(r5) - "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize) - "add r5, r5, sp\n" - "strd r11, ip, [r5, #0]\n" + "add.w r5, sp, #1024\n" // AddConstant(r5, sp, (1024 + kRegisterSize) & ~0x3fc) + "strd r11, ip, [r5, #4]\n" // StoreToOffset(type, r11, sp, (1024 + kRegisterSize) & 0x3fc) "ldr r5, [sp], #4\n" // Pop(r5) "str r6, [sp, #-4]!\n" // Push(r6) - "mov r6, #1024\n" // LoadImmediate(r6, 1024) - "add r6, r6, r5\n" - "strd r11, ip, [r6, #0]\n" + "add.w r6, r5, #1024\n" // AddConstant(r6, r5, 1024 & ~0x3fc) + "strd r11, ip, [r6, #0]\n" // StoreToOffset(type, r11, r6, 1024 & 0x3fc) "ldr r6, [sp], #4\n"; // Pop(r6) DriverStr(expected, 
"StoreWordPairToNonThumbOffset"); } diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index f1233ca457..9457da1c36 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -840,12 +840,17 @@ class AssemblerTest : public testing::Test { return str; } + // Override this to pad the code with NOPs to a certain size if needed. + virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { + } + void DriverWrapper(std::string assembly_text, std::string test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); assembler_->FinalizeInstructions(code); + Pad(*data); test_helper_->Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 2ae88413e7..1de51a2dc8 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -466,6 +466,38 @@ TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) { EmitAndCheck(&assembler, "DataProcessingShiftedRegister"); } +TEST(Thumb2AssemblerTest, ShiftImmediate) { + // Note: This test produces the same results as DataProcessingShiftedRegister + // but it does so using shift functions instead of mov(). + arm::Thumb2Assembler assembler; + + // 16-bit variants. + __ Lsl(R3, R4, 4); + __ Lsr(R3, R4, 5); + __ Asr(R3, R4, 6); + + // 32-bit ROR because ROR immediate doesn't have the same 16-bit version as other shifts. + __ Ror(R3, R4, 7); + + // 32-bit RRX because RRX has no 16-bit version. + __ Rrx(R3, R4); + + // 32 bit variants (not setting condition codes). + __ Lsl(R3, R4, 4, AL, kCcKeep); + __ Lsr(R3, R4, 5, AL, kCcKeep); + __ Asr(R3, R4, 6, AL, kCcKeep); + __ Ror(R3, R4, 7, AL, kCcKeep); + __ Rrx(R3, R4, AL, kCcKeep); + + // 32 bit variants (high registers). + __ Lsls(R8, R4, 4); + __ Lsrs(R8, R4, 5); + __ Asrs(R8, R4, 6); + __ Rors(R8, R4, 7); + __ Rrxs(R8, R4); + + EmitAndCheck(&assembler, "ShiftImmediate"); +} TEST(Thumb2AssemblerTest, BasicLoad) { arm::Thumb2Assembler assembler; @@ -823,29 +855,80 @@ TEST(Thumb2AssemblerTest, SpecialAddSub) { __ add(R2, SP, ShifterOperand(0xf00)); // 32 bit due to imm size. __ add(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size. + __ add(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4. - __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit - __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit - __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit. + __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit + __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit + __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit. - __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size + __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size + __ sub(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4. 
EmitAndCheck(&assembler, "SpecialAddSub"); } +TEST(Thumb2AssemblerTest, LoadFromOffset) { + arm::Thumb2Assembler assembler; + + __ LoadFromOffset(kLoadWord, R2, R4, 12); + __ LoadFromOffset(kLoadWord, R2, R4, 0xfff); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadWord, R2, R4, 0x101000); + __ LoadFromOffset(kLoadWord, R4, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000); + __ LoadFromOffset(kLoadWordPair, R2, R4, 12); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400); + __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400); + + __ LoadFromOffset(kLoadWord, R0, R12, 12); // 32-bit because of R12. + __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000); + + __ LoadFromOffset(kLoadSignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12); + + EmitAndCheck(&assembler, "LoadFromOffset"); +} + TEST(Thumb2AssemblerTest, StoreToOffset) { arm::Thumb2Assembler assembler; - __ StoreToOffset(kStoreWord, R2, R4, 12); // Simple - __ StoreToOffset(kStoreWord, R2, R4, 0x2000); // Offset too big. - __ StoreToOffset(kStoreWord, R0, R12, 12); - __ StoreToOffset(kStoreHalfword, R0, R12, 12); - __ StoreToOffset(kStoreByte, R2, R12, 12); + __ StoreToOffset(kStoreWord, R2, R4, 12); + __ StoreToOffset(kStoreWord, R2, R4, 0xfff); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreWord, R2, R4, 0x101000); + __ StoreToOffset(kStoreWord, R4, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R2, R4, 12); + __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000); + __ StoreToOffset(kStoreWordPair, R2, R4, 12); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400); + __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400); + + __ StoreToOffset(kStoreWord, R0, R12, 12); // 32-bit because of R12. 
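// [Illustrative aside, not part of the patch.] The negative-offset case added just below
// exercises the other branch of CanSplitLoadStoreOffset(): the peeled part is negative,
// but its negation is encodable, so the adjustment is emitted as a SUB rather than an ADD.
// A sketch of the arithmetic for offset = 0xa4 - 0x100000 with 12 allowed offset bits
// (the function name is made up for illustration):

#include <cstdint>

static void NegativeOffsetSplitSketch() {
  int32_t offset = 0xa4 - 0x100000;        // -1048412
  int32_t add_to_base = offset & ~0xfff;   // -0x100000 -> emitted as "sub.w ip, r4, #0x100000"
  int32_t store_offset = offset & 0xfff;   //  0xa4     -> emitted as "str.w r2, [ip, #0xa4]"
  (void)add_to_base;                       // values shown in the updated StoreToOffsetResults
  (void)store_offset;
}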
+ __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000); + + __ StoreToOffset(kStoreByte, R2, R4, 12); EmitAndCheck(&assembler, "StoreToOffset"); } - TEST(Thumb2AssemblerTest, IfThen) { arm::Thumb2Assembler assembler; diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index b79c2e46f0..9246c827a7 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -132,8 +132,8 @@ const char* DataProcessingRegisterResults[] = { const char* DataProcessingImmediateResults[] = { " 0: 2055 movs r0, #85 ; 0x55\n", " 2: f06f 0055 mvn.w r0, #85 ; 0x55\n", - " 6: f201 0055 addw r0, r1, #85 ; 0x55\n", - " a: f2a1 0055 subw r0, r1, #85 ; 0x55\n", + " 6: f101 0055 add.w r0, r1, #85 ; 0x55\n", + " a: f1a1 0055 sub.w r0, r1, #85 ; 0x55\n", " e: f001 0055 and.w r0, r1, #85 ; 0x55\n", " 12: f041 0055 orr.w r0, r1, #85 ; 0x55\n", " 16: f061 0055 orn r0, r1, #85 ; 0x55\n", @@ -201,6 +201,24 @@ const char* DataProcessingShiftedRegisterResults[] = { " 32: ea5f 0834 movs.w r8, r4, rrx\n", nullptr }; +const char* ShiftImmediateResults[] = { + " 0: 0123 lsls r3, r4, #4\n", + " 2: 0963 lsrs r3, r4, #5\n", + " 4: 11a3 asrs r3, r4, #6\n", + " 6: ea4f 13f4 mov.w r3, r4, ror #7\n", + " a: ea4f 0334 mov.w r3, r4, rrx\n", + " e: ea4f 1304 mov.w r3, r4, lsl #4\n", + " 12: ea4f 1354 mov.w r3, r4, lsr #5\n", + " 16: ea4f 13a4 mov.w r3, r4, asr #6\n", + " 1a: ea4f 13f4 mov.w r3, r4, ror #7\n", + " 1e: ea4f 0334 mov.w r3, r4, rrx\n", + " 22: ea5f 1804 movs.w r8, r4, lsl #4\n", + " 26: ea5f 1854 movs.w r8, r4, lsr #5\n", + " 2a: ea5f 18a4 movs.w r8, r4, asr #6\n", + " 2e: ea5f 18f4 movs.w r8, r4, ror #7\n", + " 32: ea5f 0834 movs.w r8, r4, rrx\n", + nullptr +}; const char* BasicLoadResults[] = { " 0: 69a3 ldr r3, [r4, #24]\n", " 2: 7e23 ldrb r3, [r4, #24]\n", @@ -434,23 +452,115 @@ const char* MovWMovTResults[] = { const char* SpecialAddSubResults[] = { " 0: aa14 add r2, sp, #80 ; 0x50\n", " 2: b014 add sp, #80 ; 0x50\n", - " 4: f20d 0850 addw r8, sp, #80 ; 0x50\n", - " 8: f60d 7200 addw r2, sp, #3840 ; 0xf00\n", - " c: f60d 7d00 addw sp, sp, #3840 ; 0xf00\n", - " 10: b094 sub sp, #80 ; 0x50\n", - " 12: f2ad 0050 subw r0, sp, #80 ; 0x50\n", - " 16: f2ad 0850 subw r8, sp, #80 ; 0x50\n", - " 1a: f6ad 7d00 subw sp, sp, #3840 ; 0xf00\n", + " 4: f10d 0850 add.w r8, sp, #80 ; 0x50\n", + " 8: f50d 6270 add.w r2, sp, #3840 ; 0xf00\n", + " c: f50d 6d70 add.w sp, sp, #3840 ; 0xf00\n", + " 10: f60d 7dfc addw sp, sp, #4092 ; 0xffc\n", + " 14: b094 sub sp, #80 ; 0x50\n", + " 16: f1ad 0050 sub.w r0, sp, #80 ; 0x50\n", + " 1a: f1ad 0850 sub.w r8, sp, #80 ; 0x50\n", + " 1e: f5ad 6d70 sub.w sp, sp, #3840 ; 0xf00\n", + " 22: f6ad 7dfc subw sp, sp, #4092 ; 0xffc\n", + nullptr +}; +const char* LoadFromOffsetResults[] = { + " 0: 68e2 ldr r2, [r4, #12]\n", + " 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " a: 6812 ldr r2, [r2, #0]\n", + " c: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 10: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n", + " 14: f241 0200 movw r2, #4096 ; 0x1000\n", + " 18: f2c0 0210 movt r2, #16\n", + " 1c: 4422 add r2, r4\n", + " 1e: 6812 ldr r2, [r2, #0]\n", + " 20: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 24: f2c0 0c10 movt ip, #16\n", + " 28: 4464 add r4, ip\n", + " 2a: 6824 ldr r4, [r4, #0]\n", + " 2c: 89a2 ldrh r2, [r4, #12]\n", + " 2e: f8b4 2fff ldrh.w r2, [r4, #4095] ; 0xfff\n", + " 32: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " 36: 8812 
ldrh r2, [r2, #0]\n", + " 38: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 3c: f8b2 20a4 ldrh.w r2, [r2, #164] ; 0xa4\n", + " 40: f241 0200 movw r2, #4096 ; 0x1000\n", + " 44: f2c0 0210 movt r2, #16\n", + " 48: 4422 add r2, r4\n", + " 4a: 8812 ldrh r2, [r2, #0]\n", + " 4c: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 50: f2c0 0c10 movt ip, #16\n", + " 54: 4464 add r4, ip\n", + " 56: 8824 ldrh r4, [r4, #0]\n", + " 58: e9d4 2303 ldrd r2, r3, [r4, #12]\n", + " 5c: e9d4 23ff ldrd r2, r3, [r4, #1020] ; 0x3fc\n", + " 60: f504 6280 add.w r2, r4, #1024 ; 0x400\n", + " 64: e9d2 2300 ldrd r2, r3, [r2]\n", + " 68: f504 2280 add.w r2, r4, #262144 ; 0x40000\n", + " 6c: e9d2 2329 ldrd r2, r3, [r2, #164]; 0xa4\n", + " 70: f240 4200 movw r2, #1024 ; 0x400\n", + " 74: f2c0 0204 movt r2, #4\n", + " 78: 4422 add r2, r4\n", + " 7a: e9d2 2300 ldrd r2, r3, [r2]\n", + " 7e: f240 4c00 movw ip, #1024 ; 0x400\n", + " 82: f2c0 0c04 movt ip, #4\n", + " 86: 4464 add r4, ip\n", + " 88: e9d4 4500 ldrd r4, r5, [r4]\n", + " 8c: f8dc 000c ldr.w r0, [ip, #12]\n", + " 90: f5a4 1280 sub.w r2, r4, #1048576 ; 0x100000\n", + " 94: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n", + " 98: f994 200c ldrsb.w r2, [r4, #12]\n", + " 9c: 7b22 ldrb r2, [r4, #12]\n", + " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n", nullptr }; const char* StoreToOffsetResults[] = { " 0: 60e2 str r2, [r4, #12]\n", - " 2: f44f 5c00 mov.w ip, #8192 ; 0x2000\n", - " 6: 44a4 add ip, r4\n", - " 8: f8cc 2000 str.w r2, [ip]\n", - " c: f8cc 000c str.w r0, [ip, #12]\n", - " 10: f8ac 000c strh.w r0, [ip, #12]\n", - " 14: f88c 200c strb.w r2, [ip, #12]\n", + " 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " a: f8cc 2000 str.w r2, [ip]\n", + " e: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 12: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " 16: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 1a: f2c0 0c10 movt ip, #16\n", + " 1e: 44a4 add ip, r4\n", + " 20: f8cc 2000 str.w r2, [ip]\n", + " 24: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 28: f2c0 0c10 movt ip, #16\n", + " 2c: 44a4 add ip, r4\n", + " 2e: f8cc 4000 str.w r4, [ip]\n", + " 32: 81a2 strh r2, [r4, #12]\n", + " 34: f8a4 2fff strh.w r2, [r4, #4095] ; 0xfff\n", + " 38: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " 3c: f8ac 2000 strh.w r2, [ip]\n", + " 40: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 44: f8ac 20a4 strh.w r2, [ip, #164] ; 0xa4\n", + " 48: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 4c: f2c0 0c10 movt ip, #16\n", + " 50: 44a4 add ip, r4\n", + " 52: f8ac 2000 strh.w r2, [ip]\n", + " 56: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 5a: f2c0 0c10 movt ip, #16\n", + " 5e: 44a4 add ip, r4\n", + " 60: f8ac 4000 strh.w r4, [ip]\n", + " 64: e9c4 2303 strd r2, r3, [r4, #12]\n", + " 68: e9c4 23ff strd r2, r3, [r4, #1020] ; 0x3fc\n", + " 6c: f504 6c80 add.w ip, r4, #1024 ; 0x400\n", + " 70: e9cc 2300 strd r2, r3, [ip]\n", + " 74: f504 2c80 add.w ip, r4, #262144 ; 0x40000\n", + " 78: e9cc 2329 strd r2, r3, [ip, #164]; 0xa4\n", + " 7c: f240 4c00 movw ip, #1024 ; 0x400\n", + " 80: f2c0 0c04 movt ip, #4\n", + " 84: 44a4 add ip, r4\n", + " 86: e9cc 2300 strd r2, r3, [ip]\n", + " 8a: f240 4c00 movw ip, #1024 ; 0x400\n", + " 8e: f2c0 0c04 movt ip, #4\n", + " 92: 44a4 add ip, r4\n", + " 94: e9cc 4500 strd r4, r5, [ip]\n", + " 98: f8cc 000c str.w r0, [ip, #12]\n", + " 9c: f5a4 1c80 sub.w ip, r4, #1048576 ; 0x100000\n", + " a0: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " a4: 7322 strb r2, [r4, #12]\n", nullptr }; const char* IfThenResults[] = { @@ -4952,6 +5062,7 @@ void 
setup_results() { test_results["DataProcessingModifiedImmediate"] = DataProcessingModifiedImmediateResults; test_results["DataProcessingModifiedImmediates"] = DataProcessingModifiedImmediatesResults; test_results["DataProcessingShiftedRegister"] = DataProcessingShiftedRegisterResults; + test_results["ShiftImmediate"] = ShiftImmediateResults; test_results["BasicLoad"] = BasicLoadResults; test_results["BasicStore"] = BasicStoreResults; test_results["ComplexLoad"] = ComplexLoadResults; @@ -4966,6 +5077,7 @@ void setup_results() { test_results["StoreMultiple"] = StoreMultipleResults; test_results["MovWMovT"] = MovWMovTResults; test_results["SpecialAddSub"] = SpecialAddSubResults; + test_results["LoadFromOffset"] = LoadFromOffsetResults; test_results["StoreToOffset"] = StoreToOffsetResults; test_results["IfThen"] = IfThenResults; test_results["CbzCbnz"] = CbzCbnzResults; diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index aee64120a8..fc7ac7061a 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -310,15 +310,27 @@ void MipsAssembler::Seh(Register rd, Register rt) { EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20); } +void MipsAssembler::Wsbh(Register rd, Register rt) { + EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20); +} + void MipsAssembler::Sll(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00); } void MipsAssembler::Srl(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02); } +void MipsAssembler::Rotr(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; + EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02); +} + void MipsAssembler::Sra(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03); } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 4038c1f1c4..1ef0992dac 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -135,9 +135,11 @@ class MipsAssembler FINAL : public Assembler { void Seb(Register rd, Register rt); // R2+ void Seh(Register rd, Register rt); // R2+ + void Wsbh(Register rd, Register rt); // R2+ void Sll(Register rd, Register rt, int shamt); void Srl(Register rd, Register rt, int shamt); + void Rotr(Register rd, Register rt, int shamt); // R2+ void Sra(Register rd, Register rt, int shamt); void Sllv(Register rd, Register rt, Register rs); void Srlv(Register rd, Register rt, Register rs); diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index ba2525e555..107d5bb572 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -19,15 +19,73 @@ #include "base/bit_utils.h" #include "base/casts.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "memory_region.h" #include "thread.h" namespace art { namespace mips64 { +void Mips64Assembler::FinalizeCode() { + for (auto& exception_block : exception_blocks_) { + EmitExceptionPoll(&exception_block); + } + PromoteBranches(); +} + +void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) { + EmitBranches(); + Assembler::FinalizeInstructions(region); + PatchCFI(); +} + +void Mips64Assembler::PatchCFI() { + if 
(cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with patched opcodes. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. + cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + +void Mips64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. + overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + void Mips64Assembler::Emit(uint32_t value) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Emit<uint32_t>(value); + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } } void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, @@ -82,15 +140,16 @@ void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) { CHECK_NE(rs, kNoGpuRegister); + CHECK(IsUint<21>(imm21)) << imm21; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | static_cast<uint32_t>(rs) << kRsShift | - (imm21 & 0x1FFFFF); + imm21; Emit(encoding); } -void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) { - uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | - (addr26 & 0x3FFFFFF); +void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) { + CHECK(IsUint<26>(imm26)) << imm26; + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26; Emit(encoding); } @@ -428,26 +487,6 @@ void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0xb, rs, rt, imm16); } -void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x4, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x5, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::J(uint32_t addr26) { - EmitJ(0x2, addr26); - Nop(); -} - -void Mips64Assembler::Jal(uint32_t addr26) { - EmitJ(0x3, addr26); - Nop(); -} - void Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x35); } @@ -474,7 +513,6 @@ void Mips64Assembler::Dclo(GpuRegister rd, GpuRegister rs) { void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) { EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09); - Nop(); } void Mips64Assembler::Jalr(GpuRegister rs) { @@ -489,6 +527,15 @@ void 
Mips64Assembler::Auipc(GpuRegister rs, uint16_t imm16) { EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16); } +void Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) { + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, imm19); +} + +void Mips64Assembler::Bc(uint32_t imm26) { + EmitI26(0x32, imm26); +} + void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) { EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16); } @@ -549,14 +596,14 @@ void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) { @@ -569,6 +616,65 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) { EmitI21(0x3E, rs, imm21); } +void Mips64Assembler::EmitBcondc(BranchCondition cond, + GpuRegister rs, + GpuRegister rt, + uint32_t imm16_21) { + switch (cond) { + case kCondLT: + Bltc(rs, rt, imm16_21); + break; + case kCondGE: + Bgec(rs, rt, imm16_21); + break; + case kCondLE: + Bgec(rt, rs, imm16_21); + break; + case kCondGT: + Bltc(rt, rs, imm16_21); + break; + case kCondLTZ: + CHECK_EQ(rt, ZERO); + Bltzc(rs, imm16_21); + break; + case kCondGEZ: + CHECK_EQ(rt, ZERO); + Bgezc(rs, imm16_21); + break; + case kCondLEZ: + CHECK_EQ(rt, ZERO); + Blezc(rs, imm16_21); + break; + case kCondGTZ: + CHECK_EQ(rt, ZERO); + Bgtzc(rs, imm16_21); + break; + case kCondEQ: + Beqc(rs, rt, imm16_21); + break; + case kCondNE: + Bnec(rs, rt, imm16_21); + break; + case kCondEQZ: + CHECK_EQ(rt, ZERO); + Beqzc(rs, imm16_21); + break; + case kCondNEZ: + CHECK_EQ(rt, ZERO); + Bnezc(rs, imm16_21); + break; + case kCondLTU: + Bltuc(rs, rt, imm16_21); + break; + case kCondGEU: + Bgeuc(rs, rt, imm16_21); + break; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + UNREACHABLE(); + } +} + void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x10, ft, fs, fd, 0x0); } @@ -925,15 +1031,6 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) { } } -void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) { - if (IsInt<16>(value)) { - Addiu(rt, rs, value); - } else { - LoadConst32(rtmp, value); - Addu(rt, rs, rtmp); - } -} - void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { if (IsInt<16>(value)) { Daddiu(rt, rs, value); @@ -943,177 +1040,621 @@ void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, Gp } } -// -// MIPS64R6 branches -// -// -// Unconditional (pc + 32-bit signed offset): -// -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Conditional (pc + 32-bit signed offset): -// -// b<cond>c reg, +2 // skip next 2 instructions -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Unconditional (pc + 32-bit signed offset) and link: -// -// auipc reg, ofs_high -// daddiu reg, ofs_low -// jialc reg, 0 -// // no delay/forbidden slot -// -// -// TODO: use shorter instruction sequences whenever possible. 
-// - -void Mips64Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - int32_t bound_pc = buffer_.Size(); +void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size, + Mips64Assembler::Branch::Type short_type, + Mips64Assembler::Branch::Type long_type) { + type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; +} - // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label. - // Embed the previously unknown pc-relative addresses in them. - while (label->IsLinked()) { - int32_t position = label->Position(); - // Extract the branch (instruction pair) - uint32_t auipc = buffer_.Load<uint32_t>(position); - uint32_t jic = buffer_.Load<uint32_t>(position + 4); // actually, jic or daddiu +void Mips64Assembler::Branch::InitializeType(bool is_call) { + OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); + if (is_call) { + InitShortOrLong(offset_size, kCall, kLongCall); + } else if (condition_ == kUncond) { + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + } else { + if (condition_ == kCondEQZ || condition_ == kCondNEZ) { + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch; + } else { + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + } + } + old_type_ = type_; +} + +bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) { + switch (condition) { + case kCondLT: + case kCondGT: + case kCondNE: + case kCondLTU: + return lhs == rhs; + default: + return false; + } +} + +bool Mips64Assembler::Branch::IsUncond(BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondGE: + case kCondLE: + case kCondEQ: + case kCondGEU: + return lhs == rhs; + default: + return false; + } +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(ZERO), + rhs_reg_(ZERO), + condition_(kUncond) { + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + Mips64Assembler::BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(lhs_reg), + rhs_reg_(rhs_reg), + condition_(condition) { + CHECK_NE(condition, kUncond); + switch (condition) { + case kCondEQ: + case kCondNE: + case kCondLT: + case kCondGE: + case kCondLE: + case kCondGT: + case kCondLTU: + case kCondGEU: + CHECK_NE(lhs_reg, ZERO); + CHECK_NE(rhs_reg, ZERO); + break; + case kCondLTZ: + case kCondGEZ: + case kCondLEZ: + case kCondGTZ: + case kCondEQZ: + case kCondNEZ: + CHECK_NE(lhs_reg, ZERO); + CHECK_EQ(rhs_reg, ZERO); + break; + case kUncond: + UNREACHABLE(); + } + CHECK(!IsNop(condition, lhs_reg, rhs_reg)); + if (IsUncond(condition, lhs_reg, rhs_reg)) { + // Branch condition is always true, make the branch unconditional. 
+ condition_ = kUncond; + } + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(indirect_reg), + rhs_reg_(ZERO), + condition_(kUncond) { + CHECK_NE(indirect_reg, ZERO); + CHECK_NE(indirect_reg, AT); + InitializeType(true); +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( + Mips64Assembler::BranchCondition cond) { + switch (cond) { + case kCondLT: + return kCondGE; + case kCondGE: + return kCondLT; + case kCondLE: + return kCondGT; + case kCondGT: + return kCondLE; + case kCondLTZ: + return kCondGEZ; + case kCondGEZ: + return kCondLTZ; + case kCondLEZ: + return kCondGTZ; + case kCondGTZ: + return kCondLEZ; + case kCondEQ: + return kCondNE; + case kCondNE: + return kCondEQ; + case kCondEQZ: + return kCondNEZ; + case kCondNEZ: + return kCondEQZ; + case kCondLTU: + return kCondGEU; + case kCondGEU: + return kCondLTU; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + } + UNREACHABLE(); +} + +Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const { + return type_; +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const { + return condition_; +} + +GpuRegister Mips64Assembler::Branch::GetLeftRegister() const { + return lhs_reg_; +} + +GpuRegister Mips64Assembler::Branch::GetRightRegister() const { + return rhs_reg_; +} + +uint32_t Mips64Assembler::Branch::GetTarget() const { + return target_; +} - // Extract the location of the previous pair in the list (walking the list backwards; - // the previous pair location was stored in the immediate operands of the instructions) - int32_t prev = (auipc << 16) | (jic & 0xFFFF); +uint32_t Mips64Assembler::Branch::GetLocation() const { + return location_; +} + +uint32_t Mips64Assembler::Branch::GetOldLocation() const { + return old_location_; +} + +uint32_t Mips64Assembler::Branch::GetLength() const { + return branch_info_[type_].length; +} + +uint32_t Mips64Assembler::Branch::GetOldLength() const { + return branch_info_[old_type_].length; +} + +uint32_t Mips64Assembler::Branch::GetSize() const { + return GetLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOldSize() const { + return GetOldLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetEndLocation() const { + return GetLocation() + GetSize(); +} + +uint32_t Mips64Assembler::Branch::GetOldEndLocation() const { + return GetOldLocation() + GetOldSize(); +} + +bool Mips64Assembler::Branch::IsLong() const { + switch (type_) { + // Short branches. + case kUncondBranch: + case kCondBranch: + case kCall: + return false; + // Long branches. + case kLongUncondBranch: + case kLongCondBranch: + case kLongCall: + return true; + } + UNREACHABLE(); +} + +bool Mips64Assembler::Branch::IsResolved() const { + return target_ != kUnresolved; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const { + OffsetBits offset_size = + (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) + ? kOffset23 + : branch_info_[type_].offset_size; + return offset_size; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location, + uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). 
+ if (target == kUnresolved) + return kOffset16; + int64_t distance = static_cast<int64_t>(target) - location; + // To simplify calculations in composite branches consisting of multiple instructions + // bump up the distance by a value larger than the max byte size of a composite branch. + distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize; + if (IsInt<kOffset16>(distance)) + return kOffset16; + else if (IsInt<kOffset18>(distance)) + return kOffset18; + else if (IsInt<kOffset21>(distance)) + return kOffset21; + else if (IsInt<kOffset23>(distance)) + return kOffset23; + else if (IsInt<kOffset28>(distance)) + return kOffset28; + return kOffset32; +} + +void Mips64Assembler::Branch::Resolve(uint32_t target) { + target_ = target; +} + +void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + if (location_ > expand_location) { + location_ += delta; + } + if (!IsResolved()) { + return; // Don't know the target yet. + } + if (target_ > expand_location) { + target_ += delta; + } +} + +void Mips64Assembler::Branch::PromoteToLong() { + switch (type_) { + // Short branches. + case kUncondBranch: + type_ = kLongUncondBranch; + break; + case kCondBranch: + type_ = kLongCondBranch; + break; + case kCall: + type_ = kLongCall; + break; + default: + // Note: 'type_' is already long. + break; + } + CHECK(IsLong()); +} + +uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { + // If the branch is still unresolved or already long, nothing to do. + if (IsLong() || !IsResolved()) { + return 0; + } + // Promote the short branch to long if the offset size is too small + // to hold the distance between location_ and target_. + if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + // The following logic is for debugging/testing purposes. + // Promote some short branches to long when it's not really required. + if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { + int64_t distance = static_cast<int64_t>(target_) - location_; + distance = (distance >= 0) ? distance : -distance; + if (distance >= max_short_distance) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + } + return 0; +} + +uint32_t Mips64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. + uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + // Prepare the offset for encoding into the instruction(s). 
+ offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; + return offset; +} - // Get the pc-relative address - uint32_t offset = bound_pc - position; - offset += (offset & 0x8000) << 1; // account for sign extension in jic/daddiu +Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Mips64Assembler::Bind(Mips64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); - // Embed it in the two instructions - auipc = (auipc & 0xFFFF0000) | (offset >> 16); - jic = (jic & 0xFFFF0000) | (offset & 0xFFFF); + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. + while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); - // Save the adjusted instructions - buffer_.Store<uint32_t>(position, auipc); - buffer_.Store<uint32_t>(position + 4, jic); + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); // On to the previous branch in the list... label->position_ = prev; } - // Now make the label object contain its own location - // (it will be used by the branches referring to and following this label) + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + label->prev_branch_id_plus_one_ = branches_.size(); + if (label->prev_branch_id_plus_one_) { + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + bound_pc -= branch->GetEndLocation(); + } label->BindTo(bound_pc); } -void Mips64Assembler::B(Label* label) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in jic - Auipc(AT, offset >> 16); - Jic(AT, offset); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(AT, prev >> 16); - Jic(AT, prev); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_plus_one_) { + // Get label location based on the branch preceding it. 
+ uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + target += branch->GetEndLocation(); + } + return target; +} + +uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the old_position. Since we expect AdjustedPosition() to be called in a loop + // with increasing old_position, we can use the data from last AdjustedPosition() to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetSize() - branch->GetOldSize(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { + uint32_t length = branches_.back().GetLength(); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. + Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + while (length--) { + Nop(); } } -void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in daddiu - Auipc(indirect_reg, offset >> 16); - Daddiu(indirect_reg, indirect_reg, offset); - Jialc(indirect_reg, 0); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(indirect_reg, prev >> 16); - Daddiu(indirect_reg, indirect_reg, prev); - Jialc(indirect_reg, 0); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +void Mips64Assembler::Buncond(Mips64Label* label) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, indirect_reg); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0; ) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } +} + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = { + // Short branches. + { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch + { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch + // Exception: kOffset23 for beqzc/bnezc + { 2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kCall + // Long branches. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongUncondBranch + { 3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCondBranch + { 3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall +}; + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + uint32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + GpuRegister lhs = branch->GetLeftRegister(); + GpuRegister rhs = branch->GetRightRegister(); + switch (branch->GetType()) { + // Short branches. + case Branch::kUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Bc(offset); + break; + case Branch::kCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondc(condition, lhs, rhs, offset); + Nop(); // TODO: improve by filling the forbidden slot. + break; + case Branch::kCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Addiupc(lhs, offset); + Jialc(lhs, 0); + break; + + // Long branches. + case Branch::kLongUncondBranch: + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. 
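// [Illustrative aside, not part of the patch.] Why the "(offset & 0x8000) << 1" adjustment
// above is needed: auipc contributes the high half of the (adjusted) offset shifted left
// by 16, while jic sign-extends its 16-bit immediate, so a low half with bit 15 set would
// subtract 0x10000 again. Worked example with an assumed offset of 0x00049abc (bit 15 set):
//   adjusted    = 0x00049abc + 0x10000   = 0x00059abc
//   auipc part  = 0x0005 << 16           = 0x00050000
//   jic part    = sign_extend16(0x9abc)  = -0x6544
//   sum         = 0x00050000 - 0x6544    = 0x00049abc   (original offset restored)
// The same compensation is applied below for the jic in kLongCondBranch and the daddiu
// in kLongCall.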
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCondBranch: + EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2); + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCall: + offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(lhs, High16Bits(offset)); + Daddiu(lhs, lhs, Low16Bits(offset)); + Jialc(lhs, 0); + break; } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize)); } -void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgec(rs, rt, 2); - B(label); +void Mips64Assembler::Bc(Mips64Label* label) { + Buncond(label); } -void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) { - Bgezc(rt, 2); - B(label); +void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) { + Call(label, indirect_reg); } -void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) { - Blezc(rt, 2); - B(label); +void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLT, rs, rt); } -void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) { - Bltc(rs, rt, 2); - B(label); +void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTZ, rt); } -void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) { - Bltzc(rt, 2); - B(label); +void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGTZ, rt); } -void Mips64Assembler::Blezc(GpuRegister rt, Label* label) { - Bgtzc(rt, 2); - B(label); +void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGE, rs, rt); } -void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgeuc(rs, rt, 2); - B(label); +void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEZ, rt); } -void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bltuc(rs, rt, 2); - B(label); +void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLEZ, rt); } -void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) { - Bnec(rs, rt, 2); - B(label); +void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTU, rs, rt); } -void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) { - Beqc(rs, rt, 2); - B(label); +void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEU, rs, rt); } -void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) { - Bnezc(rs, 2); - B(label); +void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondEQ, rs, rt); } -void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) { - Beqzc(rs, 2); - B(label); +void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondNE, rs, rt); +} + +void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondEQZ, rs); +} + +void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondNEZ, rs); } void 
Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, @@ -1256,6 +1797,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); // Increase frame to required size. IncreaseFrameSize(frame_size); @@ -1298,6 +1840,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Mips64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); cfi_.RememberState(); // Pop callee saves and return address @@ -1316,6 +1859,7 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, // Then jump to the return address. Jr(RA); + Nop(); // The CFI should be restored for any code that follows the exit block. cfi_.RestoreState(); @@ -1324,12 +1868,14 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, void Mips64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(-adjust)); cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(adjust)); cfi_.AdjustCFAOffset(-adjust); } @@ -1379,17 +1925,7 @@ void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); } -void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) { - Mips64ManagedRegister scratch = mscratch.AsMips64(); - CHECK(scratch.IsGpuRegister()) << scratch; - // TODO: it's unclear wether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?). - // Is this function even referenced anywhere else in the code? 
- LoadConst32(scratch.AsGpuRegister(), imm); - StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value()); -} - -void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1398,7 +1934,7 @@ void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); } -void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) { +void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) { StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value()); } @@ -1415,7 +1951,9 @@ void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) return EmitLoad(mdest, SP, src.Int32Value(), size); } -void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) { +void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) { return EmitLoad(mdest, S1, src.Int32Value(), size); } @@ -1449,18 +1987,20 @@ void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, } void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, - ThreadOffset<8> offs) { + ThreadOffset<kMipsDoublewordSize> offs) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value()); } -void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips"; +void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64"; } -void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips"; +void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64"; } void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { @@ -1492,7 +2032,7 @@ void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src, } void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, - ThreadOffset<8> thr_offs, + ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; @@ -1500,7 +2040,7 @@ void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); } -void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1561,9 +2101,12 @@ void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameO } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + 
FrameOffset src_base ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, @@ -1584,15 +2127,18 @@ void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset -/*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + Offset dest_offset ATTRIBUTE_UNUSED, + FrameOffset src ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } -void Mips64Assembler::MemoryBarrier(ManagedRegister) { +void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) { // TODO: sync? - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, @@ -1604,7 +2150,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg; CHECK(out_reg.IsGpuRegister()) << out_reg; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset) @@ -1631,7 +2177,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. 
Otherwise, the handle scope entry is @@ -1653,7 +2199,7 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Mips64ManagedRegister in_reg = min_reg.AsMips64(); CHECK(out_reg.IsGpuRegister()) << out_reg; CHECK(in_reg.IsGpuRegister()) << in_reg; - Label null_arg; + Mips64Label null_arg; if (!out_reg.Equals(in_reg)) { LoadConst32(out_reg.AsGpuRegister(), 0); } @@ -1663,11 +2209,13 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Bind(&null_arg); } -void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } -void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } @@ -1679,6 +2227,7 @@ void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), base.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } @@ -1691,11 +2240,13 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), scratch.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } -void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { @@ -1703,37 +2254,39 @@ void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { } void Mips64Assembler::GetCurrentThread(FrameOffset offset, - ManagedRegister /*mscratch*/) { + ManagedRegister mscratch ATTRIBUTE_UNUSED) { StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value()); } void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { Mips64ManagedRegister scratch = mscratch.AsMips64(); - Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust); - buffer_.EnqueueSlowPath(slow); - LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), - S1, Thread::ExceptionOffset<8>().Int32Value()); - Bnezc(scratch.AsGpuRegister(), slow->Entry()); -} - -void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { - Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - if (stack_adjust_ != 0) { // Fix up the frame. - __ DecreaseFrameSize(stack_adjust_); + exception_blocks_.emplace_back(scratch, stack_adjust); + LoadFromOffset(kLoadDoubleword, + scratch.AsGpuRegister(), + S1, + Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value()); + Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry()); +} + +void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) { + Bind(exception->Entry()); + if (exception->stack_adjust_ != 0) { // Fix up the frame. 
+ DecreaseFrameSize(exception->stack_adjust_); } - // Pass exception object as argument - // Don't care about preserving A0 as this call won't return - __ Move(A0, scratch_.AsGpuRegister()); + // Pass exception object as argument. + // Don't care about preserving A0 as this call won't return. + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); + Move(A0, exception->scratch_.AsGpuRegister()); // Set up call to Thread::Current()->pDeliverException - __ LoadFromOffset(kLoadDoubleword, T9, S1, - QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()); - // TODO: check T9 usage - __ Jr(T9); + LoadFromOffset(kLoadDoubleword, + T9, + S1, + QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value()); + Jr(T9); + Nop(); + // Call never returns - __ Break(); -#undef __ + Break(); } } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 42962bca20..57fc19a6e9 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -17,18 +17,22 @@ #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ +#include <utility> #include <vector> #include "base/macros.h" #include "constants_mips64.h" #include "globals.h" #include "managed_register_mips64.h" -#include "utils/assembler.h" #include "offsets.h" +#include "utils/assembler.h" +#include "utils/label.h" namespace art { namespace mips64 { +static constexpr size_t kMipsDoublewordSize = 8; + enum LoadOperandType { kLoadSignedByte, kLoadUnsignedByte, @@ -60,10 +64,57 @@ enum FPClassMaskType { kPositiveZero = 0x200, }; +class Mips64Label : public Label { + public: + Mips64Label() : prev_branch_id_plus_one_(0) {} + + Mips64Label(Mips64Label&& src) + : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {} + + private: + uint32_t prev_branch_id_plus_one_; // To get distance from preceding branch, if any. + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64Label); +}; + +// Slowpath entered when Thread::Current()->_exception is non-null. +class Mips64ExceptionSlowPath { + public: + explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) {} + + Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src) + : scratch_(src.scratch_), + stack_adjust_(src.stack_adjust_), + exception_entry_(std::move(src.exception_entry_)) {} + + private: + Mips64Label* Entry() { return &exception_entry_; } + const Mips64ManagedRegister scratch_; + const size_t stack_adjust_; + Mips64Label exception_entry_; + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath); +}; + class Mips64Assembler FINAL : public Assembler { public: - Mips64Assembler() {} - virtual ~Mips64Assembler() {} + Mips64Assembler() + : overwriting_(false), + overwrite_location_(0), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0) { + cfi().DelayEmittingAdvancePCs(); + } + + virtual ~Mips64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } + } // Emit Machine Instructions. 
void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); @@ -156,14 +207,12 @@ class Mips64Assembler FINAL : public Assembler { void Dclz(GpuRegister rd, GpuRegister rs); void Dclo(GpuRegister rd, GpuRegister rs); - void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void J(uint32_t addr26); - void Jal(uint32_t addr26); void Jalr(GpuRegister rd, GpuRegister rs); void Jalr(GpuRegister rs); void Jr(GpuRegister rs); void Auipc(GpuRegister rs, uint16_t imm16); + void Addiupc(GpuRegister rs, uint32_t imm19); + void Bc(uint32_t imm26); void Jic(GpuRegister rt, uint16_t imm16); void Jialc(GpuRegister rt, uint16_t imm16); void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); @@ -240,32 +289,34 @@ class Mips64Assembler FINAL : public Assembler { void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); - // Higher level composite instructions + // Higher level composite instructions. void LoadConst32(GpuRegister rd, int32_t value); void LoadConst64(GpuRegister rd, int64_t value); // MIPS64 - void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 - void Bind(Label* label) OVERRIDE; - void Jump(Label* label) OVERRIDE { - B(label); + void Bind(Label* label) OVERRIDE { + Bind(down_cast<Mips64Label*>(label)); + } + void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64"; } - void B(Label* label); - void Jalr(Label* label, GpuRegister indirect_reg = RA); - // TODO: implement common for R6 and non-R6 interface for conditional branches? - void Bltc(GpuRegister rs, GpuRegister rt, Label* label); - void Bltzc(GpuRegister rt, Label* label); - void Bgtzc(GpuRegister rt, Label* label); - void Bgec(GpuRegister rs, GpuRegister rt, Label* label); - void Bgezc(GpuRegister rt, Label* label); - void Blezc(GpuRegister rt, Label* label); - void Bltuc(GpuRegister rs, GpuRegister rt, Label* label); - void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label); - void Beqc(GpuRegister rs, GpuRegister rt, Label* label); - void Bnec(GpuRegister rs, GpuRegister rt, Label* label); - void Beqzc(GpuRegister rs, Label* label); - void Bnezc(GpuRegister rs, Label* label); + + void Bind(Mips64Label* label); + void Bc(Mips64Label* label); + void Jialc(Mips64Label* label, GpuRegister indirect_reg); + void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bltzc(GpuRegister rt, Mips64Label* label); + void Bgtzc(GpuRegister rt, Mips64Label* label); + void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgezc(GpuRegister rt, Mips64Label* label); + void Blezc(GpuRegister rt, Mips64Label* label); + void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqzc(GpuRegister rs, Mips64Label* label); + void Bnezc(GpuRegister rs, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -277,43 +328,42 @@ class Mips64Assembler FINAL : public Assembler { void Emit(uint32_t value); // - // Overridden common assembler high-level functionality + // Overridden common assembler high-level functionality. 
// - // Emit code that will create an activation on the stack + // Emit code that will create an activation on the stack. void BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - // Emit code that will remove an activation from the stack + // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE; void IncreaseFrameSize(size_t adjust) OVERRIDE; void DecreaseFrameSize(size_t adjust) OVERRIDE; - // Store routines + // Store routines. void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE; void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; - void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) OVERRIDE; - - void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; - void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE; + void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, ManagedRegister mscratch) OVERRIDE; - // Load routines + // Load routines. void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE; + void LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) OVERRIDE; void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; @@ -322,15 +372,16 @@ class Mips64Assembler FINAL : public Assembler { void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE; + void LoadRawPtrFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE; - // Copying routines + // Copying routines. void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; - void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs, + void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) OVERRIDE; - void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; @@ -354,13 +405,13 @@ class Mips64Assembler FINAL : public Assembler { void MemoryBarrier(ManagedRegister) OVERRIDE; - // Sign extension + // Sign extension. void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Zero extension + // Zero extension. void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Exploit fast access in managed code to Thread::Current() + // Exploit fast access in managed code to Thread::Current(). 
void GetCurrentThread(ManagedRegister tr) OVERRIDE; void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE; @@ -376,7 +427,7 @@ class Mips64Assembler FINAL : public Assembler { void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister mscratch, bool null_allowed) OVERRIDE; - // src holds a handle scope entry (Object**) load this into dst + // src holds a handle scope entry (Object**) load this into dst. void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; // Heap::VerifyObject on src. In some cases (such as a reference to this) we @@ -384,37 +435,253 @@ class Mips64Assembler FINAL : public Assembler { void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - // Call to address held at [base+offset] + // Call to address held at [base+offset]. void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE; + void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset, + ManagedRegister mscratch) OVERRIDE; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE; + // Emit slow paths queued during assembly and promote short branches to long if needed. + void FinalizeCode() OVERRIDE; + + // Emit branches and finalize all instructions. + void FinalizeInstructions(const MemoryRegion& region); + + // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, + // must be used instead of Mips64Label::GetPosition()). + uint32_t GetLabelLocation(Mips64Label* label) const; + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position); + + enum BranchCondition { + kCondLT, + kCondGE, + kCondLE, + kCondGT, + kCondLTZ, + kCondGEZ, + kCondLEZ, + kCondGTZ, + kCondEQ, + kCondNE, + kCondEQZ, + kCondNEZ, + kCondLTU, + kCondGEU, + kUncond, + }; + friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); + private: + class Branch { + public: + enum Type { + // Short branches. + kUncondBranch, + kCondBranch, + kCall, + // Long branches. + kLongUncondBranch, + kLongCondBranch, + kLongCall, + }; + + // Bit sizes of offsets defined as enums to minimize chance of typos. + enum OffsetBits { + kOffset16 = 16, + kOffset18 = 18, + kOffset21 = 21, + kOffset23 = 23, + kOffset28 = 28, + kOffset32 = 32, + }; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ + static constexpr int32_t kMaxBranchLength = 32; + static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t); + + struct BranchInfo { + // Branch length as a number of 4-byte-long instructions. + uint32_t length; + // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's + // PC-relative offset (or its most significant 16-bit half, which goes first). + uint32_t instr_offset; + // Different MIPS instructions with PC-relative offsets apply said offsets to slightly + // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte + // instructions) from the instruction containing the offset. 
+ uint32_t pc_org; + // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is + // an exception: use kOffset23 for beqzc/bnezc). + OffsetBits offset_size; + // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift + // count. + int offset_shift; + }; + static const BranchInfo branch_info_[/* Type */]; + + // Unconditional branch. + Branch(uint32_t location, uint32_t target); + // Conditional branch. + Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg = ZERO); + // Call (branch and link) that stores the target address in a given register (i.e. T9). + Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. + // So, we need a way to identify such branches in order to emit no instructions for them + // or change them to unconditional. + static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + GpuRegister GetLeftRegister() const; + GpuRegister GetRightRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetSize() const; + uint32_t GetOldSize() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsLong() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. + // + // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc), + // and 26 (bc) bits, which are additionally shifted left 2 positions at run time. + // + // Composite branches (made of several instructions) with longer reach have 32-bit + // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first). + // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end, + // however. Consider the following implementation of a long unconditional branch, for + // example: + // + // auipc at, offset_31_16 // at = pc + sign_extend(offset_31_16) << 16 + // jic at, offset_15_0 // pc = at + sign_extend(offset_15_0) + // + // Both of the above instructions take 16-bit signed offsets as immediate operands. + // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000 + // due to sign extension. This must be compensated for by incrementing offset_31_16 + // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is + // 0x7FFF, adding 1 will overflow the positive offset into the negative range. + // Therefore, the long branch range is something like from PC - 0x80000000 to + // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side. + // + // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special + // case with the addiu instruction and a 16 bit offset. 
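The sign-extension compensation described in the comment above is easy to get wrong, so a standalone sketch may help. The snippet below is illustrative only and is not part of the patch; the names OffsetHalves and SplitLongBranchOffset are hypothetical. It splits a resolved 32-bit PC-relative offset into the two 16-bit immediates used by an auipc + jic/daddiu pair, bumping the high half whenever the low half would sign-extend to a negative value:

#include <cstdint>

struct OffsetHalves {
  uint16_t high;  // Immediate for auipc: reg = pc + sign_extend(high) << 16.
  uint16_t low;   // Immediate for jic/daddiu: target = reg + sign_extend(low).
};

// Splits `offset` so that pc + (int16_t(high) << 16) + int16_t(low) == pc + int32_t(offset).
OffsetHalves SplitLongBranchOffset(uint32_t offset) {
  // If bit 15 is set, the low half sign-extends and subtracts 0x10000 at run time;
  // pre-add 0x10000 so the high half comes out one larger to compensate.
  uint32_t adjusted = offset + ((offset & 0x8000) << 1);
  return { static_cast<uint16_t>(adjusted >> 16),
           static_cast<uint16_t>(adjusted & 0xffff) };
}

This also makes the asymmetric range visible: a positive offset whose high half is already 0x7fff cannot be bumped without overflowing into the negative range, which is why the reach stops at PC + 0x7fff7fff rather than PC + 0x7fffffff.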
+ static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If the branch is short, changes its type to long. + void PromoteToLong(); + + // If necessary, updates the type by promoting a short branch to a long branch + // based on the branch location and target. Returns the amount (in bytes) by + // which the branch size has increased. + // max_short_distance caps the maximum distance between location_ and target_ + // that is allowed for short branches. This is for debugging/testing purposes. + // max_short_distance = 0 forces all short branches to become long. + // Use the implicit default argument when not debugging/testing. + uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + + // Returns the location of the instruction(s) containing the offset. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + uint32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(bool is_call); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + GpuRegister lhs_reg_; // Left-hand side register in conditional branches or + // indirect call register. + GpuRegister rhs_reg_; // Right-hand side register in conditional branches. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. 
+ }; + friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); + friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); + void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct); void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm); void EmitI21(int opcode, GpuRegister rs, uint32_t imm21); - void EmitJ(int opcode, uint32_t addr26); + void EmitI26(int opcode, uint32_t imm26); void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); + void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); - DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); -}; + void Buncond(Mips64Label* label); + void Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs = ZERO); + void Call(Mips64Label* label, GpuRegister indirect_reg); + void FinalizeLabeledBranch(Mips64Label* label); -// Slowpath entered when Thread::Current()->_exception is non-null -class Mips64ExceptionSlowPath FINAL : public SlowPath { - public: - Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; - private: - const Mips64ManagedRegister scratch_; - const size_t stack_adjust_; + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void PromoteBranches(); + void EmitBranch(Branch* branch); + void EmitBranches(); + void PatchCFI(); + + // Emits exception block. + void EmitExceptionPoll(Mips64ExceptionSlowPath* exception); + + // List of exception blocks to generate at the end of the code cache. + std::vector<Mips64ExceptionSlowPath> exception_blocks_; + + std::vector<Branch> branches_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Data for AdjustedPosition(), see the description there. + uint32_t last_position_adjustment_; + uint32_t last_old_position_; + uint32_t last_branch_id_; + + DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); }; } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 4413906fd7..29a5a88316 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -24,6 +24,8 @@ #include "base/stl_util.h" #include "utils/assembler_test.h" +#define __ GetAssembler()-> + namespace art { struct MIPS64CpuRegisterCompare { @@ -48,8 +50,26 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return "mips64"; } + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details. + return "gcc"; + } + std::string GetAssemblerParameters() OVERRIDE { - return " --no-warn -march=mips64r6"; + // We assemble and link for MIPS64R6. 
The reason is that object files produced for MIPS64R6 + // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. + size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); } std::string GetDisassembleParameters() OVERRIDE { @@ -182,6 +202,71 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return secondary_register_names_[reg]; } + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + + void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A1, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a1, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + + void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, $a1, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a2, $a3, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + private: std::vector<mips64::GpuRegister*> registers_; std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; @@ -194,7 +279,6 @@ TEST_F(AssemblerMIPS64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } - /////////////////// // FP Operations // /////////////////// @@ -348,7 +432,203 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { //////////////// TEST_F(AssemblerMIPS64Test, Jalr) { - DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); + DriverStr(".set 
noreorder\n" + + RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); +} + +TEST_F(AssemblerMIPS64Test, Jialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + std::string expected = + ".set noreorder\n" + "lapc $t9, 1f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "lapc $t9, 2f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "lapc $t9, 1b\n" + "jialc $t9, 0\n"; + DriverStr(expected, "Jialc"); +} + +TEST_F(AssemblerMIPS64Test, LongJialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr uint32_t kAdduCount1 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr uint32_t kAdduCount2 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + uint32_t offset_forward1 = 3 + kAdduCount1; // 3: account for auipc, daddiu and jic. + offset_forward1 <<= 2; + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_forward2 = 3 + kAdduCount2; // 3: account for auipc, daddiu and jic. + offset_forward2 <<= 2; + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_back = -(3 + kAdduCount2); // 3: account for auipc, daddiu and jic. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in daddiu. 
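As a concrete check of the numbers the LongJialc test expects (illustrative only, not part of the patch): with kAdduCount1 = kAdduCount2 = (1 << 18) + 1, the forward distance is 3 + kAdduCount1 = 0x40004 instructions, i.e. 0x100010 bytes. Bit 15 of the low half (0x0010) is clear, so no compensation is applied and the expected pair is auipc $t9, 0x10 / daddiu $t9, 0x10. The backward distance is -0x100010 = 0xffeffff0; there bit 15 of the low half (0xfff0) is set, so 0x10000 is added, giving 0xfff0fff0 and the pair auipc $t9, 0xfff0 / daddiu $t9, 0xfff0. The same arithmetic as compile-time assertions:

// Mirrors the offset computations in the LongJialc test above.
static_assert(((3u + (1u << 18) + 1u) << 2) == 0x100010u, "forward long-branch offset");
static_assert((0u - ((3u + (1u << 18) + 1u) << 2)) == 0xffeffff0u, "backward long-branch offset");
static_assert(0xffeffff0u + 0x10000u == 0xfff0fff0u, "high half bumped to compensate for daddiu sign extension");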
+ + std::ostringstream oss; + oss << + ".set noreorder\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "1:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "2:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "jialc $t9, 0\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongJialc"); +} + +TEST_F(AssemblerMIPS64Test, Bc) { + mips64::Mips64Label label1, label2; + __ Bc(&label1); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Bc(&label2); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Bc(&label1); + + std::string expected = + ".set noreorder\n" + "bc 1f\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "bc 2f\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "bc 1b\n"; + DriverStr(expected, "Bc"); +} + +TEST_F(AssemblerMIPS64Test, Beqzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc"); +} + +TEST_F(AssemblerMIPS64Test, Bnezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc"); +} + +TEST_F(AssemblerMIPS64Test, Bltzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc"); +} + +TEST_F(AssemblerMIPS64Test, Bgezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc"); +} + +TEST_F(AssemblerMIPS64Test, Blezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc"); +} + +TEST_F(AssemblerMIPS64Test, Bgtzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc"); +} + +TEST_F(AssemblerMIPS64Test, Beqc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc"); +} + +TEST_F(AssemblerMIPS64Test, Bnec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec"); +} + +TEST_F(AssemblerMIPS64Test, Bltc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc"); +} + +TEST_F(AssemblerMIPS64Test, Bgec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec"); +} + +TEST_F(AssemblerMIPS64Test, Bltuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc"); +} + +TEST_F(AssemblerMIPS64Test, Bgeuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc"); +} + +TEST_F(AssemblerMIPS64Test, LongBeqc) { + mips64::Mips64Label label; + __ Beqc(mips64::A0, mips64::A1, &label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Beqc(mips64::A2, mips64::A3, &label); + + uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kAdduCount2 + 1); // 1: account for bnec. 
+ offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. + + std::ostringstream oss; + oss << + ".set noreorder\n" + "bnec $a0, $a1, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "2:\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "bnec $a2, $a3, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqc"); } ////////// diff --git a/runtime/Android.mk b/runtime/Android.mk index 0b0f0942a3..571a2f5d64 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -103,6 +103,7 @@ LIBART_COMMON_SRC_FILES := \ jit/jit.cc \ jit/jit_code_cache.cc \ jit/jit_instrumentation.cc \ + jit/offline_profiling_info.cc \ jit/profiling_info.cc \ lambda/art_lambda_method.cc \ lambda/box_table.cc \ diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc index 70096f5627..4163e2efdf 100644 --- a/runtime/dex_file.cc +++ b/runtime/dex_file.cc @@ -1870,10 +1870,10 @@ bool DexFile::ProcessAnnotationValue(Handle<mirror::Class> klass, const uint8_t* Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader())); ArtField* enum_field = Runtime::Current()->GetClassLinker()->ResolveField( klass->GetDexFile(), index, dex_cache, class_loader, true); - Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass())); if (enum_field == nullptr) { return false; } else { + Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass())); Runtime::Current()->GetClassLinker()->EnsureInitialized(self, field_class, true, true); element_object = enum_field->GetObject(field_class.Get()); set_object = true; diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index 1fe9a03159..e2b2431054 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -58,10 +58,7 @@ static int32_t ChooseRelocationOffsetDelta(int32_t min_delta, int32_t max_delta) CHECK_ALIGNED(max_delta, kPageSize); CHECK_LT(min_delta, max_delta); - std::default_random_engine generator; - generator.seed(NanoTime() * getpid()); - std::uniform_int_distribution<int32_t> distribution(min_delta, max_delta); - int32_t r = distribution(generator); + int32_t r = GetRandomNumber<int32_t>(min_delta, max_delta); if (r % 2 == 0) { r = RoundUp(r, kPageSize); } else { diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index bf95a0e46f..c9831e67aa 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -66,6 +66,11 @@ namespace interpreter { } \ } while (false) +#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \ + do { \ + instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \ + } while (false) + static bool IsExperimentalInstructionEnabled(const Instruction *inst) { DCHECK(inst->IsExperimental()); return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas); @@ -542,6 +547,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int8_t offset = inst->VRegA_10t(inst_data); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ 
-551,6 +557,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int16_t offset = inst->VRegA_20t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -560,6 +567,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int32_t offset = inst->VRegA_30t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -569,6 +577,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -578,6 +587,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -681,6 +691,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -695,6 +706,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -709,6 +721,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -723,6 +736,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -737,6 +751,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -751,6 +766,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -764,6 +780,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -777,6 +794,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* 
code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -790,6 +808,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -803,6 +822,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -816,6 +836,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -829,6 +850,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index ecbf13c4b1..27a0e2d1af 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -24,6 +24,8 @@ #include "interpreter/interpreter.h" #include "jit_code_cache.h" #include "jit_instrumentation.h" +#include "oat_file_manager.h" +#include "offline_profiling_info.h" #include "runtime.h" #include "runtime_options.h" #include "utils.h" @@ -44,6 +46,8 @@ JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& opt options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold); jit_options->dump_info_on_shutdown_ = options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown); + jit_options->save_profiling_info_ = + options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);; return jit_options; } @@ -76,6 +80,10 @@ Jit* Jit::Create(JitOptions* options, std::string* error_msg) { if (jit->GetCodeCache() == nullptr) { return nullptr; } + jit->offline_profile_info_.reset(nullptr); + if (options->GetSaveProfilingInfo()) { + jit->offline_profile_info_.reset(new OfflineProfilingInfo()); + } LOG(INFO) << "JIT created with initial_capacity=" << PrettySize(options->GetCodeCacheInitialCapacity()) << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity()) @@ -152,6 +160,33 @@ void Jit::DeleteThreadPool() { } } +void Jit::SaveProfilingInfo(const std::string& filename) { + if (offline_profile_info_ == nullptr) { + return; + } + // Note that we can't check the PrimaryOatFile when constructing the offline_profilie_info_ + // because it becomes known to the Runtime after we create and initialize the JIT. 
+ const OatFile* primary_oat_file = Runtime::Current()->GetOatFileManager().GetPrimaryOatFile(); + if (primary_oat_file == nullptr) { + LOG(WARNING) << "Couldn't find a primary oat file when trying to save profile info to " + << filename; + return; + } + + uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs(); + if (offline_profile_info_->NeedsSaving(last_update_ns)) { + VLOG(profiler) << "Iniate save profiling information to: " << filename; + std::set<ArtMethod*> methods; + { + ScopedObjectAccess soa(Thread::Current()); + code_cache_->GetCompiledArtMethods(primary_oat_file, methods); + } + offline_profile_info_->SaveProfilingInfo(filename, last_update_ns, methods); + } else { + VLOG(profiler) << "No need to save profiling information to: " << filename; + } +} + Jit::~Jit() { if (dump_info_on_shutdown_) { DumpInfo(LOG(INFO)); diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index fc76549013..630eba34af 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -26,6 +26,7 @@ #include "gc_root.h" #include "jni.h" #include "object_callbacks.h" +#include "offline_profiling_info.h" #include "thread_pool.h" namespace art { @@ -71,6 +72,8 @@ class Jit { return instrumentation_cache_.get(); } + void SaveProfilingInfo(const std::string& filename); + private: Jit(); bool LoadCompiler(std::string* error_msg); @@ -90,6 +93,7 @@ class Jit { std::unique_ptr<jit::JitCodeCache> code_cache_; CompilerCallbacks* compiler_callbacks_; // Owned by the jit compiler. + std::unique_ptr<OfflineProfilingInfo> offline_profile_info_; DISALLOW_COPY_AND_ASSIGN(Jit); }; @@ -111,12 +115,18 @@ class JitOptions { bool DumpJitInfoOnShutdown() const { return dump_info_on_shutdown_; } + bool GetSaveProfilingInfo() const { + return save_profiling_info_; + } bool UseJIT() const { return use_jit_; } void SetUseJIT(bool b) { use_jit_ = b; } + void SetSaveProfilingInfo(bool b) { + save_profiling_info_ = b; + } private: bool use_jit_; @@ -125,13 +135,15 @@ class JitOptions { size_t compile_threshold_; size_t warmup_threshold_; bool dump_info_on_shutdown_; + bool save_profiling_info_; JitOptions() : use_jit_(false), code_cache_initial_capacity_(0), code_cache_max_capacity_(0), compile_threshold_(0), - dump_info_on_shutdown_(false) { } + dump_info_on_shutdown_(false), + save_profiling_info_(false) { } DISALLOW_COPY_AND_ASSIGN(JitOptions); }; diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index da79109b4f..804d69fbf8 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -19,6 +19,7 @@ #include <sstream> #include "art_method-inl.h" +#include "base/time_utils.h" #include "entrypoints/runtime_asm_entrypoints.h" #include "gc/accounting/bitmap-inl.h" #include "jit/profiling_info.h" @@ -109,7 +110,8 @@ JitCodeCache::JitCodeCache(MemMap* code_map, current_capacity_(initial_code_capacity + initial_data_capacity), code_end_(initial_code_capacity), data_end_(initial_data_capacity), - has_done_one_collection_(false) { + has_done_one_collection_(false), + last_update_time_ns_(0) { code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/); data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/); @@ -314,6 +316,7 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, // code. 
GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr)); } + last_update_time_ns_ = NanoTime(); VLOG(jit) << "JIT added " << PrettyMethod(method) << "@" << method @@ -677,5 +680,19 @@ void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_S } } +void JitCodeCache::GetCompiledArtMethods(const OatFile* oat_file, + std::set<ArtMethod*>& methods) { + MutexLock mu(Thread::Current(), lock_); + for (auto it : method_code_map_) { + if (it.second->GetDexFile()->GetOatDexFile()->GetOatFile() == oat_file) { + methods.insert(it.second); + } + } +} + +uint64_t JitCodeCache::GetLastUpdateTimeNs() { + MutexLock mu(Thread::Current(), lock_); + return last_update_time_ns_; +} } // namespace jit } // namespace art diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 13481e0e67..acd7c62940 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -139,6 +139,13 @@ class JitCodeCache { void* MoreCore(const void* mspace, intptr_t increment); + // Adds to `methods` all the compiled ArtMethods which are part of the given `oat_file`. + void GetCompiledArtMethods(const OatFile* oat_file, std::set<ArtMethod*>& methods) + REQUIRES(!lock_) + SHARED_REQUIRES(Locks::mutator_lock_); + + uint64_t GetLastUpdateTimeNs() REQUIRES(!lock_); + private: // Take ownership of maps. JitCodeCache(MemMap* code_map, @@ -228,6 +235,9 @@ class JitCodeCache { // Whether a collection has already been done on the current capacity. bool has_done_one_collection_ GUARDED_BY(lock_); + // Last time the the code_cache was updated. + uint64_t last_update_time_ns_ GUARDED_BY(lock_); + DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache); }; diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc new file mode 100644 index 0000000000..4450653a90 --- /dev/null +++ b/runtime/jit/offline_profiling_info.cc @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offline_profiling_info.h" + +#include <fstream> +#include <set> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/uio.h> + +#include "art_method-inl.h" +#include "base/mutex.h" +#include "jit/profiling_info.h" +#include "safe_map.h" +#include "utils.h" + +namespace art { + +// An arbitrary value to throttle save requests. Set to 500ms for now. 
+static constexpr const uint64_t kMilisecondsToNano = 1000000; +static constexpr const uint64_t kMinimumTimeBetweenSavesNs = 500 * kMilisecondsToNano; + +bool OfflineProfilingInfo::NeedsSaving(uint64_t last_update_time_ns) const { + return last_update_time_ns - last_update_time_ns_.LoadRelaxed() > kMinimumTimeBetweenSavesNs; +} + +void OfflineProfilingInfo::SaveProfilingInfo(const std::string& filename, + uint64_t last_update_time_ns, + const std::set<ArtMethod*>& methods) { + if (!NeedsSaving(last_update_time_ns)) { + VLOG(profiler) << "No need to saved profile info to " << filename; + return; + } + + if (methods.empty()) { + VLOG(profiler) << "No info to save to " << filename; + return; + } + + DexFileToMethodsMap info; + { + ScopedObjectAccess soa(Thread::Current()); + for (auto it = methods.begin(); it != methods.end(); it++) { + AddMethodInfo(*it, &info); + } + } + + // This doesn't need locking because we are trying to lock the file for exclusive + // access and fail immediately if we can't. + if (Serialize(filename, info)) { + last_update_time_ns_.StoreRelaxed(last_update_time_ns); + VLOG(profiler) << "Successfully saved profile info to " + << filename << " with time stamp: " << last_update_time_ns; + } +} + + +void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) { + DCHECK(method != nullptr); + const DexFile* dex_file = method->GetDexFile(); + + auto info_it = info->find(dex_file); + if (info_it == info->end()) { + info_it = info->Put(dex_file, std::set<uint32_t>()); + } + info_it->second.insert(method->GetDexMethodIndex()); +} + +static int OpenOrCreateFile(const std::string& filename) { + // TODO(calin) allow the shared uid of the app to access the file. + int fd = open(filename.c_str(), + O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC, + S_IRUSR | S_IWUSR); + if (fd < 0) { + PLOG(WARNING) << "Failed to open profile file " << filename; + return -1; + } + + // Lock the file for exclusive access but don't wait if we can't lock it. + int err = flock(fd, LOCK_EX | LOCK_NB); + if (err < 0) { + PLOG(WARNING) << "Failed to lock profile file " << filename; + return -1; + } + + return fd; +} + +static bool CloseDescriptorForFile(int fd, const std::string& filename) { + // Now unlock the file, allowing another process in. + int err = flock(fd, LOCK_UN); + if (err < 0) { + PLOG(WARNING) << "Failed to unlock profile file " << filename; + return false; + } + + // Done, close the file. + err = ::close(fd); + if (err < 0) { + PLOG(WARNING) << "Failed to close descriptor for profile file" << filename; + return false; + } + + return true; +} + +static void WriteToFile(int fd, const std::ostringstream& os) { + std::string data(os.str()); + const char *p = data.c_str(); + size_t length = data.length(); + do { + int n = ::write(fd, p, length); + p += n; + length -= n; + } while (length > 0); +} + +static constexpr char kFieldSeparator = ','; +static constexpr char kLineSeparator = '\n'; + +/** + * Serialization format: + * multidex_suffix1,dex_location_checksum1,method_id11,method_id12... + * multidex_suffix2,dex_location_checksum2,method_id21,method_id22... + * e.g. + * ,131232145,11,23,454,54 -> this is the first dex file, it has no multidex suffix + * :classes5.dex,218490184,39,13,49,1 -> this is the fifth dex file. 
+ **/ +bool OfflineProfilingInfo::Serialize(const std::string& filename, + const DexFileToMethodsMap& info) const { + int fd = OpenOrCreateFile(filename); + if (fd == -1) { + return false; + } + + // TODO(calin): Merge with a previous existing profile. + // TODO(calin): Profile this and see how much memory it takes. If too much, + // write to file directly. + std::ostringstream os; + for (auto it : info) { + const DexFile* dex_file = it.first; + const std::set<uint32_t>& method_dex_ids = it.second; + + os << DexFile::GetMultiDexSuffix(dex_file->GetLocation()) + << kFieldSeparator + << dex_file->GetLocationChecksum(); + for (auto method_it : method_dex_ids) { + os << kFieldSeparator << method_it; + } + os << kLineSeparator; + } + + WriteToFile(fd, os); + + return CloseDescriptorForFile(fd, filename); +} +} // namespace art diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h new file mode 100644 index 0000000000..e3117eb5ee --- /dev/null +++ b/runtime/jit/offline_profiling_info.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_ +#define ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_ + +#include <set> + +#include "atomic.h" +#include "dex_file.h" +#include "safe_map.h" + +namespace art { + +class ArtMethod; + +/** + * Profiling information in a format that can be serialized to disk. + * It is a serialize-friendly format based on information collected + * by the interpreter (ProfileInfo). + * Currently it stores only the hot compiled methods. + */ +class OfflineProfilingInfo { + public: + bool NeedsSaving(uint64_t last_update_time_ns) const; + void SaveProfilingInfo(const std::string& filename, + uint64_t last_update_time_ns, + const std::set<ArtMethod*>& methods); + + private: + // Map identifying the location of the profiled methods. + // dex_file_ -> [dex_method_index]+ + using DexFileToMethodsMap = SafeMap<const DexFile*, std::set<uint32_t>>; + + void AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) + SHARED_REQUIRES(Locks::mutator_lock_); + bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const; + + // TODO(calin): Verify if Atomic is really needed (are we sure to be called from a + // singe thread?) 
+ Atomic<uint64_t> last_update_time_ns_; +}; + +} // namespace art + +#endif // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_ diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index 415109fb06..5e3fa199e5 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -1689,7 +1689,8 @@ class JNI { } else { CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf); const jchar* chars = s->GetValue(); - ConvertUtf16ToModifiedUtf8(buf, chars + start, length); + size_t bytes = CountUtf8Bytes(chars + start, length); + ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length); } } @@ -1772,7 +1773,7 @@ class JNI { char* bytes = new char[byte_count + 1]; CHECK(bytes != nullptr); // bionic aborts anyway. const uint16_t* chars = s->GetValue(); - ConvertUtf16ToModifiedUtf8(bytes, chars, s->GetLength()); + ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength()); bytes[byte_count] = '\0'; return bytes; } diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc index be869d4e6a..33aca0304c 100644 --- a/runtime/mirror/string.cc +++ b/runtime/mirror/string.cc @@ -109,12 +109,17 @@ String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_ String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) { DCHECK(utf != nullptr); - size_t char_count = CountModifiedUtf8Chars(utf); - return AllocFromModifiedUtf8(self, char_count, utf); + size_t byte_count = strlen(utf); + size_t char_count = CountModifiedUtf8Chars(utf, byte_count); + return AllocFromModifiedUtf8(self, char_count, utf, byte_count); +} + +String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in) { + return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in)); } String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, - const char* utf8_data_in) { + const char* utf8_data_in, int32_t utf8_length) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); SetStringCountVisitor visitor(utf16_length); String* string = Alloc<true>(self, utf16_length, allocator_type, visitor); @@ -122,7 +127,7 @@ String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, return nullptr; } uint16_t* utf16_data_out = string->GetValue(); - ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in); + ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length); return string; } @@ -217,7 +222,7 @@ std::string String::ToModifiedUtf8() { const uint16_t* chars = GetValue(); size_t byte_count = GetUtfLength(); std::string result(byte_count, static_cast<char>(0)); - ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength()); + ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength()); return result; } diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h index 80ebd2cf0f..e2cfb8d5ad 100644 --- a/runtime/mirror/string.h +++ b/runtime/mirror/string.h @@ -116,6 +116,10 @@ class MANAGED String FINAL : public Object { static String* AllocFromModifiedUtf8(Thread* self, const char* utf) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, + const char* utf8_data_in, int32_t utf8_length) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); diff --git 
a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 4cd3c3d730..da6cf1f198 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -155,7 +155,9 @@ static jobject DexFile_openDexFileNative(JNIEnv* env, jstring javaOutputName, jint flags ATTRIBUTE_UNUSED, // class_loader will be used for app images. - jobject class_loader ATTRIBUTE_UNUSED) { + jobject class_loader ATTRIBUTE_UNUSED, + // dex_elements will be used for app images. + jobject dex_elements ATTRIBUTE_UNUSED) { ScopedUtfChars sourceName(env, javaSourceName); if (sourceName.c_str() == nullptr) { return 0; @@ -445,7 +447,12 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(DexFile, getDexOptNeeded, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"), NATIVE_METHOD(DexFile, openDexFileNative, - "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/ClassLoader;)Ljava/lang/Object;"), + "(Ljava/lang/String;" + "Ljava/lang/String;" + "I" + "Ljava/lang/ClassLoader;" + "[Ldalvik/system/DexPathList$Element;" + ")Ljava/lang/Object;"), }; void register_dalvik_system_DexFile(JNIEnv* env) { diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index 4c5dc3ad25..b49d68f6ce 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -562,17 +562,20 @@ static void VMRuntime_preloadDexCaches(JNIEnv* env, jobject) { /* * This is called by the framework when it knows the application directory and - * process name. We use this information to start up the sampling profiler for - * for ART. + * process name. */ -static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName, - jstring appDir ATTRIBUTE_UNUSED, +static void VMRuntime_registerAppInfo(JNIEnv* env, + jclass clazz ATTRIBUTE_UNUSED, + jstring pkgName, + jstring appDir, jstring procName ATTRIBUTE_UNUSED) { - const char *pkgNameChars = env->GetStringUTFChars(pkgName, nullptr); - std::string profileFile = StringPrintf("/data/dalvik-cache/profiles/%s", pkgNameChars); + const char* appDirChars = env->GetStringUTFChars(appDir, nullptr); + const char* pkgNameChars = env->GetStringUTFChars(pkgName, nullptr); + std::string profileFile = StringPrintf("%s/code_cache/%s.prof", appDirChars, pkgNameChars); - Runtime::Current()->StartProfiler(profileFile.c_str()); + Runtime::Current()->SetJitProfilingFilename(profileFile.c_str()); + env->ReleaseStringUTFChars(appDir, appDirChars); env->ReleaseStringUTFChars(pkgName, pkgNameChars); } diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index dfd783b988..585c7c4596 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -164,6 +164,9 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize .Define("-Xjitwarmupthreshold:_") .WithType<unsigned int>() .IntoKey(M::JITWarmupThreshold) + .Define("-Xjitsaveprofilinginfo") + .WithValue(true) + .IntoKey(M::JITSaveProfilingInfo) .Define("-XX:HspaceCompactForOOMMinIntervalMs=_") // in ms .WithType<MillisecondsToNanoseconds>() // store as ns .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs) diff --git a/runtime/runtime.cc b/runtime/runtime.cc index a210aa8c16..931e581ce3 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -218,6 +218,9 @@ Runtime::~Runtime() { if (is_native_bridge_loaded_) { UnloadNativeBridge(); } + + MaybeSaveJitProfilingInfo(); + if (dump_gc_performance_on_shutdown_) { // This can't be called from the Heap destructor below because it // could 
call RosAlloc::InspectAll() which needs the thread_list @@ -601,7 +604,6 @@ bool Runtime::Start() { LOG(INFO) << "Failed to access the profile file. Profiler disabled."; return true; } - StartProfiler(profile_output_filename_.c_str()); } if (trace_config_.get() != nullptr && trace_config_->trace_file != "") { @@ -1618,10 +1620,8 @@ void Runtime::SetCalleeSaveMethod(ArtMethod* method, CalleeSaveType type) { callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method); } -void Runtime::StartProfiler(const char* profile_output_filename) { +void Runtime::SetJitProfilingFilename(const char* profile_output_filename) { profile_output_filename_ = profile_output_filename; - profiler_started_ = - BackgroundMethodSamplingProfiler::Start(profile_output_filename_, profiler_options_); } // Transaction support. @@ -1767,8 +1767,16 @@ void Runtime::AddCurrentRuntimeFeaturesAsDex2OatArguments(std::vector<std::strin argv->push_back(feature_string); } +void Runtime::MaybeSaveJitProfilingInfo() { + if (jit_.get() != nullptr && !profile_output_filename_.empty()) { + jit_->SaveProfilingInfo(profile_output_filename_); + } +} + void Runtime::UpdateProfilerState(int state) { - VLOG(profiler) << "Profiler state updated to " << state; + if (state == kProfileBackground) { + MaybeSaveJitProfilingInfo(); + } } void Runtime::CreateJit() { diff --git a/runtime/runtime.h b/runtime/runtime.h index d61663cd10..bd3641405d 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -457,7 +457,7 @@ class Runtime { return &instrumentation_; } - void StartProfiler(const char* profile_output_filename); + void SetJitProfilingFilename(const char* profile_output_filename); void UpdateProfilerState(int state); // Transaction support. @@ -608,12 +608,14 @@ class Runtime { void StartDaemonThreads(); void StartSignalCatcher(); + void MaybeSaveJitProfilingInfo(); + // A pointer to the active runtime or null. static Runtime* instance_; // NOTE: these must match the gc::ProcessState values as they come directly from the framework. static constexpr int kProfileForground = 0; - static constexpr int kProfileBackgrouud = 1; + static constexpr int kProfileBackground = 1; // 64 bit so that we can share the same asm offsets for both 32 and 64 bits. uint64_t callee_save_methods_[kLastCalleeSaveType]; diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def index 9051eda0df..5624285b09 100644 --- a/runtime/runtime_options.def +++ b/runtime/runtime_options.def @@ -71,6 +71,7 @@ RUNTIME_OPTIONS_KEY (unsigned int, JITCompileThreshold, jit::J RUNTIME_OPTIONS_KEY (unsigned int, JITWarmupThreshold, jit::Jit::kDefaultWarmupThreshold) RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheInitialCapacity, jit::JitCodeCache::kInitialCapacity) RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheMaxCapacity, jit::JitCodeCache::kMaxCapacity) +RUNTIME_OPTIONS_KEY (bool, JITSaveProfilingInfo, false) RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \ HSpaceCompactForOOMMinIntervalsMs,\ MsToNs(100 * 1000)) // 100s diff --git a/runtime/safe_map.h b/runtime/safe_map.h index 7ac17b60d6..4e62dda8dd 100644 --- a/runtime/safe_map.h +++ b/runtime/safe_map.h @@ -92,7 +92,7 @@ class SafeMap { DCHECK(result.second); // Check we didn't accidentally overwrite an existing value. return result.first; } - iterator Put(const K& k, const V&& v) { + iterator Put(const K& k, V&& v) { std::pair<iterator, bool> result = map_.emplace(k, std::move(v)); DCHECK(result.second); // Check we didn't accidentally overwrite an existing value. 
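  // (Illustrative note, not part of this change.) The switch from `const V&& v`
  // to `V&& v` in Put and PutBefore matters because std::move(v) on a const
  // rvalue reference yields a const xvalue, which can only bind to the copy
  // constructor; the emplace(k, std::move(v)) above would therefore copy the
  // value despite the std::move. With a non-const V&& the move constructor is
  // selected as intended.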
return result.first; @@ -105,7 +105,7 @@ class SafeMap { DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k)); return map_.emplace_hint(pos, k, v); } - iterator PutBefore(iterator pos, const K& k, const V&& v) { + iterator PutBefore(iterator pos, const K& k, V&& v) { // Check that we're using the correct position and the key is not in the map. DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first)); DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k)); diff --git a/runtime/utf.cc b/runtime/utf.cc index 10600e2153..5a116980c9 100644 --- a/runtime/utf.cc +++ b/runtime/utf.cc @@ -23,28 +23,50 @@ namespace art { +// This is used only from debugger and test code. size_t CountModifiedUtf8Chars(const char* utf8) { + return CountModifiedUtf8Chars(utf8, strlen(utf8)); +} + +/* + * This does not validate UTF8 rules (nor did older code). But it gets the right answer + * for valid UTF-8 and that's fine because it's used only to size a buffer for later + * conversion. + * + * Modified UTF-8 consists of a series of bytes up to 21 bit Unicode code points as follows: + * U+0001 - U+007F 0xxxxxxx + * U+0080 - U+07FF 110xxxxx 10xxxxxx + * U+0800 - U+FFFF 1110xxxx 10xxxxxx 10xxxxxx + * U+10000 - U+1FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * + * U+0000 is encoded using the 2nd form to avoid nulls inside strings (this differs from + * standard UTF-8). + * The four byte encoding converts to two utf16 characters. + */ +size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count) { + DCHECK_LE(byte_count, strlen(utf8)); size_t len = 0; - int ic; - while ((ic = *utf8++) != '\0') { + const char* end = utf8 + byte_count; + for (; utf8 < end; ++utf8) { + int ic = *utf8; len++; - if ((ic & 0x80) == 0) { - // one-byte encoding + if (LIKELY((ic & 0x80) == 0)) { + // One-byte encoding. continue; } - // two- or three-byte encoding + // Two- or three-byte encoding. utf8++; if ((ic & 0x20) == 0) { - // two-byte encoding + // Two-byte encoding. continue; } utf8++; if ((ic & 0x10) == 0) { - // three-byte encoding + // Three-byte encoding. continue; } - // four-byte encoding: needs to be converted into a surrogate + // Four-byte encoding: needs to be converted into a surrogate // pair. utf8++; len++; @@ -52,6 +74,7 @@ size_t CountModifiedUtf8Chars(const char* utf8) { return len; } +// This is used only from debugger and test code. void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) { while (*utf8_data_in != '\0') { const uint32_t ch = GetUtf16FromUtf8(&utf8_data_in); @@ -65,13 +88,53 @@ void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_ } } -void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) { +void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, size_t out_chars, + const char* utf8_data_in, size_t in_bytes) { + const char *in_start = utf8_data_in; + const char *in_end = utf8_data_in + in_bytes; + uint16_t *out_p = utf16_data_out; + + if (LIKELY(out_chars == in_bytes)) { + // Common case where all characters are ASCII. + for (const char *p = in_start; p < in_end;) { + // Safe even if char is signed because ASCII characters always have + // the high bit cleared. + *out_p++ = dchecked_integral_cast<uint16_t>(*p++); + } + return; + } + + // String contains non-ASCII characters. 
+ for (const char *p = in_start; p < in_end;) { + const uint32_t ch = GetUtf16FromUtf8(&p); + const uint16_t leading = GetLeadingUtf16Char(ch); + const uint16_t trailing = GetTrailingUtf16Char(ch); + + *out_p++ = leading; + if (trailing != 0) { + *out_p++ = trailing; + } + } +} + +void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, + const uint16_t* utf16_in, size_t char_count) { + if (LIKELY(byte_count == char_count)) { + // Common case where all characters are ASCII. + const uint16_t *utf16_end = utf16_in + char_count; + for (const uint16_t *p = utf16_in; p < utf16_end;) { + *utf8_out++ = dchecked_integral_cast<char>(*p++); + } + return; + } + + // String contains non-ASCII characters. while (char_count--) { const uint16_t ch = *utf16_in++; if (ch > 0 && ch <= 0x7f) { *utf8_out++ = ch; } else { - // char_count == 0 here implies we've encountered an unpaired + // Char_count == 0 here implies we've encountered an unpaired // surrogate and we have no choice but to encode it as 3-byte UTF // sequence. Note that unpaired surrogates can occur as a part of // "normal" operation. @@ -161,34 +224,31 @@ int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8, const uint16_t size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) { size_t result = 0; - while (char_count--) { + const uint16_t *end = chars + char_count; + while (chars < end) { const uint16_t ch = *chars++; - if (ch > 0 && ch <= 0x7f) { - ++result; - } else if (ch >= 0xd800 && ch <= 0xdbff) { - if (char_count > 0) { + if (LIKELY(ch != 0 && ch < 0x80)) { + result++; + continue; + } + if (ch < 0x800) { + result += 2; + continue; + } + if (ch >= 0xd800 && ch < 0xdc00) { + if (chars < end) { const uint16_t ch2 = *chars; // If we find a properly paired surrogate, we emit it as a 4 byte // UTF sequence. If we find an unpaired leading or trailing surrogate, // we emit it as a 3 byte sequence like would have done earlier. - if (ch2 >= 0xdc00 && ch2 <= 0xdfff) { + if (ch2 >= 0xdc00 && ch2 < 0xe000) { chars++; - char_count--; - result += 4; - } else { - result += 3; + continue; } - } else { - // This implies we found an unpaired trailing surrogate at the end - // of a string. - result += 3; } - } else if (ch > 0x7ff) { - result += 3; - } else { - result += 2; } + result += 3; } return result; } diff --git a/runtime/utf.h b/runtime/utf.h index 1193d29c7d..03158c492d 100644 --- a/runtime/utf.h +++ b/runtime/utf.h @@ -40,6 +40,7 @@ namespace mirror { * Returns the number of UTF-16 characters in the given modified UTF-8 string. */ size_t CountModifiedUtf8Chars(const char* utf8); +size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count); /* * Returns the number of modified UTF-8 bytes needed to represent the given @@ -51,6 +52,8 @@ size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count); * Convert from Modified UTF-8 to UTF-16. */ void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, const char* utf8_in); +void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, size_t out_chars, + const char* utf8_in, size_t in_bytes); /* * Compare two modified UTF-8 strings as UTF-16 code point values in a non-locale sensitive manner @@ -71,7 +74,8 @@ int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8, const uint16_t * this anyway, so if you want a NUL-terminated string, you know where to * put the NUL byte. 
*/ -void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count); +void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, + const uint16_t* utf16_in, size_t char_count); /* * The java.lang.String hashCode() algorithm. diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc index 94a6ea57e2..5239e40540 100644 --- a/runtime/utf_test.cc +++ b/runtime/utf_test.cc @@ -19,6 +19,7 @@ #include "common_runtime_test.h" #include "utf-inl.h" +#include <map> #include <vector> namespace art { @@ -48,7 +49,7 @@ static const uint8_t kAllSequences[] = { }; // A test string that contains a UTF-8 encoding of a surrogate pair -// (code point = U+10400) +// (code point = U+10400). static const uint8_t kSurrogateEncoding[] = { 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x80, @@ -66,13 +67,13 @@ TEST_F(UtfTest, GetUtf16FromUtf8) { EXPECT_EQ(0, GetTrailingUtf16Char(pair)); EXPECT_ARRAY_POSITION(1, ptr, start); - // Two byte sequence + // Two byte sequence. pair = GetUtf16FromUtf8(&ptr); EXPECT_EQ(0xa2, GetLeadingUtf16Char(pair)); EXPECT_EQ(0, GetTrailingUtf16Char(pair)); EXPECT_ARRAY_POSITION(3, ptr, start); - // Three byte sequence + // Three byte sequence. pair = GetUtf16FromUtf8(&ptr); EXPECT_EQ(0x20ac, GetLeadingUtf16Char(pair)); EXPECT_EQ(0, GetTrailingUtf16Char(pair)); @@ -84,7 +85,7 @@ TEST_F(UtfTest, GetUtf16FromUtf8) { EXPECT_EQ(0xdfe0, GetTrailingUtf16Char(pair)); EXPECT_ARRAY_POSITION(10, ptr, start); - // Null terminator + // Null terminator. pair = GetUtf16FromUtf8(&ptr); EXPECT_EQ(0, GetLeadingUtf16Char(pair)); EXPECT_EQ(0, GetTrailingUtf16Char(pair)); @@ -117,7 +118,8 @@ static void AssertConversion(const std::vector<uint16_t> input, ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size())); std::vector<uint8_t> output(expected.size()); - ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), &input[0], input.size()); + ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), expected.size(), + &input[0], input.size()); EXPECT_EQ(expected, output); } @@ -139,10 +141,10 @@ TEST_F(UtfTest, CountAndConvertUtf8Bytes) { AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f }); AssertConversion({ - 0xd802, 0xdc02, // Surrogate pair - 0xdef0, 0xdcff, // Three byte encodings - 0x0101, 0x0000, // Two byte encodings - 'p' , 'p' // One byte encoding + 0xd802, 0xdc02, // Surrogate pair. + 0xdef0, 0xdcff, // Three byte encodings. + 0x0101, 0x0000, // Two byte encodings. + 'p' , 'p' // One byte encoding. }, { 0xf0, 0x90, 0xa0, 0x82, 0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf, @@ -155,9 +157,225 @@ TEST_F(UtfTest, CountAndConvertUtf8Bytes_UnpairedSurrogate) { // Unpaired trailing surrogate at the end of input. AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 }); // Unpaired (or incorrectly paired) surrogates in the middle of the input. 
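  // Worked byte counts exercised by these conversion tests (illustrative,
  // consistent with the expectations already present in this file):
  //   0xd801 (unpaired lead surrogate)  -> 3 bytes: 0xed 0xa0 0x81
  //   0xd802 0xdc02 (paired, U+10802)   -> 4 bytes: 0xf0 0x90 0xa0 0x82
  //   'h' (U+0068)                      -> 1 byte:  0x68
  //   U+0000                            -> 2 bytes: 0xc0 0x80 (Modified UTF-8 NUL)
  //   U+0101                            -> 2 bytes: 0xc4 0x81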
- AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' }); - AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' }); - AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' }); + const std::map<std::vector<uint16_t>, std::vector<uint8_t>> prefixes { + {{ 'h' }, { 'h' }}, + {{ 0 }, { 0xc0, 0x80 }}, + {{ 0x81 }, { 0xc2, 0x81 }}, + {{ 0x801 }, { 0xe0, 0xa0, 0x81 }}, + }; + const std::map<std::vector<uint16_t>, std::vector<uint8_t>> suffixes { + {{ 'e' }, { 'e' }}, + {{ 0 }, { 0xc0, 0x80 }}, + {{ 0x7ff }, { 0xdf, 0xbf }}, + {{ 0xffff }, { 0xef, 0xbf, 0xbf }}, + }; + const std::map<std::vector<uint16_t>, std::vector<uint8_t>> tests { + {{ 0xd801 }, { 0xed, 0xa0, 0x81 }}, + {{ 0xdc00 }, { 0xed, 0xb0, 0x80 }}, + {{ 0xd801, 0xd801 }, { 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81 }}, + {{ 0xdc00, 0xdc00 }, { 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80 }}, + }; + for (const auto& prefix : prefixes) { + const std::vector<uint16_t>& prefix_in = prefix.first; + const std::vector<uint8_t>& prefix_out = prefix.second; + for (const auto& test : tests) { + const std::vector<uint16_t>& test_in = test.first; + const std::vector<uint8_t>& test_out = test.second; + for (const auto& suffix : suffixes) { + const std::vector<uint16_t>& suffix_in = suffix.first; + const std::vector<uint8_t>& suffix_out = suffix.second; + std::vector<uint16_t> in = prefix_in; + in.insert(in.end(), test_in.begin(), test_in.end()); + in.insert(in.end(), suffix_in.begin(), suffix_in.end()); + std::vector<uint8_t> out = prefix_out; + out.insert(out.end(), test_out.begin(), test_out.end()); + out.insert(out.end(), suffix_out.begin(), suffix_out.end()); + AssertConversion(in, out); + } + } + } +} + +// Old versions of functions, here to compare answers with optimized versions. + +size_t CountModifiedUtf8Chars_reference(const char* utf8) { + size_t len = 0; + int ic; + while ((ic = *utf8++) != '\0') { + len++; + if ((ic & 0x80) == 0) { + // one-byte encoding + continue; + } + // two- or three-byte encoding + utf8++; + if ((ic & 0x20) == 0) { + // two-byte encoding + continue; + } + utf8++; + if ((ic & 0x10) == 0) { + // three-byte encoding + continue; + } + + // four-byte encoding: needs to be converted into a surrogate + // pair. + utf8++; + len++; + } + return len; +} + +static size_t CountUtf8Bytes_reference(const uint16_t* chars, size_t char_count) { + size_t result = 0; + while (char_count--) { + const uint16_t ch = *chars++; + if (ch > 0 && ch <= 0x7f) { + ++result; + } else if (ch >= 0xd800 && ch <= 0xdbff) { + if (char_count > 0) { + const uint16_t ch2 = *chars; + // If we find a properly paired surrogate, we emit it as a 4 byte + // UTF sequence. If we find an unpaired leading or trailing surrogate, + // we emit it as a 3 byte sequence like would have done earlier. + if (ch2 >= 0xdc00 && ch2 <= 0xdfff) { + chars++; + char_count--; + + result += 4; + } else { + result += 3; + } + } else { + // This implies we found an unpaired trailing surrogate at the end + // of a string. 
+ result += 3; + } + } else if (ch > 0x7ff) { + result += 3; + } else { + result += 2; + } + } + return result; +} + +static void ConvertUtf16ToModifiedUtf8_reference(char* utf8_out, const uint16_t* utf16_in, + size_t char_count) { + while (char_count--) { + const uint16_t ch = *utf16_in++; + if (ch > 0 && ch <= 0x7f) { + *utf8_out++ = ch; + } else { + // Char_count == 0 here implies we've encountered an unpaired + // surrogate and we have no choice but to encode it as 3-byte UTF + // sequence. Note that unpaired surrogates can occur as a part of + // "normal" operation. + if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) { + const uint16_t ch2 = *utf16_in; + + // Check if the other half of the pair is within the expected + // range. If it isn't, we will have to emit both "halves" as + // separate 3 byte sequences. + if (ch2 >= 0xdc00 && ch2 <= 0xdfff) { + utf16_in++; + char_count--; + const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00; + *utf8_out++ = (code_point >> 18) | 0xf0; + *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80; + *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80; + *utf8_out++ = (code_point & 0x3f) | 0x80; + continue; + } + } + + if (ch > 0x07ff) { + // Three byte encoding. + *utf8_out++ = (ch >> 12) | 0xe0; + *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80; + *utf8_out++ = (ch & 0x3f) | 0x80; + } else /*(ch > 0x7f || ch == 0)*/ { + // Two byte encoding. + *utf8_out++ = (ch >> 6) | 0xc0; + *utf8_out++ = (ch & 0x3f) | 0x80; + } + } + } +} + +// Exhaustive test of converting a single code point to UTF-16, then UTF-8, and back again. + +static void codePointToSurrogatePair(uint32_t code_point, uint16_t &first, uint16_t &second) { + first = (code_point >> 10) + 0xd7c0; + second = (code_point & 0x03ff) + 0xdc00; +} + +static void testConversions(uint16_t *buf, int char_count) { + char bytes_test[8], bytes_reference[8]; + uint16_t out_buf_test[4], out_buf_reference[4]; + int byte_count_test, byte_count_reference; + int char_count_test, char_count_reference; + + // Calculate the number of utf-8 bytes for the utf-16 chars. + byte_count_reference = CountUtf8Bytes_reference(buf, char_count); + byte_count_test = CountUtf8Bytes(buf, char_count); + EXPECT_EQ(byte_count_reference, byte_count_test); + + // Convert the utf-16 string to utf-8 bytes. + ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count); + ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count); + for (int i = 0; i < byte_count_test; ++i) { + EXPECT_EQ(bytes_reference[i], bytes_test[i]); + } + + // Calculate the number of utf-16 chars from the utf-8 bytes. + bytes_reference[byte_count_reference] = 0; // Reference function needs null termination. + char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference); + char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test); + EXPECT_EQ(char_count, char_count_reference); + EXPECT_EQ(char_count, char_count_test); + + // Convert the utf-8 bytes back to utf-16 chars. + // Does not need copied _reference version of the function because the original + // function with the old API is retained for debug/testing code. 
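  // (Illustrative derivation, not part of this change.) The magic constants in
  // codePointToSurrogatePair() and ConvertUtf16ToModifiedUtf8_reference() come
  // from the standard UTF-16 surrogate encoding for cp in [U+10000, U+10FFFF]:
  //   lead  = 0xd800 + ((cp - 0x10000) >> 10) = (cp >> 10) + (0xd800 - 0x40)
  //         = (cp >> 10) + 0xd7c0
  //   trail = 0xdc00 + ((cp - 0x10000) & 0x3ff) = (cp & 0x3ff) + 0xdc00
  // and, inverting for the decoder:
  //   cp = ((lead - 0xd800) << 10) + (trail - 0xdc00) + 0x10000
  //      = (lead << 10) + trail - ((0xd800 << 10) + 0xdc00 - 0x10000)
  //      = (lead << 10) + trail - 0x035fdc00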
+ ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference); + ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test); + for (int i = 0; i < char_count_test; ++i) { + EXPECT_EQ(buf[i], out_buf_reference[i]); + EXPECT_EQ(buf[i], out_buf_test[i]); + } +} + +TEST_F(UtfTest, ExhaustiveBidirectionalCodePointCheck) { + for (int codePoint = 0; codePoint <= 0x10ffff; ++codePoint) { + uint16_t buf[4]; + if (codePoint <= 0xffff) { + if (codePoint >= 0xd800 && codePoint <= 0xdfff) { + // According to the Unicode standard, no character will ever + // be assigned to these code points, and they can not be encoded + // into either utf-16 or utf-8. + continue; + } + buf[0] = 'h'; + buf[1] = codePoint; + buf[2] = 'e'; + testConversions(buf, 2); + testConversions(buf, 3); + testConversions(buf + 1, 1); + testConversions(buf + 1, 2); + } else { + buf[0] = 'h'; + codePointToSurrogatePair(codePoint, buf[1], buf[2]); + buf[3] = 'e'; + testConversions(buf, 2); + testConversions(buf, 3); + testConversions(buf, 4); + testConversions(buf + 1, 1); + testConversions(buf + 1, 2); + testConversions(buf + 1, 3); + } + } } } // namespace art diff --git a/runtime/utils.h b/runtime/utils.h index 3690f86a80..8b7941a1b2 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -18,9 +18,11 @@ #define ART_RUNTIME_UTILS_H_ #include <pthread.h> +#include <stdlib.h> #include <limits> #include <memory> +#include <random> #include <string> #include <type_traits> #include <vector> @@ -350,6 +352,26 @@ void ParseDouble(const std::string& option, double* parsed_value, UsageFn Usage); +#if defined(__BIONIC__) +struct Arc4RandomGenerator { + typedef uint32_t result_type; + static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); } + static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); } + uint32_t operator() () { return arc4random(); } +}; +using RNG = Arc4RandomGenerator; +#else +using RNG = std::random_device; +#endif + +template <typename T> +T GetRandomNumber(T min, T max) { + CHECK_LT(min, max); + std::uniform_int_distribution<T> dist(min, max); + RNG rng; + return dist(rng); +} + } // namespace art #endif // ART_RUNTIME_UTILS_H_ diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index d5fed2adfe..6151fc10f2 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -389,24 +389,6 @@ public class Main { return arg << 0; } - /// CHECK-START: int Main.Shl1(int) instruction_simplifier (before) - /// CHECK-DAG: <<Arg:i\d+>> ParameterValue - /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 - /// CHECK-DAG: <<Shl:i\d+>> Shl [<<Arg>>,<<Const1>>] - /// CHECK-DAG: Return [<<Shl>>] - - /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after) - /// CHECK-DAG: <<Arg:i\d+>> ParameterValue - /// CHECK-DAG: <<Add:i\d+>> Add [<<Arg>>,<<Arg>>] - /// CHECK-DAG: Return [<<Add>>] - - /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after) - /// CHECK-NOT: Shl - - public static int Shl1(int arg) { - return arg << 1; - } - /// CHECK-START: long Main.Shr0(long) instruction_simplifier (before) /// CHECK-DAG: <<Arg:j\d+>> ParameterValue /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 @@ -1245,7 +1227,6 @@ public class Main { return arg * 9; } - /** * Test strength reduction of factors of the form (2^n - 1). 
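For reference, the GetRandomNumber helper added to runtime/utils.h above is meant to be used directly at call sites. A hedged usage sketch — the function name and bounds below are invented for illustration, and the range is inclusive on both ends:

#include <cstdint>
#include "utils.h"  // For GetRandomNumber, added above.

// Illustrative call site: pick a jitter delay uniformly in [500, 2000] ms.
static int32_t PickJitterMs() {
  return GetRandomNumber<int32_t>(500, 2000);
}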
*/ @@ -1265,6 +1246,91 @@ public class Main { return arg * 31; } + /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (before) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Field:z\d+>> StaticFieldGet + /// CHECK-DAG: <<NE:z\d+>> NotEqual [<<Field>>,<<Const1>>] + /// CHECK-DAG: If [<<NE>>] + + /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (after) + /// CHECK-DAG: <<Field:z\d+>> StaticFieldGet + /// CHECK-DAG: <<Not:z\d+>> BooleanNot [<<Field>>] + /// CHECK-DAG: If [<<Not>>] + + public static int booleanFieldNotEqualOne() { + return (booleanField == true) ? 13 : 54; + } + + /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 + /// CHECK-DAG: <<Field:z\d+>> StaticFieldGet + /// CHECK-DAG: <<EQ:z\d+>> Equal [<<Field>>,<<Const0>>] + /// CHECK-DAG: If [<<EQ>>] + + /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (after) + /// CHECK-DAG: <<Field:z\d+>> StaticFieldGet + /// CHECK-DAG: <<Not:z\d+>> BooleanNot [<<Field>>] + /// CHECK-DAG: If [<<Not>>] + + public static int booleanFieldEqualZero() { + return (booleanField != false) ? 13 : 54; + } + + /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 + /// CHECK-DAG: <<GT:z\d+>> GreaterThan [<<Arg>>,<<Const42>>] + /// CHECK-DAG: <<NE:z\d+>> NotEqual [<<GT>>,<<Const1>>] + /// CHECK-DAG: If [<<NE>>] + + /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 + /// CHECK-DAG: If [<<LE:z\d+>>] + /// CHECK-DAG: <<LE>> LessThanOrEqual [<<Arg>>,<<Const42>>] + // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions. + + public static int intConditionNotEqualOne(int i) { + return ((i > 42) == true) ? 13 : 54; + } + + /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 + /// CHECK-DAG: <<GT:z\d+>> GreaterThan [<<Arg>>,<<Const42>>] + /// CHECK-DAG: <<EQ:z\d+>> Equal [<<GT>>,<<Const0>>] + /// CHECK-DAG: If [<<EQ>>] + + /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 + /// CHECK-DAG: If [<<LE:z\d+>>] + /// CHECK-DAG: <<LE>> LessThanOrEqual [<<Arg>>,<<Const42>>] + // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions. + + public static int intConditionEqualZero(int i) { + return ((i > 42) != false) ? 13 : 54; + } + + // Test that conditions on float/double are not flipped. + + /// CHECK-START: int Main.floatConditionNotEqualOne(float) register (before) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: NotEqual [{{i\d+}},<<Const1>>] + + public static int floatConditionNotEqualOne(float f) { + return ((f > 42.0f) == true) ? 
13 : 54; + } + + /// CHECK-START: int Main.doubleConditionEqualZero(double) register (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 + /// CHECK-DAG: Equal [{{i\d+}},<<Const0>>] + + public static int doubleConditionEqualZero(double d) { + return ((d > 42.0) != false) ? 13 : 54; + } public static void main(String[] args) { int arg = 123456; @@ -1314,7 +1380,6 @@ public class Main { assertDoubleEquals(Div2(150.0), 75.0); assertFloatEquals(DivMP25(100.0f), -400.0f); assertDoubleEquals(DivMP25(150.0), -600.0); - assertLongEquals(Shl1(100), 200); assertIntEquals(UShr28And15(0xc1234567), 0xc); assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL); assertIntEquals(UShr28And7(0xc1234567), 0x4); @@ -1333,5 +1398,22 @@ public class Main { assertLongEquals(62, mulPow2Minus1(2)); assertLongEquals(3100, mulPow2Minus1(100)); assertLongEquals(382695, mulPow2Minus1(12345)); - } + + booleanField = false; + assertIntEquals(booleanFieldNotEqualOne(), 54); + assertIntEquals(booleanFieldEqualZero(), 54); + booleanField = true; + assertIntEquals(booleanFieldNotEqualOne(), 13); + assertIntEquals(booleanFieldEqualZero(), 13); + assertIntEquals(intConditionNotEqualOne(6), 54); + assertIntEquals(intConditionNotEqualOne(43), 13); + assertIntEquals(intConditionEqualZero(6), 54); + assertIntEquals(intConditionEqualZero(43), 13); + assertIntEquals(floatConditionNotEqualOne(6.0f), 54); + assertIntEquals(floatConditionNotEqualOne(43.0f), 13); + assertIntEquals(doubleConditionEqualZero(6.0), 54); + assertIntEquals(doubleConditionEqualZero(43.0), 13); + } + + public static boolean booleanField; } diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java index 13c4722bc4..17e88ceb21 100644 --- a/test/530-checker-lse/src/Main.java +++ b/test/530-checker-lse/src/Main.java @@ -136,6 +136,9 @@ public class Main { // A new allocation shouldn't alias with pre-existing values. static int test3(TestClass obj) { + // Do an allocation here to avoid the HLoadClass and HClinitCheck + // at the second allocation. 
+ new TestClass(); obj.i = 1; obj.next.j = 2; TestClass obj2 = new TestClass(); diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java index 12f0380df0..f791adfd9a 100644 --- a/test/538-checker-embed-constants/src/Main.java +++ b/test/538-checker-embed-constants/src/Main.java @@ -260,26 +260,43 @@ public class Main { return arg ^ 0xf00000000000000fL; } + /// CHECK-START-ARM: long Main.shl1(long) disassembly (after) + /// CHECK: lsls{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1 + /// CHECK: adc{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}} + + /// CHECK-START-ARM: long Main.shl1(long) disassembly (after) + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + + /// CHECK-START-X86: long Main.shl1(long) disassembly (after) + /// CHECK: add + /// CHECK: adc + + /// CHECK-START-X86: long Main.shl1(long) disassembly (after) + /// CHECK-NOT: shl + + public static long shl1(long arg) { + return arg << 1; + } + /// CHECK-START-ARM: long Main.shl2(long) disassembly (after) - /// CHECK: lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #2 + /// CHECK: lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #2 /// CHECK: orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30 - /// CHECK-DAG: lsl{{s?|.w}} {{r\d+}}, <<low>>, #2 + /// CHECK: lsl{{s?|\.w}} {{r\d+}}, <<low>>, #2 /// CHECK-START-ARM: long Main.shl2(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shl2(long arg) { - // Note: Shl(x, 1) is transformed to Add(x, x), so test Shl(x, 2). return arg << 2; } /// CHECK-START-ARM: long Main.shl31(long) disassembly (after) - /// CHECK: lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #31 + /// CHECK: lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #31 /// CHECK: orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #1 - /// CHECK: lsl{{s?|.w}} {{r\d+}}, <<low>>, #31 + /// CHECK: lsl{{s?|\.w}} {{r\d+}}, <<low>>, #31 /// CHECK-START-ARM: long Main.shl31(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shl31(long arg) { return arg << 31; @@ -287,114 +304,136 @@ public class Main { /// CHECK-START-ARM: long Main.shl32(long) disassembly (after) /// CHECK-DAG: mov {{r\d+}}, {{r\d+}} - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.shl32(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} + /// CHECK-NOT: lsl{{s?|\.w}} public static long shl32(long arg) { return arg << 32; } /// CHECK-START-ARM: long Main.shl33(long) disassembly (after) - /// CHECK-DAG: lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1 - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.shl33(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shl33(long arg) { return arg << 33; } /// CHECK-START-ARM: long Main.shl63(long) disassembly (after) - /// CHECK-DAG: lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31 - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.shl63(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shl63(long arg) { 
return arg << 63; } /// CHECK-START-ARM: long Main.shr1(long) disassembly (after) - /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1 - /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31 - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high>>, #1 + /// CHECK: asrs{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1 + /// CHECK: mov.w {{r\d+}}, {{r\d+}}, rrx /// CHECK-START-ARM: long Main.shr1(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shr1(long arg) { return arg >> 1; } + /// CHECK-START-ARM: long Main.shr2(long) disassembly (after) + /// CHECK: lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2 + /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high>>, #2 + + /// CHECK-START-ARM: long Main.shr2(long) disassembly (after) + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + + public static long shr2(long arg) { + return arg >> 2; + } + /// CHECK-START-ARM: long Main.shr31(long) disassembly (after) - /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31 + /// CHECK: lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31 /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1 - /// CHECK: asr{{s?|.w}} {{r\d+}}, <<high>>, #31 + /// CHECK: asr{{s?|\.w}} {{r\d+}}, <<high>>, #31 /// CHECK-START-ARM: long Main.shr31(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shr31(long arg) { return arg >> 31; } /// CHECK-START-ARM: long Main.shr32(long) disassembly (after) - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31 /// CHECK-DAG: mov {{r\d+}}, <<high>> /// CHECK-START-ARM: long Main.shr32(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} - /// CHECK-NOT: lsr{{s?|.w}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} public static long shr32(long arg) { return arg >> 32; } /// CHECK-START-ARM: long Main.shr33(long) disassembly (after) - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1 - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high>>, #31 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high>>, #31 /// CHECK-START-ARM: long Main.shr33(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shr33(long arg) { return arg >> 33; } /// CHECK-START-ARM: long Main.shr63(long) disassembly (after) - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31 - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high>>, #31 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high>>, #31 /// CHECK-START-ARM: long Main.shr63(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shr63(long arg) { return arg >> 63; } /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after) - /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1 - /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31 - /// CHECK-DAG: lsr{{s?|.w}} {{r\d+}}, <<high>>, #1 + /// CHECK: lsrs{{|.w}} {{r\d+}}, {{r\d+}}, #1 + /// CHECK: mov.w {{r\d+}}, {{r\d+}}, rrx /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} 
{{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long ushr1(long arg) { return arg >>> 1; } + /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after) + /// CHECK: lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2 + /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30 + /// CHECK-DAG: lsr{{s?|\.w}} {{r\d+}}, <<high>>, #2 + + /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after) + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + + public static long ushr2(long arg) { + return arg >>> 2; + } + /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after) - /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31 + /// CHECK: lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31 /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1 - /// CHECK: lsr{{s?|.w}} {{r\d+}}, <<high>>, #31 + /// CHECK: lsr{{s?|\.w}} {{r\d+}}, <<high>>, #31 /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long ushr31(long arg) { return arg >>> 31; @@ -402,32 +441,32 @@ public class Main { /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after) /// CHECK-DAG: mov {{r\d+}}, {{r\d+}} - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} + /// CHECK-NOT: lsr{{s?|\.w}} public static long ushr32(long arg) { return arg >>> 32; } /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after) - /// CHECK-DAG: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #1 - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #1 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long ushr33(long arg) { return arg >>> 33; } /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after) - /// CHECK-DAG: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #31 - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #31 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long ushr63(long arg) { return arg >>> 63; @@ -485,11 +524,13 @@ public class Main { assertLongEquals(14, addM1(7)); + assertLongEquals(shl1(longArg), 0x2468acf10eca8642L); assertLongEquals(shl2(longArg), 0x48d159e21d950c84L); assertLongEquals(shl31(longArg), 0x43b2a19080000000L); assertLongEquals(shl32(longArg), 0x8765432100000000L); assertLongEquals(shl33(longArg), 0x0eca864200000000L); assertLongEquals(shl63(longArg), 0x8000000000000000L); + assertLongEquals(shl1(~longArg), 0xdb97530ef13579bcL); assertLongEquals(shl2(~longArg), 0xb72ea61de26af378L); assertLongEquals(shl31(~longArg), 0xbc4d5e6f00000000L); assertLongEquals(shl32(~longArg), 0x789abcde00000000L); @@ -497,22 +538,26 @@ public class Main { assertLongEquals(shl63(~longArg), 0x0000000000000000L); assertLongEquals(shr1(longArg), 0x091a2b3c43b2a190L); + assertLongEquals(shr2(longArg), 0x048d159e21d950c8L); assertLongEquals(shr31(longArg), 0x000000002468acf1L); assertLongEquals(shr32(longArg), 0x0000000012345678L); assertLongEquals(shr33(longArg), 0x00000000091a2b3cL); 
assertLongEquals(shr63(longArg), 0x0000000000000000L); assertLongEquals(shr1(~longArg), 0xf6e5d4c3bc4d5e6fL); + assertLongEquals(shr2(~longArg), 0xfb72ea61de26af37L); assertLongEquals(shr31(~longArg), 0xffffffffdb97530eL); assertLongEquals(shr32(~longArg), 0xffffffffedcba987L); assertLongEquals(shr33(~longArg), 0xfffffffff6e5d4c3L); assertLongEquals(shr63(~longArg), 0xffffffffffffffffL); assertLongEquals(ushr1(longArg), 0x091a2b3c43b2a190L); + assertLongEquals(ushr2(longArg), 0x048d159e21d950c8L); assertLongEquals(ushr31(longArg), 0x000000002468acf1L); assertLongEquals(ushr32(longArg), 0x0000000012345678L); assertLongEquals(ushr33(longArg), 0x00000000091a2b3cL); assertLongEquals(ushr63(longArg), 0x0000000000000000L); assertLongEquals(ushr1(~longArg), 0x76e5d4c3bc4d5e6fL); + assertLongEquals(ushr2(~longArg), 0x3b72ea61de26af37L); assertLongEquals(ushr31(~longArg), 0x00000001db97530eL); assertLongEquals(ushr32(~longArg), 0x00000000edcba987L); assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L); diff --git a/test/543-env-long-ref/env_long_ref.cc b/test/543-env-long-ref/env_long_ref.cc new file mode 100644 index 0000000000..41083235d9 --- /dev/null +++ b/test/543-env-long-ref/env_long_ref.cc @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arch/context.h" +#include "art_method-inl.h" +#include "jni.h" +#include "scoped_thread_state_change.h" +#include "stack.h" +#include "thread.h" + +namespace art { + +namespace { + +class TestVisitor : public StackVisitor { + public: + TestVisitor(const ScopedObjectAccess& soa, Context* context, jobject expected_value) + SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(soa.Self(), context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + expected_value_(expected_value), + found_(false), + soa_(soa) {} + + bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) { + ArtMethod* m = GetMethod(); + std::string m_name(m->GetName()); + + if (m_name == "testCase") { + found_ = true; + uint32_t value = 0; + CHECK(GetVReg(m, 1, kReferenceVReg, &value)); + CHECK_EQ(reinterpret_cast<mirror::Object*>(value), + soa_.Decode<mirror::Object*>(expected_value_)); + } + return true; + } + + jobject expected_value_; + bool found_; + const ScopedObjectAccess& soa_; +}; + +} // namespace + +extern "C" JNIEXPORT void JNICALL Java_Main_lookForMyRegisters(JNIEnv*, jclass, jobject value) { + ScopedObjectAccess soa(Thread::Current()); + std::unique_ptr<Context> context(Context::Create()); + TestVisitor visitor(soa, context.get(), value); + visitor.WalkStack(); + CHECK(visitor.found_); +} + +} // namespace art diff --git a/test/543-env-long-ref/expected.txt b/test/543-env-long-ref/expected.txt new file mode 100644 index 0000000000..89f155b8c9 --- /dev/null +++ b/test/543-env-long-ref/expected.txt @@ -0,0 +1,2 @@ +JNI_OnLoad called +42 diff --git a/test/543-env-long-ref/info.txt b/test/543-env-long-ref/info.txt new file mode 100644 index 0000000000..6a4253364e --- /dev/null +++ b/test/543-env-long-ref/info.txt @@ -0,0 +1,3 @@ +Regression test for optimizing that used to not return +the right dex register in debuggable when a new value +was overwriting the high dex register of a wide value. diff --git a/test/543-env-long-ref/smali/TestCase.smali b/test/543-env-long-ref/smali/TestCase.smali new file mode 100644 index 0000000000..608d6eb96a --- /dev/null +++ b/test/543-env-long-ref/smali/TestCase.smali @@ -0,0 +1,26 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTestCase; +.super Ljava/lang/Object; + +.method public static testCase()I + .registers 5 + const-wide/16 v0, 0x1 + invoke-static {v0, v1}, LMain;->$noinline$allocate(J)LMain; + move-result-object v1 + invoke-static {v1}, LMain;->lookForMyRegisters(LMain;)V + iget v2, v1, LMain;->field:I + return v2 +.end method diff --git a/test/543-env-long-ref/src/Main.java b/test/543-env-long-ref/src/Main.java new file mode 100644 index 0000000000..e723789ce2 --- /dev/null +++ b/test/543-env-long-ref/src/Main.java @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + // Workaround for b/18051191. + class InnerClass {} + + public static void main(String[] args) throws Throwable { + System.loadLibrary(args[0]); + Class<?> c = Class.forName("TestCase"); + Method m = c.getMethod("testCase"); + Integer a = (Integer)m.invoke(null, (Object[]) null); + System.out.println(a); + } + + public static Main $noinline$allocate(long a) { + try { + return new Main(); + } catch (Exception e) { + throw new Error(e); + } + } + + public static native void lookForMyRegisters(Main m); + + int field = 42; +} diff --git a/test/550-checker-multiply-accumulate/expected.txt b/test/550-checker-multiply-accumulate/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/550-checker-multiply-accumulate/expected.txt diff --git a/test/550-checker-multiply-accumulate/info.txt b/test/550-checker-multiply-accumulate/info.txt new file mode 100644 index 0000000000..10e998cb18 --- /dev/null +++ b/test/550-checker-multiply-accumulate/info.txt @@ -0,0 +1 @@ +Test the merging of instructions into the shifter operand on arm64. diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java new file mode 100644 index 0000000000..2d0688d57e --- /dev/null +++ b/test/550-checker-multiply-accumulate/src/Main.java @@ -0,0 +1,234 @@ +/* +* Copyright (C) 2015 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +public class Main { + + // A dummy value to defeat inlining of these routines. + static boolean doThrow = false; + + public static void assertIntEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertLongEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + /** + * Test basic merging of `MUL+ADD` into `MULADD`. 
+ */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Left:i\d+>> ParameterValue + /// CHECK: <<Right:i\d+>> ParameterValue + /// CHECK: <<Mul:i\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:i\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: Return [<<Add>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Left:i\d+>> ParameterValue + /// CHECK: <<Right:i\d+>> ParameterValue + /// CHECK: <<MulAdd:i\d+>> Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add + /// CHECK: Return [<<MulAdd>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Mul + /// CHECK-NOT: Add + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after) + /// CHECK: madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}} + + public static int $opt$noinline$mulAdd(int acc, int left, int right) { + if (doThrow) throw new Error(); + return acc + left * right; + } + + /** + * Test basic merging of `MUL+SUB` into `MULSUB`. + */ + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Left:j\d+>> ParameterValue + /// CHECK: <<Right:j\d+>> ParameterValue + /// CHECK: <<Mul:j\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Sub:j\d+>> Sub [<<Acc>>,<<Mul>>] + /// CHECK: Return [<<Sub>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Left:j\d+>> ParameterValue + /// CHECK: <<Right:j\d+>> ParameterValue + /// CHECK: <<MulSub:j\d+>> Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub + /// CHECK: Return [<<MulSub>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Mul + /// CHECK-NOT: Sub + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after) + /// CHECK: msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}} + + public static long $opt$noinline$mulSub(long acc, long left, long right) { + if (doThrow) throw new Error(); + return acc - left * right; + } + + /** + * Test that we do not create a multiply-accumulate instruction when there + * are other uses of the multiplication that cannot merge it. 
+ */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Left:i\d+>> ParameterValue + /// CHECK: <<Right:i\d+>> ParameterValue + /// CHECK: <<Mul:i\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:i\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: <<Or:i\d+>> Or [<<Mul>>,<<Add>>] + /// CHECK: Return [<<Or>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Left:i\d+>> ParameterValue + /// CHECK: <<Right:i\d+>> ParameterValue + /// CHECK: <<Mul:i\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:i\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: <<Or:i\d+>> Or [<<Mul>>,<<Add>>] + /// CHECK: Return [<<Or>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64MultiplyAccumulate + + public static int $opt$noinline$multipleUses1(int acc, int left, int right) { + if (doThrow) throw new Error(); + int temp = left * right; + return temp | (acc + temp); + } + + /** + * Test that we do not create a multiply-accumulate instruction even when all + * uses of the multiplication can merge it. + */ + + /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Left:j\d+>> ParameterValue + /// CHECK: <<Right:j\d+>> ParameterValue + /// CHECK: <<Mul:j\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:j\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: <<Sub:j\d+>> Sub [<<Acc>>,<<Mul>>] + /// CHECK: <<Res:j\d+>> Add [<<Add>>,<<Sub>>] + /// CHECK: Return [<<Res>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Left:j\d+>> ParameterValue + /// CHECK: <<Right:j\d+>> ParameterValue + /// CHECK: <<Mul:j\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:j\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: <<Sub:j\d+>> Sub [<<Acc>>,<<Mul>>] + /// CHECK: <<Res:j\d+>> Add [<<Add>>,<<Sub>>] + /// CHECK: Return [<<Res>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64MultiplyAccumulate + + + public static long $opt$noinline$multipleUses2(long acc, long left, long right) { + if (doThrow) throw new Error(); + long temp = left * right; + return (acc + temp) + (acc - temp); + } + + + /** + * Test the interpretation of `a * (b + 1)` as `a + (a * b)`. 
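   * For example (worked numbers, illustrative): with acc = 13 and var = 14,
   * acc * (var + 1) = 13 * 15 = 195, and acc + acc * var = 13 + 182 = 195 as
   * well, matching the value asserted for $opt$noinline$mulPlusOne(13, 14) in
   * main(). The `a * (1 - b)` case below works out the same way:
   * 15 * (1 - 16) = -225 = 15 - 15 * 16.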
+ */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Var:i\d+>> ParameterValue + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Add:i\d+>> Add [<<Var>>,<<Const1>>] + /// CHECK: <<Mul:i\d+>> Mul [<<Acc>>,<<Add>>] + /// CHECK: Return [<<Mul>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Var:i\d+>> ParameterValue + /// CHECK: <<MulAdd:i\d+>> Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add + /// CHECK: Return [<<MulAdd>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Mul + /// CHECK-NOT: Add + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after) + /// CHECK: madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}} + + public static int $opt$noinline$mulPlusOne(int acc, int var) { + if (doThrow) throw new Error(); + return acc * (var + 1); + } + + + /** + * Test the interpretation of `a * (1 - b)` as `a - (a * b)`. + */ + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Var:j\d+>> ParameterValue + /// CHECK: <<Const1:j\d+>> LongConstant 1 + /// CHECK: <<Sub:j\d+>> Sub [<<Const1>>,<<Var>>] + /// CHECK: <<Mul:j\d+>> Mul [<<Acc>>,<<Sub>>] + /// CHECK: Return [<<Mul>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Var:j\d+>> ParameterValue + /// CHECK: <<MulSub:j\d+>> Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub + /// CHECK: Return [<<MulSub>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Mul + /// CHECK-NOT: Sub + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after) + /// CHECK: msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}} + + public static long $opt$noinline$mulMinusOne(long acc, long var) { + if (doThrow) throw new Error(); + return acc * (1 - var); + } + + + public static void main(String[] args) { + assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3)); + assertLongEquals(-26, $opt$noinline$mulSub(4, 5, 6)); + assertIntEquals(79, $opt$noinline$multipleUses1(7, 8, 9)); + assertLongEquals(20, $opt$noinline$multipleUses2(10, 11, 12)); + assertIntEquals(195, $opt$noinline$mulPlusOne(13, 14)); + assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16)); + } +} diff --git a/test/550-checker-regression-wide-store/expected.txt b/test/550-checker-regression-wide-store/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/550-checker-regression-wide-store/expected.txt diff --git a/test/550-checker-regression-wide-store/info.txt b/test/550-checker-regression-wide-store/info.txt new file mode 100644 index 0000000000..6cf04bc35a --- /dev/null +++ b/test/550-checker-regression-wide-store/info.txt @@ -0,0 +1,3 @@ +Test an SsaBuilder regression where storing into the high vreg of a pair +would not invalidate the low vreg. The resulting environment would generate +an incorrect stack map, causing deopt and try/catch to use a wrong location.
\ No newline at end of file diff --git a/test/550-checker-regression-wide-store/smali/TestCase.smali b/test/550-checker-regression-wide-store/smali/TestCase.smali new file mode 100644 index 0000000000..7974d56a8f --- /dev/null +++ b/test/550-checker-regression-wide-store/smali/TestCase.smali @@ -0,0 +1,82 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTestCase; +.super Ljava/lang/Object; + +.method public static $noinline$throw()V + .registers 1 + new-instance v0, Ljava/lang/Exception; + invoke-direct {v0}, Ljava/lang/Exception;-><init>()V + throw v0 +.end method + +# Test storing into the high vreg of a wide pair. This scenario has runtime +# behaviour implications so we run it from Main.main. + +## CHECK-START: int TestCase.invalidateLow(long) ssa_builder (after) +## CHECK-DAG: <<Cst0:i\d+>> IntConstant 0 +## CHECK-DAG: <<Arg:j\d+>> ParameterValue +## CHECK-DAG: <<Cast:i\d+>> TypeConversion [<<Arg>>] +## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[_,<<Cst0>>,<<Arg>>,_]] +## CHECK-DAG: InvokeStaticOrDirect method_name:TestCase.$noinline$throw env:[[_,<<Cast>>,<<Arg>>,_]] + +.method public static invalidateLow(J)I + .registers 4 + + const/4 v1, 0x0 + + :try_start + invoke-static {}, Ljava/lang/System;->nanoTime()J + move-wide v0, p0 + long-to-int v1, v0 + invoke-static {}, LTestCase;->$noinline$throw()V + :try_end + .catchall {:try_start .. :try_end} :catchall + + :catchall + return v1 + +.end method + +# Test that storing a wide invalidates the value in the high vreg. This +# cannot be detected from runtime so we only test the environment with Checker. + +## CHECK-START: void TestCase.invalidateHigh1(long) ssa_builder (after) +## CHECK-DAG: <<Arg:j\d+>> ParameterValue +## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,<<Arg>>,_]] + +.method public static invalidateHigh1(J)V + .registers 4 + + const/4 v1, 0x0 + move-wide v0, p0 + invoke-static {}, Ljava/lang/System;->nanoTime()J + return-void + +.end method + +## CHECK-START: void TestCase.invalidateHigh2(long) ssa_builder (after) +## CHECK-DAG: <<Arg:j\d+>> ParameterValue +## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,_,<<Arg>>,_]] + +.method public static invalidateHigh2(J)V + .registers 5 + + move-wide v1, p0 + move-wide v0, p0 + invoke-static {}, Ljava/lang/System;->nanoTime()J + return-void + +.end method diff --git a/test/550-checker-regression-wide-store/src/Main.java b/test/550-checker-regression-wide-store/src/Main.java new file mode 100644 index 0000000000..9b502df632 --- /dev/null +++ b/test/550-checker-regression-wide-store/src/Main.java @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + + // Workaround for b/18051191. + class InnerClass {} + + private static int runTestCase(String name, long arg) throws Exception { + Class<?> c = Class.forName("TestCase"); + Method m = c.getMethod(name, long.class); + int result = (Integer) m.invoke(null, arg); + return result; + } + + private static void assertEquals(int expected, int actual) { + if (expected != actual) { + throw new Error("Wrong result: " + expected + " != " + actual); + } + } + + public static void main(String[] args) throws Exception { + assertEquals(42, runTestCase("invalidateLow", 42L)); + } +} diff --git a/test/550-new-instance-clinit/expected.txt b/test/550-new-instance-clinit/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/550-new-instance-clinit/expected.txt diff --git a/test/550-new-instance-clinit/info.txt b/test/550-new-instance-clinit/info.txt new file mode 100644 index 0000000000..c5fa3c7cc9 --- /dev/null +++ b/test/550-new-instance-clinit/info.txt @@ -0,0 +1,3 @@ +Regression test for optimizing which used to treat +HNewInstance as not having side effects even though it +could invoke a clinit method. diff --git a/test/550-new-instance-clinit/src/Main.java b/test/550-new-instance-clinit/src/Main.java new file mode 100644 index 0000000000..45e259ef2c --- /dev/null +++ b/test/550-new-instance-clinit/src/Main.java @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static void main(String[] args) { + int foo = Main.a; + new Bar(); + foo = Main.a; + if (foo != 43) { + throw new Error("Expected 43, got " + foo); + } + } + static int a = 42; +} + +class Bar { + static { + Main.a++; + } +} diff --git a/test/551-checker-clinit/expected.txt b/test/551-checker-clinit/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/551-checker-clinit/expected.txt diff --git a/test/551-checker-clinit/info.txt b/test/551-checker-clinit/info.txt new file mode 100644 index 0000000000..4d54bb5193 --- /dev/null +++ b/test/551-checker-clinit/info.txt @@ -0,0 +1 @@ +Checker test to ensure we optimize away HClinitChecks as expected. 
diff --git a/test/551-checker-clinit/src/Main.java b/test/551-checker-clinit/src/Main.java new file mode 100644 index 0000000000..5ec304808b --- /dev/null +++ b/test/551-checker-clinit/src/Main.java @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + public static void main(String[] args) {} + public static int foo = 42; + + /// CHECK-START: void Main.inlinedMethod() builder (after) + /// CHECK: ClinitCheck + + /// CHECK-START: void Main.inlinedMethod() inliner (after) + /// CHECK: ClinitCheck + /// CHECK-NOT: ClinitCheck + /// CHECK-NOT: InvokeStaticOrDirect + public void inlinedMethod() { + SubSub.bar(); + } +} + +class Sub extends Main { + /// CHECK-START: void Sub.invokeSuperClass() builder (after) + /// CHECK-NOT: ClinitCheck + public void invokeSuperClass() { + int a = Main.foo; + } + + /// CHECK-START: void Sub.invokeItself() builder (after) + /// CHECK-NOT: ClinitCheck + public void invokeItself() { + int a = foo; + } + + /// CHECK-START: void Sub.invokeSubClass() builder (after) + /// CHECK: ClinitCheck + public void invokeSubClass() { + int a = SubSub.foo; + } + + public static int foo = 42; +} + +class SubSub { + public static void bar() { + int a = Main.foo; + } + public static int foo = 42; +} diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build new file mode 100644 index 0000000000..18e8c59e91 --- /dev/null +++ b/test/551-checker-shifter-operand/build @@ -0,0 +1,212 @@ +#!/bin/bash +# +# Copyright (C) 2008 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This is an almost exact copy of `art/test/etc/default-build`. Only the parsing +# of the `dx` option has been overridden. + +# Stop if something fails. +set -e + +# Set default values for directories. +if [ -d smali ]; then + HAS_SMALI=true +else + HAS_SMALI=false +fi + +if [ -d src ]; then + HAS_SRC=true +else + HAS_SRC=false +fi + +if [ -d src2 ]; then + HAS_SRC2=true +else + HAS_SRC2=false +fi + +if [ -d src-multidex ]; then + HAS_SRC_MULTIDEX=true +else + HAS_SRC_MULTIDEX=false +fi + +if [ -d src-ex ]; then + HAS_SRC_EX=true +else + HAS_SRC_EX=false +fi + +DX_FLAGS="" +SKIP_DX_MERGER="false" +EXPERIMENTAL="" + +# Set up experimental flag mappings in a bash associative array. 
+declare -A JACK_EXPERIMENTAL_ARGS +JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8" +JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8" + +while true; do + if [ "x$1" = "x--dx-option" ]; then + shift + option="$1" + # Make sure we run this test *with* `dx` optimizations. + if [ "x$option" != "x--no-optimize" ]; then + DX_FLAGS="${DX_FLAGS} $option" + fi + shift + elif [ "x$1" = "x--jvm" ]; then + shift + elif [ "x$1" = "x--no-src" ]; then + HAS_SRC=false + shift + elif [ "x$1" = "x--no-src2" ]; then + HAS_SRC2=false + shift + elif [ "x$1" = "x--no-src-multidex" ]; then + HAS_SRC_MULTIDEX=false + shift + elif [ "x$1" = "x--no-src-ex" ]; then + HAS_SRC_EX=false + shift + elif [ "x$1" = "x--no-smali" ]; then + HAS_SMALI=false + shift + elif [ "x$1" = "x--experimental" ]; then + shift + EXPERIMENTAL="${EXPERIMENTAL} $1" + shift + elif expr "x$1" : "x--" >/dev/null 2>&1; then + echo "unknown $0 option: $1" 1>&2 + exit 1 + else + break + fi +done + +# Add args from the experimental mappings. +for experiment in ${EXPERIMENTAL}; do + JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}" +done + +if [ -e classes.dex ]; then + zip $TEST_NAME.jar classes.dex + exit 0 +fi + +if ! [ "${HAS_SRC}" = "true" ] && ! [ "${HAS_SRC2}" = "true" ]; then + # No src directory? Then forget about trying to run dx. + SKIP_DX_MERGER="true" +fi + +if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then + # Jack does not support this configuration unless we specify how to partition the DEX file + # with a .jpp file. + USE_JACK="false" +fi + +if [ ${USE_JACK} = "true" ]; then + # Jack toolchain + if [ "${HAS_SRC}" = "true" ]; then + ${JACK} ${JACK_ARGS} --output-jack src.jack src + imported_jack_files="--import src.jack" + fi + + if [ "${HAS_SRC2}" = "true" ]; then + ${JACK} ${JACK_ARGS} --output-jack src2.jack src2 + imported_jack_files="--import src2.jack ${imported_jack_files}" + fi + + # Compile jack files into a DEX file. We set jack.import.type.policy=keep-first to consider + # class definitions from src2 first. + if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then + ${JACK} ${JACK_ARGS} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex . + fi +else + # Legacy toolchain with javac+dx + if [ "${HAS_SRC}" = "true" ]; then + mkdir classes + ${JAVAC} ${JAVAC_ARGS} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'` + fi + + if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then + mkdir classes2 + ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'` + if [ ${NEED_DEX} = "true" ]; then + ${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex \ + --dump-width=1000 ${DX_FLAGS} classes2 + fi + fi + + if [ "${HAS_SRC2}" = "true" ]; then + mkdir -p classes + ${JAVAC} ${JAVAC_ARGS} -d classes `find src2 -name '*.java'` + fi + + if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then + if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then + ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \ + --dump-width=1000 ${DX_FLAGS} classes + fi + fi +fi + +if [ "${HAS_SMALI}" = "true" ]; then + # Compile Smali classes + ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` + + # Don't bother with dexmerger if we provide our own main function in a smali file. 
+ if [ ${SKIP_DX_MERGER} = "false" ]; then + ${DXMERGER} classes.dex classes.dex smali_classes.dex + else + mv smali_classes.dex classes.dex + fi +fi + +if [ ${HAS_SRC_EX} = "true" ]; then + if [ ${USE_JACK} = "true" ]; then + # Rename previous "classes.dex" so it is not overwritten. + mv classes.dex classes-1.dex + #TODO find another way to append src.jack to the jack classpath + ${JACK}:src.jack ${JACK_ARGS} --output-dex . src-ex + zip $TEST_NAME-ex.jar classes.dex + # Restore previous "classes.dex" so it can be zipped. + mv classes-1.dex classes.dex + else + mkdir classes-ex + ${JAVAC} ${JAVAC_ARGS} -d classes-ex -cp classes `find src-ex -name '*.java'` + if [ ${NEED_DEX} = "true" ]; then + ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes-ex.dex \ + --dump-width=1000 ${DX_FLAGS} classes-ex + + # quick shuffle so that the stored name is "classes.dex" + mv classes.dex classes-1.dex + mv classes-ex.dex classes.dex + zip $TEST_NAME-ex.jar classes.dex + mv classes.dex classes-ex.dex + mv classes-1.dex classes.dex + fi + fi +fi + +# Create a single jar with two dex files for multidex. +if [ ${HAS_SRC_MULTIDEX} = "true" ]; then + zip $TEST_NAME.jar classes.dex classes2.dex +elif [ ${NEED_DEX} = "true" ]; then + zip $TEST_NAME.jar classes.dex +fi diff --git a/test/551-checker-shifter-operand/expected.txt b/test/551-checker-shifter-operand/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/551-checker-shifter-operand/expected.txt diff --git a/test/551-checker-shifter-operand/info.txt b/test/551-checker-shifter-operand/info.txt new file mode 100644 index 0000000000..10e998cb18 --- /dev/null +++ b/test/551-checker-shifter-operand/info.txt @@ -0,0 +1 @@ +Test the merging of instructions into the shifter operand on arm64. diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java new file mode 100644 index 0000000000..decdd1f324 --- /dev/null +++ b/test/551-checker-shifter-operand/src/Main.java @@ -0,0 +1,678 @@ +/* +* Copyright (C) 2015 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +public class Main { + + // A dummy value to defeat inlining of these routines. 
+ static boolean doThrow = false; + + public static void assertByteEquals(byte expected, byte result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertCharEquals(char expected, char result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertShortEquals(short expected, short result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertIntEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertLongEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + // Non-inlinable type-casting helpers. + static char $noinline$byteToChar (byte v) { if (doThrow) throw new Error(); return (char)v; } + static short $noinline$byteToShort (byte v) { if (doThrow) throw new Error(); return (short)v; } + static int $noinline$byteToInt (byte v) { if (doThrow) throw new Error(); return (int)v; } + static long $noinline$byteToLong (byte v) { if (doThrow) throw new Error(); return (long)v; } + static byte $noinline$charToByte (char v) { if (doThrow) throw new Error(); return (byte)v; } + static short $noinline$charToShort (char v) { if (doThrow) throw new Error(); return (short)v; } + static int $noinline$charToInt (char v) { if (doThrow) throw new Error(); return (int)v; } + static long $noinline$charToLong (char v) { if (doThrow) throw new Error(); return (long)v; } + static byte $noinline$shortToByte (short v) { if (doThrow) throw new Error(); return (byte)v; } + static char $noinline$shortToChar (short v) { if (doThrow) throw new Error(); return (char)v; } + static int $noinline$shortToInt (short v) { if (doThrow) throw new Error(); return (int)v; } + static long $noinline$shortToLong (short v) { if (doThrow) throw new Error(); return (long)v; } + static byte $noinline$intToByte (int v) { if (doThrow) throw new Error(); return (byte)v; } + static char $noinline$intToChar (int v) { if (doThrow) throw new Error(); return (char)v; } + static short $noinline$intToShort (int v) { if (doThrow) throw new Error(); return (short)v; } + static long $noinline$intToLong (int v) { if (doThrow) throw new Error(); return (long)v; } + static byte $noinline$longToByte (long v) { if (doThrow) throw new Error(); return (byte)v; } + static char $noinline$longToChar (long v) { if (doThrow) throw new Error(); return (char)v; } + static short $noinline$longToShort (long v) { if (doThrow) throw new Error(); return (short)v; } + static int $noinline$longToInt (long v) { if (doThrow) throw new Error(); return (int)v; } + + /** + * Basic test merging a bitfield move operation (here a type conversion) into + * the shifter operand. 
+ */ + + /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (before) + /// CHECK-DAG: <<l:j\d+>> ParameterValue + /// CHECK-DAG: <<b:b\d+>> ParameterValue + /// CHECK: <<tmp:j\d+>> TypeConversion [<<b>>] + /// CHECK: Sub [<<l>>,<<tmp>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after) + /// CHECK-DAG: <<l:j\d+>> ParameterValue + /// CHECK-DAG: <<b:b\d+>> ParameterValue + /// CHECK: Arm64DataProcWithShifterOp [<<l>>,<<b>>] kind:Sub+SXTB + + /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after) + /// CHECK-NOT: TypeConversion + /// CHECK-NOT: Sub + + /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) disassembly (after) + /// CHECK: sub x{{\d+}}, x{{\d+}}, w{{\d+}}, sxtb + + public static long $opt$noinline$translate(long l, byte b) { + if (doThrow) throw new Error(); + long tmp = (long)b; + return l - tmp; + } + + + /** + * Test that we do not merge into the shifter operand when the left and right + * inputs are the same IR. + */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (before) + /// CHECK: <<a:i\d+>> ParameterValue + /// CHECK: <<Const2:i\d+>> IntConstant 2 + /// CHECK: <<tmp:i\d+>> Shl [<<a>>,<<Const2>>] + /// CHECK: Add [<<tmp>>,<<tmp>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after) + /// CHECK-DAG: <<a:i\d+>> ParameterValue + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + /// CHECK: <<Shl:i\d+>> Shl [<<a>>,<<Const2>>] + /// CHECK: Add [<<Shl>>,<<Shl>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64DataProcWithShifterOp + + public static int $opt$noinline$sameInput(int a) { + if (doThrow) throw new Error(); + int tmp = a << 2; + return tmp + tmp; + } + + /** + * Check that we perform the merge for multiple uses. 
+ */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (before) + /// CHECK: <<arg:i\d+>> ParameterValue + /// CHECK: <<Const23:i\d+>> IntConstant 23 + /// CHECK: <<tmp:i\d+>> Shl [<<arg>>,<<Const23>>] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after) + /// CHECK: <<arg:i\d+>> ParameterValue + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Shl + /// CHECK-NOT: Add + + public static int $opt$noinline$multipleUses(int arg) { + if (doThrow) throw new Error(); + int tmp = arg << 23; + switch (arg) { + case 1: return (arg | 1) + tmp; + case 2: return (arg | 2) + tmp; + case 3: return (arg | 3) + tmp; + case 4: return (arg | 4) + tmp; + case (1 << 20): return (arg | 5) + tmp; + default: return 0; + } + } + + /** + * Logical instructions cannot take 'extend' operations into the shift + * operand, so test that only the shifts are merged. + */ + + /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) disassembly (after) + /// CHECK: and lsl + /// CHECK: sxtb + /// CHECK: and + + static void $opt$noinline$testAnd(long a, long b) { + if (doThrow) throw new Error(); + assertLongEquals((a & $noinline$LongShl(b, 5)) | (a & $noinline$longToByte(b)), + (a & (b << 5)) | (a & (byte)b)); + } + + /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) disassembly (after) + /// CHECK: orr asr + /// CHECK: uxth + /// CHECK: orr + + static void $opt$noinline$testOr(int a, int b) { + if (doThrow) throw new Error(); + assertIntEquals((a | $noinline$IntShr(b, 6)) | (a | $noinline$intToChar(b)), + (a | (b >> 6)) | (a | (char)b)); + } + + /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) disassembly (after) + /// CHECK: eor lsr + /// CHECK: sxtw + /// CHECK: eor + + static void $opt$noinline$testXor(long a, long b) { + if (doThrow) throw new Error(); + assertLongEquals((a ^ $noinline$LongUshr(b, 7)) | (a ^ $noinline$longToInt(b)), + (a ^ (b >>> 7)) | (a ^ (int)b)); + } + + /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) disassembly (after) + /// CHECK: neg lsl + /// CHECK: sxth + 
/// CHECK: neg + + static void $opt$noinline$testNeg(int a) { + if (doThrow) throw new Error(); + assertIntEquals(-$noinline$IntShl(a, 8) | -$noinline$intToShort(a), + (-(a << 8)) | (-(short)a)); + } + + /** + * The functions below are used to compare the result of optimized operations + * to non-optimized operations. + * On the left-hand side we use a non-inlined function call to ensure the + * optimization does not occur. The checker tests ensure that the optimization + * does occur on the right-hand side. + */ + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after) + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendByteInt1(int a, byte b) { + assertIntEquals(a + $noinline$byteToChar (b), a + (char)b); + assertIntEquals(a + $noinline$byteToShort(b), a + (short)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt2(int, byte) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + public static void $opt$validateExtendByteInt2(int a, byte b) { + // The conversion to `int` has been optimized away, so there is nothing to merge. + assertIntEquals (a + $noinline$byteToInt (b), a + (int)b); + // There is an environment use for `(long)b`, preventing the merge. + assertLongEquals(a + $noinline$byteToLong(b), a + (long)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendByteLong(long a, byte b) { + // The first two tests have a type conversion. + assertLongEquals(a + $noinline$byteToChar (b), a + (char)b); + assertLongEquals(a + $noinline$byteToShort(b), a + (short)b); + // This test does not because the conversion to `int` is optimized away. + assertLongEquals(a + $noinline$byteToInt (b), a + (int)b); + } + + public static void $opt$validateExtendByte(long a, byte b) { + $opt$validateExtendByteInt1((int)a, b); + $opt$validateExtendByteInt2((int)a, b); + $opt$validateExtendByteLong(a, b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after) + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendCharInt1(int a, char b) { + assertIntEquals(a + $noinline$charToByte (b), a + (byte)b); + assertIntEquals(a + $noinline$charToShort(b), a + (short)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt2(int, char) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + public static void $opt$validateExtendCharInt2(int a, char b) { + // The conversion to `int` has been optimized away, so there is nothing to merge. 
+ assertIntEquals (a + $noinline$charToInt (b), a + (int)b); + // There is an environment use for `(long)b`, preventing the merge. + assertLongEquals(a + $noinline$charToLong(b), a + (long)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendCharLong(long a, char b) { + // The first two tests have a type conversion. + assertLongEquals(a + $noinline$charToByte (b), a + (byte)b); + assertLongEquals(a + $noinline$charToShort(b), a + (short)b); + // This test does not because the conversion to `int` is optimized away. + assertLongEquals(a + $noinline$charToInt (b), a + (int)b); + } + + public static void $opt$validateExtendChar(long a, char b) { + $opt$validateExtendCharInt1((int)a, b); + $opt$validateExtendCharInt2((int)a, b); + $opt$validateExtendCharLong(a, b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after) + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendShortInt1(int a, short b) { + assertIntEquals(a + $noinline$shortToByte (b), a + (byte)b); + assertIntEquals(a + $noinline$shortToChar (b), a + (char)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt2(int, short) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + public static void $opt$validateExtendShortInt2(int a, short b) { + // The conversion to `int` has been optimized away, so there is nothing to merge. + assertIntEquals (a + $noinline$shortToInt (b), a + (int)b); + // There is an environment use for `(long)b`, preventing the merge. + assertLongEquals(a + $noinline$shortToLong (b), a + (long)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendShortLong(long a, short b) { + // The first two tests have a type conversion. + assertLongEquals(a + $noinline$shortToByte(b), a + (byte)b); + assertLongEquals(a + $noinline$shortToChar(b), a + (char)b); + // This test does not because the conversion to `int` is optimized away. 
+ assertLongEquals(a + $noinline$shortToInt (b), a + (int)b); + } + + public static void $opt$validateExtendShort(long a, short b) { + $opt$validateExtendShortInt1((int)a, b); + $opt$validateExtendShortInt2((int)a, b); + $opt$validateExtendShortLong(a, b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendInt(long a, int b) { + // All tests have a conversion to `long`. The first three tests also have a + // conversion from `int` to the specified type. For each test the conversion + // to `long` is merged into the shifter operand. + assertLongEquals(a + $noinline$intToByte (b), a + (byte)b); + assertLongEquals(a + $noinline$intToChar (b), a + (char)b); + assertLongEquals(a + $noinline$intToShort(b), a + (short)b); + assertLongEquals(a + $noinline$intToLong (b), a + (long)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendLong(long a, long b) { + // Each test has two conversions, from `long` and then back to `long`. The + // conversions to `long` are merged. + assertLongEquals(a + $noinline$longToByte (b), a + (byte)b); + assertLongEquals(a + $noinline$longToChar (b), a + (char)b); + assertLongEquals(a + $noinline$longToShort(b), a + (short)b); + assertLongEquals(a + $noinline$longToInt (b), a + (int)b); + } + + + static int $noinline$IntShl(int b, int c) { + if (doThrow) throw new Error(); + return b << c; + } + static int $noinline$IntShr(int b, int c) { + if (doThrow) throw new Error(); + return b >> c; + } + static int $noinline$IntUshr(int b, int c) { + if (doThrow) throw new Error(); + return b >>> c; + } + + + // Each test line below should see one merge. 
+ /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Shl + /// CHECK-NOT: Shr + /// CHECK-NOT: UShr + + public static void $opt$validateShiftInt(int a, int b) { + assertIntEquals(a + $noinline$IntShl(b, 1), a + (b << 1)); + assertIntEquals(a + $noinline$IntShl(b, 6), a + (b << 6)); + assertIntEquals(a + $noinline$IntShl(b, 7), a + (b << 7)); + assertIntEquals(a + $noinline$IntShl(b, 8), a + (b << 8)); + assertIntEquals(a + $noinline$IntShl(b, 14), a + (b << 14)); + assertIntEquals(a + $noinline$IntShl(b, 15), a + (b << 15)); + assertIntEquals(a + $noinline$IntShl(b, 16), a + (b << 16)); + assertIntEquals(a + $noinline$IntShl(b, 30), a + (b << 30)); + assertIntEquals(a + $noinline$IntShl(b, 31), a + (b << 31)); + assertIntEquals(a + $noinline$IntShl(b, 32), a + (b << 32)); + assertIntEquals(a + $noinline$IntShl(b, 62), a + (b << 62)); + assertIntEquals(a + $noinline$IntShl(b, 63), a + (b << 63)); + + assertIntEquals(a - $noinline$IntShr(b, 1), a - (b >> 1)); + assertIntEquals(a - $noinline$IntShr(b, 6), a - (b >> 6)); + assertIntEquals(a - $noinline$IntShr(b, 7), a - (b >> 7)); + assertIntEquals(a - $noinline$IntShr(b, 8), a - (b >> 8)); + assertIntEquals(a - $noinline$IntShr(b, 14), a - (b >> 14)); + assertIntEquals(a - $noinline$IntShr(b, 15), a - (b >> 15)); + assertIntEquals(a - $noinline$IntShr(b, 16), a - (b >> 16)); + assertIntEquals(a - $noinline$IntShr(b, 30), a - (b >> 30)); + assertIntEquals(a - $noinline$IntShr(b, 31), a - (b >> 31)); + assertIntEquals(a - $noinline$IntShr(b, 32), a - (b >> 32)); + assertIntEquals(a - $noinline$IntShr(b, 62), a - (b >> 62)); + assertIntEquals(a - $noinline$IntShr(b, 63), a - (b >> 63)); + + assertIntEquals(a ^ $noinline$IntUshr(b, 1), a ^ (b >>> 1)); + assertIntEquals(a ^ $noinline$IntUshr(b, 6), a ^ (b >>> 6)); + assertIntEquals(a ^ $noinline$IntUshr(b, 7), a ^ (b >>> 7)); + assertIntEquals(a ^ $noinline$IntUshr(b, 8), a ^ (b >>> 8)); + assertIntEquals(a ^ 
$noinline$IntUshr(b, 14), a ^ (b >>> 14)); + assertIntEquals(a ^ $noinline$IntUshr(b, 15), a ^ (b >>> 15)); + assertIntEquals(a ^ $noinline$IntUshr(b, 16), a ^ (b >>> 16)); + assertIntEquals(a ^ $noinline$IntUshr(b, 30), a ^ (b >>> 30)); + assertIntEquals(a ^ $noinline$IntUshr(b, 31), a ^ (b >>> 31)); + assertIntEquals(a ^ $noinline$IntUshr(b, 32), a ^ (b >>> 32)); + assertIntEquals(a ^ $noinline$IntUshr(b, 62), a ^ (b >>> 62)); + assertIntEquals(a ^ $noinline$IntUshr(b, 63), a ^ (b >>> 63)); + } + + + static long $noinline$LongShl(long b, long c) { + if (doThrow) throw new Error(); + return b << c; + } + static long $noinline$LongShr(long b, long c) { + if (doThrow) throw new Error(); + return b >> c; + } + static long $noinline$LongUshr(long b, long c) { + if (doThrow) throw new Error(); + return b >>> c; + } + + // Each test line below should see one merge. + /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Shl + /// CHECK-NOT: Shr + /// CHECK-NOT: UShr + + public static void $opt$validateShiftLong(long a, long b) { + assertLongEquals(a + $noinline$LongShl(b, 1), a + (b << 1)); + assertLongEquals(a + $noinline$LongShl(b, 6), a + (b << 6)); + assertLongEquals(a + $noinline$LongShl(b, 7), a + (b << 7)); + assertLongEquals(a + $noinline$LongShl(b, 8), a + (b << 8)); + assertLongEquals(a + $noinline$LongShl(b, 14), a + (b << 14)); + assertLongEquals(a + $noinline$LongShl(b, 15), a + (b << 15)); + assertLongEquals(a + $noinline$LongShl(b, 16), a + (b << 16)); + assertLongEquals(a + $noinline$LongShl(b, 30), a + (b << 30)); + assertLongEquals(a + $noinline$LongShl(b, 31), a + (b << 31)); + assertLongEquals(a + $noinline$LongShl(b, 32), a + (b << 32)); + assertLongEquals(a + $noinline$LongShl(b, 62), a + (b << 62)); + assertLongEquals(a + $noinline$LongShl(b, 63), a + (b << 63)); + + assertLongEquals(a - $noinline$LongShr(b, 1), a - (b >> 1)); + assertLongEquals(a - $noinline$LongShr(b, 6), a - (b >> 
6)); + assertLongEquals(a - $noinline$LongShr(b, 7), a - (b >> 7)); + assertLongEquals(a - $noinline$LongShr(b, 8), a - (b >> 8)); + assertLongEquals(a - $noinline$LongShr(b, 14), a - (b >> 14)); + assertLongEquals(a - $noinline$LongShr(b, 15), a - (b >> 15)); + assertLongEquals(a - $noinline$LongShr(b, 16), a - (b >> 16)); + assertLongEquals(a - $noinline$LongShr(b, 30), a - (b >> 30)); + assertLongEquals(a - $noinline$LongShr(b, 31), a - (b >> 31)); + assertLongEquals(a - $noinline$LongShr(b, 32), a - (b >> 32)); + assertLongEquals(a - $noinline$LongShr(b, 62), a - (b >> 62)); + assertLongEquals(a - $noinline$LongShr(b, 63), a - (b >> 63)); + + assertLongEquals(a ^ $noinline$LongUshr(b, 1), a ^ (b >>> 1)); + assertLongEquals(a ^ $noinline$LongUshr(b, 6), a ^ (b >>> 6)); + assertLongEquals(a ^ $noinline$LongUshr(b, 7), a ^ (b >>> 7)); + assertLongEquals(a ^ $noinline$LongUshr(b, 8), a ^ (b >>> 8)); + assertLongEquals(a ^ $noinline$LongUshr(b, 14), a ^ (b >>> 14)); + assertLongEquals(a ^ $noinline$LongUshr(b, 15), a ^ (b >>> 15)); + assertLongEquals(a ^ $noinline$LongUshr(b, 16), a ^ (b >>> 16)); + assertLongEquals(a ^ $noinline$LongUshr(b, 30), a ^ (b >>> 30)); + assertLongEquals(a ^ $noinline$LongUshr(b, 31), a ^ (b >>> 31)); + assertLongEquals(a ^ $noinline$LongUshr(b, 32), a ^ (b >>> 32)); + assertLongEquals(a ^ $noinline$LongUshr(b, 62), a ^ (b >>> 62)); + assertLongEquals(a ^ $noinline$LongUshr(b, 63), a ^ (b >>> 63)); + } + + + public static void main(String[] args) { + assertLongEquals(10000L - 3L, $opt$noinline$translate(10000L, (byte)3)); + assertLongEquals(-10000L - -3L, $opt$noinline$translate(-10000L, (byte)-3)); + + assertIntEquals(4096, $opt$noinline$sameInput(512)); + assertIntEquals(-8192, $opt$noinline$sameInput(-1024)); + + assertIntEquals(((1 << 23) | 1), $opt$noinline$multipleUses(1)); + assertIntEquals(((1 << 20) | 5), $opt$noinline$multipleUses(1 << 20)); + + long inputs[] = { + -((1L << 7) - 1L), -((1L << 7)), -((1L << 7) + 1L), + -((1L << 15) - 1L), -((1L << 15)), -((1L << 15) + 1L), + -((1L << 16) - 1L), -((1L << 16)), -((1L << 16) + 1L), + -((1L << 31) - 1L), -((1L << 31)), -((1L << 31) + 1L), + -((1L << 32) - 1L), -((1L << 32)), -((1L << 32) + 1L), + -((1L << 63) - 1L), -((1L << 63)), -((1L << 63) + 1L), + -42L, -314L, -2718281828L, -0x123456789L, -0x987654321L, + -1L, -20L, -300L, -4000L, -50000L, -600000L, -7000000L, -80000000L, + 0L, + 1L, 20L, 300L, 4000L, 50000L, 600000L, 7000000L, 80000000L, + 42L, 314L, 2718281828L, 0x123456789L, 0x987654321L, + (1L << 7) - 1L, (1L << 7), (1L << 7) + 1L, + (1L << 8) - 1L, (1L << 8), (1L << 8) + 1L, + (1L << 15) - 1L, (1L << 15), (1L << 15) + 1L, + (1L << 16) - 1L, (1L << 16), (1L << 16) + 1L, + (1L << 31) - 1L, (1L << 31), (1L << 31) + 1L, + (1L << 32) - 1L, (1L << 32), (1L << 32) + 1L, + (1L << 63) - 1L, (1L << 63), (1L << 63) + 1L, + Long.MIN_VALUE, Long.MAX_VALUE + }; + for (int i = 0; i < inputs.length; i++) { + $opt$noinline$testNeg((int)inputs[i]); + for (int j = 0; j < inputs.length; j++) { + $opt$noinline$testAnd(inputs[i], inputs[j]); + $opt$noinline$testOr((int)inputs[i], (int)inputs[j]); + $opt$noinline$testXor(inputs[i], inputs[j]); + + $opt$validateExtendByte(inputs[i], (byte)inputs[j]); + $opt$validateExtendChar(inputs[i], (char)inputs[j]); + $opt$validateExtendShort(inputs[i], (short)inputs[j]); + $opt$validateExtendInt(inputs[i], (int)inputs[j]); + $opt$validateExtendLong(inputs[i], inputs[j]); + + $opt$validateShiftInt((int)inputs[i], (int)inputs[j]); + $opt$validateShiftLong(inputs[i], inputs[j]); + } + 
} + + } +} diff --git a/test/551-implicit-null-checks/expected.txt b/test/551-implicit-null-checks/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/551-implicit-null-checks/expected.txt diff --git a/test/551-implicit-null-checks/info.txt b/test/551-implicit-null-checks/info.txt new file mode 100644 index 0000000000..bdd066bec3 --- /dev/null +++ b/test/551-implicit-null-checks/info.txt @@ -0,0 +1 @@ +Test that implicit null checks are recorded correctly for longs.
\ No newline at end of file diff --git a/test/551-implicit-null-checks/src/Main.java b/test/551-implicit-null-checks/src/Main.java new file mode 100644 index 0000000000..677e8d34ca --- /dev/null +++ b/test/551-implicit-null-checks/src/Main.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + private class Inner { + private long i1; + } + private Inner inst; + + public static void main(String args[]) throws Exception { + Main m = new Main(); + try { + m.$opt$noinline$testGetLong(); + } catch (NullPointerException ex) { + // good + } + try { + m.$opt$noinline$testPutLong(778899112233L); + } catch (NullPointerException ex) { + // good + } + } + + public void $opt$noinline$testGetLong() throws Exception { + long result = inst.i1; + throw new Exception(); // prevent inline + } + + public void $opt$noinline$testPutLong(long a) throws Exception { + inst.i1 = a; + throw new Exception(); // prevent inline + } +} diff --git a/test/960-default-smali/build b/test/960-default-smali/build index 4dc848cfa6..b72afcdf18 100755 --- a/test/960-default-smali/build +++ b/test/960-default-smali/build @@ -22,7 +22,7 @@ ${ANDROID_BUILD_TOP}/art/test/utils/python/generate_smali_main.py ./smali # Should we compile with Java source code. By default we will use Smali. USES_JAVA_SOURCE="false" -if [[ $ARGS == *"--jvm"* ]]; then +if [[ $@ == *"--jvm"* ]]; then USES_JAVA_SOURCE="true" elif [[ "$USE_JACK" == "true" ]]; then if $JACK -D jack.java.source.version=1.8 >& /dev/null; then diff --git a/test/961-default-iface-resolution-generated/build b/test/961-default-iface-resolution-generated/build index b4ced3e82e..005f76c2dc 100755 --- a/test/961-default-iface-resolution-generated/build +++ b/test/961-default-iface-resolution-generated/build @@ -33,7 +33,7 @@ mkdir -p ./smali # Should we compile with Java source code. By default we will use Smali. 
USES_JAVA_SOURCE="false" -if [[ $ARGS == *"--jvm"* ]]; then +if [[ $@ == *"--jvm"* ]]; then USES_JAVA_SOURCE="true" elif [[ $USE_JACK == "true" ]]; then if "$JACK" -D jack.java.source.version=1.8 >& /dev/null; then diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk index 7a22e1b74a..f74a516486 100644 --- a/test/Android.libarttest.mk +++ b/test/Android.libarttest.mk @@ -37,7 +37,8 @@ LIBARTTEST_COMMON_SRC_FILES := \ 457-regs/regs_jni.cc \ 461-get-reference-vreg/get_reference_vreg_jni.cc \ 466-get-live-vreg/get_live_vreg_jni.cc \ - 497-inlining-and-class-loader/clear_dex_cache.cc + 497-inlining-and-class-loader/clear_dex_cache.cc \ + 543-env-long-ref/env_long_ref.cc ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so diff --git a/test/run-test b/test/run-test index d0da34e78c..6e13b8a976 100755 --- a/test/run-test +++ b/test/run-test @@ -669,9 +669,9 @@ export TEST_NAME=`basename ${test_dir}` # ------------------------------- # Return whether the Optimizing compiler has read barrier support for ARCH. function arch_supports_read_barrier() { - # Optimizing has read barrier support for ARM, x86 and x86-64 at the + # Optimizing has read barrier support for ARM, ARM64, x86 and x86-64 at the # moment. - [ "x$1" = xarm ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ] + [ "x$1" = xarm ] || [ "x$1" = xarm64 ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ] } # Tests named '<number>-checker-*' will also have their CFGs verified with @@ -739,8 +739,8 @@ fi if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then # We will need to `adb pull` the .cfg output from the target onto the host to # run checker on it. This file can be big. - build_file_size_limit=16384 - run_file_size_limit=16384 + build_file_size_limit=24576 + run_file_size_limit=24576 fi if [ ${USE_JACK} = "false" ]; then # Set ulimit if we build with dx only, Jack can generate big temp files. diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh index de27a6faaa..47fc50fbd2 100755 --- a/tools/run-jdwp-tests.sh +++ b/tools/run-jdwp-tests.sh @@ -28,6 +28,18 @@ if [ ! -f $test_jar ]; then exit 1 fi +if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then + # For the moment, skip JDWP tests when read barriers are enabled, as + # they sometimes exhibit a deadlock issue with the concurrent + # copying collector in the read barrier configuration, between the + # HeapTaskDaemon and the JDWP thread (b/25800335). + # + # TODO: Re-enable the JDWP tests when this deadlock issue is fixed. + echo "JDWP tests are temporarily disabled in the read barrier configuration because of" + echo "a deadlock issue (b/25800335)." + exit 0 +fi + art="/data/local/tmp/system/bin/art" art_debugee="sh /data/local/tmp/system/bin/art" args=$@ @@ -43,9 +55,11 @@ image="-Ximage:/data/art-test/core-jit.art" vm_args="" # By default, we run the whole JDWP test suite. test="org.apache.harmony.jpda.tests.share.AllTests" +host="no" while true; do if [[ "$1" == "--mode=host" ]]; then + host="yes" # Specify bash explicitly since the art script cannot, since it has to run on the device # with mksh. art="bash ${OUT_DIR-out}/host/linux-x86/bin/art" @@ -118,3 +132,15 @@ vogar $vm_command \ --classpath $test_jar \ --vm-arg -Xcompiler-option --vm-arg --debuggable \ $test + +vogar_exit_status=$? + +echo "Killing stalled dalvikvm processes..." 
+if [[ $host == "yes" ]]; then + pkill -9 -f /bin/dalvikvm +else + adb shell pkill -9 -f /bin/dalvikvm +fi +echo "Done." + +exit $vogar_exit_status