 -rw-r--r--  compiler/common_compiler_test.cc                   |  21
 -rw-r--r--  compiler/optimizing/code_generator_mips.cc         |  20
 -rw-r--r--  compiler/optimizing/reference_type_propagation.cc  |  16
 -rw-r--r--  runtime/arch/arm64/quick_entrypoints_arm64.S       | 102
 -rw-r--r--  runtime/art_method.cc                              |  19
 -rw-r--r--  runtime/art_method.h                               |  10
 -rw-r--r--  runtime/exception_test.cc                          |  23
 -rw-r--r--  runtime/jit/jit.h                                  |   2
 -rw-r--r--  runtime/jit/jit_code_cache.cc                      | 108
 -rw-r--r--  runtime/jit/jit_code_cache.h                       |  21
 -rw-r--r--  runtime/jit/jit_instrumentation.cc                 |  40
 -rw-r--r--  runtime/jit/profiling_info.cc                      |  17
 -rw-r--r--  runtime/jit/profiling_info.h                       |  29
 -rw-r--r--  runtime/oat_quick_method_header.h                  |   2
 -rwxr-xr-x  test/004-ThreadStress/run                          |  19
 -rwxr-xr-x  test/etc/run-test-jar                              |   2
 -rwxr-xr-x  test/run-all-tests                                 |  58
 -rwxr-xr-x  test/run-test                                      |   7
 -rwxr-xr-x  tools/run-jdwp-tests.sh                            |   9
19 files changed, 428 insertions(+), 97 deletions(-)
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 151437b4cb..c37cecaeac 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -77,11 +77,10 @@ void CommonCompilerTest::MakeExecutable(ArtMethod* method) {
 
     header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
     std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
-    size_t size = sizeof(method_header) + code_size + vmap_table.size() + mapping_table_size +
-        gc_map_size;
-    size_t code_offset = compiled_method->AlignCode(size - code_size);
-    size_t padding = code_offset - (size - code_size);
-    chunk->reserve(padding + size);
+    const size_t max_padding = GetInstructionSetAlignment(compiled_method->GetInstructionSet());
+    const size_t size =
+        gc_map_size + mapping_table_size + vmap_table.size() + sizeof(method_header) + code_size;
+    chunk->reserve(size + max_padding);
     chunk->resize(sizeof(method_header));
     memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
     chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
@@ -91,10 +90,16 @@ void CommonCompilerTest::MakeExecutable(ArtMethod* method) {
     if (gc_map_used) {
       chunk->insert(chunk->begin(), gc_map.begin(), gc_map.end());
     }
-    chunk->insert(chunk->begin(), padding, 0);
     chunk->insert(chunk->end(), code.begin(), code.end());
-    CHECK_EQ(padding + size, chunk->size());
-    const void* code_ptr = &(*chunk)[code_offset];
+    CHECK_EQ(chunk->size(), size);
+    const void* unaligned_code_ptr = chunk->data() + (size - code_size);
+    size_t offset = dchecked_integral_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_code_ptr));
+    size_t padding = compiled_method->AlignCode(offset) - offset;
+    // Make sure no resizing takes place.
+    CHECK_GE(chunk->capacity(), chunk->size() + padding);
+    chunk->insert(chunk->begin(), padding, 0);
+    const void* code_ptr = reinterpret_cast<const uint8_t*>(unaligned_code_ptr) + padding;
+    CHECK_EQ(code_ptr, static_cast<const void*>(chunk->data() + (chunk->size() - code_size)));
     MakeExecutable(code_ptr, code.size());
     const void* method_code = CompiledMethod::CodePointer(code_ptr,
                                                           compiled_method->GetInstructionSet());
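Note: the rewritten test helper reserves worst-case padding up front (one instruction-set alignment) and only computes the real padding once the chunk's final address is known, so the buffer can never reallocate underneath unaligned_code_ptr. A stand-alone sketch of the same align-up arithmetic (hypothetical helper names, not part of the patch):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Round an address up to 'alignment' (a power of two), as
    // CompiledMethod::AlignCode()/RoundUp() do in the patch.
    uintptr_t AlignUp(uintptr_t address, uintptr_t alignment) {
      assert((alignment & (alignment - 1)) == 0);  // Power of two.
      return (address + alignment - 1) & ~(alignment - 1);
    }

    // Number of zero bytes to insert before the code so that it starts aligned.
    size_t PaddingFor(const void* unaligned_code_ptr, uintptr_t alignment) {
      uintptr_t offset = reinterpret_cast<uintptr_t>(unaligned_code_ptr);
      return static_cast<size_t>(AlignUp(offset, alignment) - offset);
    }

Since the padding is at most alignment - 1 bytes, reserving size + max_padding guarantees the later chunk->insert() stays within capacity, which is exactly what the CHECK_GE asserts.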
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 6aed4447f7..e6b9273d24 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3118,15 +3118,25 @@ void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
 }
 
 void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
-  LocationSummary::CallKind call_kind = cls->CanCallRuntime() ? LocationSummary::kCallOnSlowPath
-                                                              : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
+  InvokeRuntimeCallingConvention calling_convention;
+  CodeGenerator::CreateLoadClassLocationSummary(
+      cls,
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+      Location::RegisterLocation(V0));
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
+  if (cls->NeedsAccessCheck()) {
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
+                            cls,
+                            cls->GetDexPc(),
+                            nullptr,
+                            IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess));
+    return;
+  }
+
   Register out = locations->Out().AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
   if (cls->IsReferrersClass()) {
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 26a05da4cb..659da068a9 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -373,12 +373,18 @@ void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr,
   if (instr->IsInvokeStaticOrDirect() && instr->AsInvokeStaticOrDirect()->IsStringInit()) {
     // Calls to String.<init> are replaced with a StringFactory.
     if (kIsDebugBuild) {
-      ScopedObjectAccess soa(Thread::Current());
+      HInvoke* invoke = instr->AsInvoke();
       ClassLinker* cl = Runtime::Current()->GetClassLinker();
-      mirror::DexCache* dex_cache = cl->FindDexCache(
-          soa.Self(), instr->AsInvoke()->GetDexFile(), false);
-      ArtMethod* method = dex_cache->GetResolvedMethod(
-          instr->AsInvoke()->GetDexMethodIndex(), cl->GetImagePointerSize());
+      ScopedObjectAccess soa(Thread::Current());
+      StackHandleScope<2> hs(soa.Self());
+      Handle<mirror::DexCache> dex_cache(
+          hs.NewHandle(cl->FindDexCache(soa.Self(), invoke->GetDexFile(), false)));
+      // Use a null loader. We should probably use the compiling method's class loader,
+      // but then we would need to pass it to RTPVisitor just for this debug check. Since
+      // the method is from the String class, the null loader is good enough.
+      Handle<mirror::ClassLoader> loader;
+      ArtMethod* method = cl->ResolveMethod(
+          invoke->GetDexFile(), invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect);
       DCHECK(method != nullptr);
       mirror::Class* declaring_class = method->GetDeclaringClass();
       DCHECK(declaring_class != nullptr);
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index be5a15ec39..9ccabad1cc 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1437,7 +1437,107 @@ END art_quick_set64_static
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+    // Fast path rosalloc allocation.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    cbz    x2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
+                                                              // Check class status.
+    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
+    cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
+    bne    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Add a fake dependence from the
+                                                              // following access flag and size
+                                                              // loads to the status load.
+                                                              // This is to prevent those loads
+                                                              // from being reordered above the
+                                                              // status load and reading wrong
+                                                              // values (an alternative is to use
+                                                              // a load-acquire for the status).
+    eor    x3, x3, x3
+    add    x2, x2, x3
+                                                              // Check access flags has
+                                                              // kAccClassIsFinalizable
+    ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+    tst    x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
+    bne    .Lart_quick_alloc_object_rosalloc_slow_path
+    ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
+                                                              // allocation stack has room.
+                                                              // ldp won't work due to large offset.
+    ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
+    cmp    x3, x4
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+    ldr    w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (x3)
+    cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
+                                                              // local allocation
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Compute the rosalloc bracket index
+                                                              // from the size.
+                                                              // Align up the size by the rosalloc
+                                                              // bracket quantum size and divide
+                                                              // by the quantum size and subtract
+                                                              // by 1. This code is a shorter but
+                                                              // equivalent version.
+    sub    x3, x3, #1
+    lsr    x3, x3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT
+                                                              // Load the rosalloc run (x4)
+    add    x4, xSELF, x3, lsl #POINTER_SIZE_SHIFT
+    ldr    x4, [x4, #THREAD_ROSALLOC_RUNS_OFFSET]
+                                                              // Load the free list head (x3). This
+                                                              // will be the return val.
+    ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+    cbz    x3, .Lart_quick_alloc_object_rosalloc_slow_path
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+    ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
+                                                              // and update the list head with the
+                                                              // next pointer.
+    str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+                                                              // Store the class pointer in the
+                                                              // header. This also overwrites the
+                                                              // next pointer. The offsets are
+                                                              // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF w2
+    str    w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
+                                                              // Push the new object onto the thread
+                                                              // local allocation stack and
+                                                              // increment the thread local
+                                                              // allocation stack top.
+    ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+    str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
+    str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+                                                              // Decrement the size of the free list
+    ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+    sub    x1, x1, #1
+                                                              // TODO: consider combining this store
+                                                              // and the list head store above using
+                                                              // strd.
+    str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that the code after this allocation
+                                                              // site will see the right values in
+                                                              // the fields of the class.
+                                                              // Alternatively we could use "ishst"
+                                                              // if we use load-acquire for the
+                                                              // class status load.)
+    dmb    ish
+    mov    x0, x3                                             // Set the return value and return.
+    ret
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME      // save callee saves in case of GC
+    mov    x2, xSELF                       // pass Thread::Current
+    bl     artAllocObjectFromCodeRosAlloc  // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_rosalloc
 
     /*
      * Called by managed code when the thread has been asked to suspend.
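Note: the one non-obvious step in the fast path above is the bracket-index computation. Aligning the size up to the bracket quantum, dividing by the quantum, and subtracting one collapses into the two instructions sub/lsr. A sketch demonstrating the equivalence (the shift value is an illustrative stand-in for the generated ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT constant):

    #include <cassert>
    #include <cstddef>

    constexpr size_t kQuantumShift = 4;  // Illustrative: a 16-byte bracket quantum.
    constexpr size_t kQuantum = size_t{1} << kQuantumShift;

    // The "long" form described in the comment: align up, divide, subtract one.
    size_t BracketIndexSlow(size_t size) {
      size_t aligned = (size + kQuantum - 1) & ~(kQuantum - 1);
      return aligned / kQuantum - 1;
    }

    // The two-instruction form used by the entrypoint: sub #1, lsr #shift.
    size_t BracketIndexFast(size_t size) {
      return (size - 1) >> kQuantumShift;
    }

    int main() {
      for (size_t size = 1; size <= 2048; ++size) {
        assert(BracketIndexSlow(size) == BracketIndexFast(size));
      }
      return 0;
    }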
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index a10d7afb0f..fe0afa6ebf 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -361,19 +361,6 @@ bool ArtMethod::EqualParameters(Handle<mirror::ObjectArray<mirror::Class>> param
   return true;
 }
 
-ProfilingInfo* ArtMethod::CreateProfilingInfo() {
-  DCHECK(!Runtime::Current()->IsAotCompiler());
-  ProfilingInfo* info = ProfilingInfo::Create(this);
-  MemberOffset offset = ArtMethod::EntryPointFromJniOffset(sizeof(void*));
-  uintptr_t pointer = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
-  if (!reinterpret_cast<Atomic<ProfilingInfo*>*>(pointer)->
-          CompareExchangeStrongSequentiallyConsistent(nullptr, info)) {
-    return GetProfilingInfo(sizeof(void*));
-  } else {
-    return info;
-  }
-}
-
 const uint8_t* ArtMethod::GetQuickenedInfo() {
   bool found = false;
   OatFile::OatMethod oat_method =
@@ -427,6 +414,12 @@ const OatQuickMethodHeader* ArtMethod::GetOatQuickMethodHeader(uintptr_t pc) {
   bool found;
   OatFile::OatMethod oat_method = class_linker->FindOatMethodFor(this, &found);
   if (!found) {
+    if (class_linker->IsQuickResolutionStub(existing_entry_point)) {
+      // We are running the generic jni stub, but the entry point of the method has not
+      // been updated yet.
+      DCHECK(IsNative());
+      return nullptr;
+    }
     // Only for unit tests.
     // TODO(ngeoffray): Update these tests to pass the right pc?
     return OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
diff --git a/runtime/art_method.h b/runtime/art_method.h
index bb9804eede..551989d182 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -305,12 +305,18 @@ class ArtMethod FINAL {
         PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
   }
 
-  ProfilingInfo* CreateProfilingInfo() SHARED_REQUIRES(Locks::mutator_lock_);
-
   ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
     return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJniPtrSize(pointer_size));
   }
 
+  ALWAYS_INLINE void SetProfilingInfo(ProfilingInfo* info) {
+    SetEntryPointFromJniPtrSize(info, sizeof(void*));
+  }
+
+  static MemberOffset ProfilingInfoOffset() {
+    return EntryPointFromJniOffset(sizeof(void*));
+  }
+
   void* GetEntryPointFromJni() {
     return GetEntryPointFromJniPtrSize(sizeof(void*));
   }
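Note: GetProfilingInfo()/SetProfilingInfo() work because a method that is not native never uses its entry_point_from_jni_ slot, so the JIT can repurpose that pointer-sized field instead of growing ArtMethod. A simplified model of the aliasing (a sketch, not ART's real layout):

    #include <cstdint>

    struct ProfilingInfo;

    // Simplified: one pointer-sized slot that holds the JNI stub for native
    // methods and a ProfilingInfo* for JIT-profiled managed methods.
    struct Method {
      void* entry_point_from_jni = nullptr;

      // Callers must know the method is not native, as in the patch.
      ProfilingInfo* GetProfilingInfo() const {
        return reinterpret_cast<ProfilingInfo*>(entry_point_from_jni);
      }
      void SetProfilingInfo(ProfilingInfo* info) {
        entry_point_from_jni = info;
      }
    };

This aliasing is also why the code cache must null the slot before freeing a ProfilingInfo: a stale pointer left there would be read back through GetProfilingInfo().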
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index b1d4d35077..18ccd082ec 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -92,10 +92,25 @@ class ExceptionTest : public CommonRuntimeTest {
     fake_header_code_and_maps_.insert(fake_header_code_and_maps_.end(),
                                       fake_code_.begin(), fake_code_.end());
 
-    // NOTE: Don't align the code (it will not be executed) but check that the Thumb2
-    // adjustment will be a NOP, see EntryPointToCodePointer().
-    CHECK_ALIGNED(mapping_table_offset, 2);
-    const uint8_t* code_ptr = &fake_header_code_and_maps_[gc_map_offset];
+    // Align the code.
+    const size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+    fake_header_code_and_maps_.reserve(fake_header_code_and_maps_.size() + alignment);
+    const void* unaligned_code_ptr =
+        fake_header_code_and_maps_.data() + (fake_header_code_and_maps_.size() - code_size);
+    size_t offset = dchecked_integral_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_code_ptr));
+    size_t padding = RoundUp(offset, alignment) - offset;
+    // Make sure no resizing takes place.
+    CHECK_GE(fake_header_code_and_maps_.capacity(), fake_header_code_and_maps_.size() + padding);
+    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(), padding, 0);
+    const void* code_ptr = reinterpret_cast<const uint8_t*>(unaligned_code_ptr) + padding;
+    CHECK_EQ(code_ptr,
+             static_cast<const void*>(fake_header_code_and_maps_.data() +
+                                          (fake_header_code_and_maps_.size() - code_size)));
+
+    if (kRuntimeISA == kArm) {
+      // Check that the Thumb2 adjustment will be a NOP, see EntryPointToCodePointer().
+      CHECK_ALIGNED(mapping_table_offset, 2);
+    }
 
     method_f_ = my_klass_->FindVirtualMethod("f", "()I", sizeof(void*));
     ASSERT_TRUE(method_f_ != nullptr);
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index e73ba82278..1f89f9b1b7 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -43,7 +43,7 @@ class JitOptions;
 class Jit {
  public:
   static constexpr bool kStressMode = kIsDebugBuild;
-  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 1000;
+  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 500;
   static constexpr size_t kDefaultWarmupThreshold = kDefaultCompileThreshold / 2;
 
   virtual ~Jit();
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 2d0a2a57f1..60568b2f77 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -21,6 +21,7 @@
 #include "art_method-inl.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
+#include "jit/profiling_info.h"
 #include "linear_alloc.h"
 #include "mem_map.h"
 #include "oat_file-inl.h"
@@ -56,9 +57,9 @@ JitCodeCache* JitCodeCache::Create(size_t capacity, std::string* error_msg) {
     return nullptr;
   }
 
-  // Data cache is 1 / 4 of the map.
+  // Data cache is 1 / 2 of the map.
   // TODO: Make this variable?
-  size_t data_size = RoundUp(data_map->Size() / 4, kPageSize);
+  size_t data_size = RoundUp(data_map->Size() / 2, kPageSize);
   size_t code_size = data_map->Size() - data_size;
   uint8_t* divider = data_map->Begin() + data_size;
 
@@ -206,10 +207,23 @@ void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
   // We do not check if a code cache GC is in progress, as this method comes
   // with the classlinker_classes_lock_ held, and suspending ourselves could
   // lead to a deadlock.
-  for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-    if (alloc.ContainsUnsafe(it->second)) {
-      FreeCode(it->first, it->second);
-      it = method_code_map_.erase(it);
+  {
+    ScopedCodeCacheWrite scc(code_map_.get());
+    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+      if (alloc.ContainsUnsafe(it->second)) {
+        FreeCode(it->first, it->second);
+        it = method_code_map_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+  }
+  for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
+    ProfilingInfo* info = *it;
+    if (alloc.ContainsUnsafe(info->GetMethod())) {
+      info->GetMethod()->SetProfilingInfo(nullptr);
+      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
+      it = profiling_infos_.erase(it);
     } else {
       ++it;
     }
@@ -387,6 +401,9 @@ void JitCodeCache::GarbageCollectCache(Thread* self) {
     for (auto& it : method_code_map_) {
       it.second->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
     }
+    for (ProfilingInfo* info : profiling_infos_) {
+      info->GetMethod()->SetProfilingInfo(nullptr);
+    }
   }
 
   // Run a checkpoint on all threads to mark the JIT compiled code they are running.
@@ -400,27 +417,37 @@ void JitCodeCache::GarbageCollectCache(Thread* self) {
     }
   }
 
-  // Free unused compiled code, and restore the entry point of used compiled code.
   {
     MutexLock mu(self, lock_);
     DCHECK_EQ(map_size, method_code_map_.size());
-    ScopedCodeCacheWrite scc(code_map_.get());
-    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-      const void* code_ptr = it->first;
-      ArtMethod* method = it->second;
-      uintptr_t allocation = FromCodeToAllocation(code_ptr);
-      const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-      if (GetLiveBitmap()->Test(allocation)) {
-        method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
-        ++it;
-      } else {
-        method->ClearCounter();
-        DCHECK_NE(method->GetEntryPointFromQuickCompiledCode(), method_header->GetEntryPoint());
-        FreeCode(code_ptr, method);
-        it = method_code_map_.erase(it);
+    // Free unused compiled code, and restore the entry point of used compiled code.
+    {
+      ScopedCodeCacheWrite scc(code_map_.get());
+      for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+        const void* code_ptr = it->first;
+        ArtMethod* method = it->second;
+        uintptr_t allocation = FromCodeToAllocation(code_ptr);
+        const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+        if (GetLiveBitmap()->Test(allocation)) {
+          method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
+          ++it;
+        } else {
+          method->ClearCounter();
+          DCHECK_NE(method->GetEntryPointFromQuickCompiledCode(), method_header->GetEntryPoint());
+          FreeCode(code_ptr, method);
+          it = method_code_map_.erase(it);
+        }
       }
     }
     GetLiveBitmap()->Bitmap::Clear();
+
+    // Free all profiling info.
+    for (ProfilingInfo* info : profiling_infos_) {
+      DCHECK(info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr);
+      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
+    }
+    profiling_infos_.clear();
+
     collection_in_progress_ = false;
     lock_cond_.Broadcast(self);
   }
@@ -460,5 +487,44 @@ OatQuickMethodHeader* JitCodeCache::LookupMethodHeader(uintptr_t pc, ArtMethod*
   return method_header;
 }
 
+ProfilingInfo* JitCodeCache::AddProfilingInfo(Thread* self,
+                                              ArtMethod* method,
+                                              const std::vector<uint32_t>& entries,
+                                              bool retry_allocation) {
+  ProfilingInfo* info = AddProfilingInfoInternal(self, method, entries);
+
+  if (info == nullptr && retry_allocation) {
+    GarbageCollectCache(self);
+    info = AddProfilingInfoInternal(self, method, entries);
+  }
+  return info;
+}
+
+ProfilingInfo* JitCodeCache::AddProfilingInfoInternal(Thread* self,
+                                                      ArtMethod* method,
+                                                      const std::vector<uint32_t>& entries) {
+  size_t profile_info_size = RoundUp(
+      sizeof(ProfilingInfo) + sizeof(ProfilingInfo::InlineCache) * entries.size(),
+      sizeof(void*));
+  ScopedThreadSuspension sts(self, kSuspended);
+  MutexLock mu(self, lock_);
+  WaitForPotentialCollectionToComplete(self);
+
+  // Check whether some other thread has concurrently created it.
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info != nullptr) {
+    return info;
+  }
+
+  uint8_t* data = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, profile_info_size));
+  if (data == nullptr) {
+    return nullptr;
+  }
+  info = new (data) ProfilingInfo(method, entries);
+  method->SetProfilingInfo(info);
+  profiling_infos_.push_back(info);
+  return info;
+}
+
 }  // namespace jit
 }  // namespace art
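Note: AddProfilingInfo() gives callers two behaviours: a cheap single attempt for Java threads, and an allocate-collect-retry cycle for compiler threads that can afford a code cache collection. The pattern in isolation (a generic sketch, not ART's API):

    #include <cstddef>

    // Mirrors AddProfilingInfo(): try once; on failure optionally run a
    // collection and try again. The result may still be null.
    template <typename AllocFn, typename CollectFn>
    void* AllocateWithRetry(AllocFn alloc, CollectFn collect, bool retry_allocation) {
      void* data = alloc();
      if (data == nullptr && retry_allocation) {
        collect();  // Expensive: frees cold code and stale profiling info.
        data = alloc();
      }
      return data;
    }

In the patch, alloc corresponds to AddProfilingInfoInternal() and collect to GarbageCollectCache(); only the kAllocateProfile task running on a compiler thread passes retry_allocation = true.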
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 4e415b8403..e10f9629ae 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -35,6 +35,7 @@ namespace art {
 
 class ArtMethod;
 class LinearAlloc;
+class ProfilingInfo;
 
 namespace jit {
 
@@ -109,11 +110,21 @@ class JitCodeCache {
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Remove all methods in our cache that were allocated by 'alloc'.
   void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
       REQUIRES(!lock_)
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Create a 'ProfileInfo' for 'method'. If 'retry_allocation' is true,
+  // will collect and retry if the first allocation is unsuccessful.
+  ProfilingInfo* AddProfilingInfo(Thread* self,
+                                  ArtMethod* method,
+                                  const std::vector<uint32_t>& entries,
+                                  bool retry_allocation)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Take ownership of code_mem_map.
   JitCodeCache(MemMap* code_map, MemMap* data_map);
@@ -133,6 +144,12 @@ class JitCodeCache {
      REQUIRES(!lock_)
      SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ProfilingInfo* AddProfilingInfoInternal(Thread* self,
+                                          ArtMethod* method,
+                                          const std::vector<uint32_t>& entries)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // If a collection is in progress, wait for it to finish. Return
   // whether the thread actually waited.
   bool WaitForPotentialCollectionToComplete(Thread* self)
@@ -157,8 +174,10 @@ class JitCodeCache {
   void* data_mspace_ GUARDED_BY(lock_);
   // Bitmap for collecting code and data.
   std::unique_ptr<CodeCacheBitmap> live_bitmap_;
-  // This map holds compiled code associated to the ArtMethod
+  // This map holds compiled code associated to the ArtMethod.
   SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
+  // ProfilingInfo objects we have allocated.
+  std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 666b8e73d3..8aaa5fa304 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -26,7 +26,12 @@ namespace jit {
 
 class JitCompileTask FINAL : public Task {
  public:
-  explicit JitCompileTask(ArtMethod* method) : method_(method) {
+  enum TaskKind {
+    kAllocateProfile,
+    kCompile
+  };
+
+  JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
     ScopedObjectAccess soa(Thread::Current());
     // Add a global ref to the class to prevent class unloading until compilation is done.
     klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
@@ -40,9 +45,16 @@ class JitCompileTask FINAL : public Task {
 
   void Run(Thread* self) OVERRIDE {
     ScopedObjectAccess soa(self);
-    VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-    if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
-      VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
+    if (kind_ == kCompile) {
+      VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
+      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
+        VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
+      }
+    } else {
+      DCHECK(kind_ == kAllocateProfile);
+      if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method_);
+      }
     }
   }
 
@@ -52,6 +64,7 @@ class JitCompileTask FINAL : public Task {
 
  private:
   ArtMethod* const method_;
+  const TaskKind kind_;
   jobject klass_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
@@ -73,7 +86,6 @@ void JitInstrumentationCache::DeleteThreadPool() {
 }
 
 void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
-  ScopedObjectAccessUnchecked soa(self);
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
   // than we want resulting in samples even after the method is compiled.
   if (method->IsClassInitializer() || method->IsNative()) {
@@ -85,14 +97,20 @@ void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t
   }
   uint16_t sample_count = method->IncrementCounter();
   if (sample_count == warm_method_threshold_) {
-    ProfilingInfo* info = method->CreateProfilingInfo();
-    if (info != nullptr) {
+    if (ProfilingInfo::Create(self, method, /* retry_allocation */ false)) {
       VLOG(jit) << "Start profiling " << PrettyMethod(method);
+    } else {
+      // We failed allocating. Instead of doing the collection on the Java thread, we push
+      // an allocation to a compiler thread, that will do the collection.
+      thread_pool_->AddTask(self, new JitCompileTask(
+          method->GetInterfaceMethodIfProxy(sizeof(void*)), JitCompileTask::kAllocateProfile));
+      thread_pool_->StartWorkers(self);
     }
   }
+
   if (sample_count == hot_method_threshold_) {
     thread_pool_->AddTask(self, new JitCompileTask(
-        method->GetInterfaceMethodIfProxy(sizeof(void*))));
+        method->GetInterfaceMethodIfProxy(sizeof(void*)), JitCompileTask::kCompile));
     thread_pool_->StartWorkers(self);
   }
 }
@@ -107,14 +125,18 @@ void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
                                                           ArtMethod* caller,
                                                           uint32_t dex_pc,
                                                           ArtMethod* callee ATTRIBUTE_UNUSED) {
+  instrumentation_cache_->AddSamples(thread, caller, 1);
+  // We make sure we cannot be suspended, as the profiling info can be concurrently deleted.
+  thread->StartAssertNoThreadSuspension("Instrumenting invoke");
   DCHECK(this_object != nullptr);
   ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
   if (info != nullptr) {
     // Since the instrumentation is marked from the declaring class we need to mark the card so
     // that mod-union tables and card rescanning know about the update.
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
-    info->AddInvokeInfo(thread, dex_pc, this_object->GetClass());
+    info->AddInvokeInfo(dex_pc, this_object->GetClass());
   }
+  thread->EndAssertNoThreadSuspension(nullptr);
 }
 
 void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
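Note: AddSamples() now drives a two-stage lifecycle off a single counter: at the warm threshold the method gets a ProfilingInfo, at the hot threshold it is queued for compilation. Reduced to a sketch (thresholds from jit.h; the two callbacks are stand-ins for ProfilingInfo::Create and the task queue):

    #include <cstdint>

    // Per runtime/jit/jit.h: the warmup threshold is half the compile threshold.
    constexpr uint16_t kCompileThreshold = 500;
    constexpr uint16_t kWarmupThreshold = kCompileThreshold / 2;

    bool StartProfiling(bool retry_allocation) { (void)retry_allocation; return true; }
    void EnqueueTask(const char* kind) { (void)kind; }

    void AddSample(uint16_t* counter) {
      uint16_t sample_count = ++*counter;
      if (sample_count == kWarmupThreshold) {
        if (!StartProfiling(/* retry_allocation */ false)) {
          // Allocation failed: hand the (possibly collecting) retry to a
          // compiler thread rather than stalling the Java thread.
          EnqueueTask("kAllocateProfile");
        }
      }
      if (sample_count == kCompileThreshold) {
        EnqueueTask("kCompile");
      }
    }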
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 7c5f78e229..2e52b1b4dc 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -25,7 +25,7 @@
 
 namespace art {
 
-ProfilingInfo* ProfilingInfo::Create(ArtMethod* method) {
+bool ProfilingInfo::Create(Thread* self, ArtMethod* method, bool retry_allocation) {
   // Walk over the dex instructions of the method and keep track of
   // instructions we are interested in profiling.
   DCHECK(!method->IsNative());
@@ -57,23 +57,15 @@ ProfilingInfo* ProfilingInfo::Create(ArtMethod* method) {
   // If there is no instruction we are interested in, no need to create a `ProfilingInfo`
   // object, it will never be filled.
   if (entries.empty()) {
-    return nullptr;
+    return true;
   }
 
   // Allocate the `ProfilingInfo` object int the JIT's data space.
   jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
-  size_t profile_info_size = sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size();
-  uint8_t* data = code_cache->ReserveData(Thread::Current(), profile_info_size);
-
-  if (data == nullptr) {
-    VLOG(jit) << "Cannot allocate profiling info anymore";
-    return nullptr;
-  }
-
-  return new (data) ProfilingInfo(entries);
+  return code_cache->AddProfilingInfo(self, method, entries, retry_allocation) != nullptr;
 }
 
-void ProfilingInfo::AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls) {
+void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
   InlineCache* cache = nullptr;
   // TODO: binary search if array is too long.
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
@@ -84,7 +76,6 @@ void ProfilingInfo::AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class*
   }
   DCHECK(cache != nullptr);
 
-  ScopedObjectAccess soa(self);
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
     mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
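Note: AddInvokeInfo() fills a small fixed-size table of receiver classes per dex pc; once every slot holds a distinct class, the call site is considered megamorphic and further classes are ignored. The update policy, reduced to a sketch (plain pointers instead of GcRoot, and the cache size is illustrative):

    #include <cstddef>
    #include <cstdint>

    struct Class;  // Stand-in for mirror::Class.

    constexpr size_t kIndividualCacheSize = 5;  // Illustrative.

    struct InlineCache {
      uint32_t dex_pc = 0;
      Class* classes[kIndividualCacheSize] = {};  // Zero-initialized slots.
    };

    void AddInvokeInfo(InlineCache* cache, Class* cls) {
      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
        if (cache->classes[i] == cls) {
          return;  // Receiver type already recorded.
        }
        if (cache->classes[i] == nullptr) {
          cache->classes[i] = cls;  // First free slot.
          return;
        }
      }
      // Table full: megamorphic call site, nothing more to record.
    }

The new REQUIRES(Roles::uninterruptible_) annotation on AddInvokeInfo (see the header change below) exists because this table lives in the JIT data cache: if the thread could be suspended mid-update, a code cache collection could free the ProfilingInfo under it.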
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index 7a2d1a8881..b13a315d64 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -26,6 +26,10 @@ namespace art {
 
 class ArtMethod;
 
+namespace jit {
+class JitCodeCache;
+}
+
 namespace mirror {
 class Class;
 }
@@ -36,10 +40,17 @@ class Class;
  */
 class ProfilingInfo {
  public:
-  static ProfilingInfo* Create(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
+  // Create a ProfilingInfo for 'method'. Return whether it succeeded, or if it is
+  // not needed in case the method does not have virtual/interface invocations.
+  static bool Create(Thread* self, ArtMethod* method, bool retry_allocation)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add information from an executed INVOKE instruction to the profile.
-  void AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls);
+  void AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls)
+      // Method should not be interruptible, as it manipulates the ProfilingInfo
+      // which can be concurrently collected.
+      REQUIRES(Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
   template<typename RootVisitorType>
@@ -52,6 +63,10 @@ class ProfilingInfo {
     }
   }
 
+  ArtMethod* GetMethod() const {
+    return method_;
+  }
+
  private:
   // Structure to store the classes seen at runtime for a specific instruction.
   // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
@@ -84,8 +99,9 @@ class ProfilingInfo {
     GcRoot<mirror::Class> classes_[kIndividualCacheSize];
   };
 
-  explicit ProfilingInfo(const std::vector<uint32_t>& entries)
-      : number_of_inline_caches_(entries.size()) {
+  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()),
+        method_(method) {
     memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
       cache_[i].dex_pc = entries[i];
@@ -95,9 +111,14 @@ class ProfilingInfo {
   // Number of instructions we are profiling in the ArtMethod.
   const uint32_t number_of_inline_caches_;
 
+  // Method this profiling info is for.
+  ArtMethod* const method_;
+
   // Dynamically allocated array of size `number_of_inline_caches_`.
   InlineCache cache_[0];
 
+  friend class jit::JitCodeCache;
+
   DISALLOW_COPY_AND_ASSIGN(ProfilingInfo);
 };
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index c9a2cfbc0e..03cad0835e 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -43,6 +43,8 @@ class PACKED(4) OatQuickMethodHeader {
   static OatQuickMethodHeader* FromCodePointer(const void* code_ptr) {
     uintptr_t code = reinterpret_cast<uintptr_t>(code_ptr);
     uintptr_t header = code - OFFSETOF_MEMBER(OatQuickMethodHeader, code_);
+    DCHECK(IsAlignedParam(code, GetInstructionSetAlignment(kRuntimeISA)) ||
+           IsAlignedParam(header, GetInstructionSetAlignment(kRuntimeISA)));
     return reinterpret_cast<OatQuickMethodHeader*>(header);
   }
 
diff --git a/test/004-ThreadStress/run b/test/004-ThreadStress/run
new file mode 100755
index 0000000000..27c501da8d
--- /dev/null
+++ b/test/004-ThreadStress/run
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Be less agressive than the default debug option for the jit code cache
+# to avoid timeouts.
+exec ${RUN} "$@" --runtime-option -Xjitcodecachesize:1M
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 280b4bcd84..18867fd035 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -214,7 +214,7 @@ done
 
 if [ "$USE_JVM" = "n" ]; then
     for feature in ${EXPERIMENTAL}; do
-        FLAGS="${FLAGS} -Xexperimental:${feature}"
+        FLAGS="${FLAGS} -Xexperimental:${feature} -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:${feature}"
        COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xexperimental:${feature}"
    done
 fi
diff --git a/test/run-all-tests b/test/run-all-tests
index 76283b7a8d..6d5c28c7e0 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -44,12 +44,45 @@ while true; do
    elif [ "x$1" = "x--use-java-home" ]; then
        run_args="${run_args} --use-java-home"
        shift
+    elif [ "x$1" = "x--no-image" ]; then
+        run_args="${run_args} --no-image"
+        shift
+    elif [ "x$1" = "x--quick" ]; then
+        run_args="${run_args} --quick"
+        shift
+    elif [ "x$1" = "x--optimizing" ]; then
+        run_args="${run_args} --optimizing"
+        shift
+    elif [ "x$1" = "x--image" ]; then
+        run_args="${run_args} --image"
+        shift
+    elif [ "x$1" = "x--never-clean" ]; then
+        run_args="${run_args} --never-clean"
+        shift
    elif [ "x$1" = "x--jvm" ]; then
        run_args="${run_args} --jvm"
        shift
    elif [ "x$1" = "x--debug" ]; then
        run_args="${run_args} --debug"
        shift
+    elif [ "x$1" = "x--build-only" ]; then
+        run_args="${run_args} --build-only"
+        shift
+    elif [ "x$1" = "x--build-with-jack" ]; then
+        run_args="${run_args} --build-with-jack"
+        shift
+    elif [ "x$1" = "x--build-with-javac-dx" ]; then
+        run_args="${run_args} --build-with-javac-dx"
+        shift
+    elif [ "x$1" = "x--dex2oat-swap" ]; then
+        run_args="${run_args} --dex2oat-swap"
+        shift
+    elif [ "x$1" = "x--dalvik" ]; then
+        run_args="${run_args} --dalvik"
+        shift
+    elif [ "x$1" = "x--debuggable" ]; then
+        run_args="${run_args} --debuggable"
+        shift
    elif [ "x$1" = "x--zygote" ]; then
        run_args="${run_args} --zygote"
        shift
@@ -59,15 +92,15 @@ while true; do
    elif [ "x$1" = "x--jit" ]; then
        run_args="${run_args} --jit"
        shift
+    elif [ "x$1" = "x--verify-soft-fail" ]; then
+        run_args="${run_args} --verify-soft-fail"
+        shift
    elif [ "x$1" = "x--no-verify" ]; then
        run_args="${run_args} --no-verify"
        shift
    elif [ "x$1" = "x--no-optimize" ]; then
        run_args="${run_args} --no-optimize"
        shift
-    elif [ "x$1" = "x--valgrind" ]; then
-        run_args="${run_args} --valgrind"
-        shift
    elif [ "x$1" = "x--dev" ]; then
        run_args="${run_args} --dev"
        shift
@@ -116,6 +149,15 @@ while true; do
    elif [ "x$1" = "x--always-clean" ]; then
        run_args="${run_args} --always-clean"
        shift
+    elif [ "x$1" = "x--pic-test" ]; then
+        run_args="${run_args} --pic-test"
+        shift
+    elif [ "x$1" = "x--pic-image" ]; then
+        run_args="${run_args} --pic-image"
+        shift
+    elif [ "x$1" = "x--strace" ]; then
+        run_args="${run_args} --strace"
+        shift
    elif expr "x$1" : "x--" >/dev/null 2>&1; then
        echo "unknown $0 option: $1" 1>&2
        usage="yes"
@@ -134,9 +176,13 @@ if [ "$usage" = "yes" ]; then
        echo "  Options are all passed to run-test; refer to that for " \
             "further documentation:"
        echo "    --debug --dev --host --interpreter --jit --jvm --no-optimize"
-        echo "    --no-verify -O --update --valgrind --zygote --64 --relocate"
-        echo "    --prebuild --always-clean --gcstress --gcverify --trace"
-        echo "    --no-patchoat --no-dex2oat --use-java-home"
+        echo "    --no-verify --verify-soft-fail -O --update --zygote --64"
+        echo "    --relocate --prebuild --always-clean --gcstress --gcverify"
+        echo "    --trace --no-patchoat --no-dex2oat --use-java-home --pic-image"
+        echo "    --pic-test --strace --debuggable --dalvik --dex2oat-swap"
+        echo "    --build-only --build-with-jack --build-with-javac-dx"
+        echo "    --never-clean --image --no-image --quick --optimizing"
+        echo "    --no-relocate --no-prebuild"
        echo "  Specific Runtime Options:"
        echo "    --seq                Run tests one-by-one, avoiding failures caused by busy CPU"
    ) 1>&2
diff --git a/test/run-test b/test/run-test
index 5a43fb05c3..3442fcf67d 100755
--- a/test/run-test
+++ b/test/run-test
@@ -528,6 +528,7 @@ if [ "$usage" = "yes" ]; then
        echo "    --debug               Wait for a debugger to attach."
        echo "    --debuggable          Whether to compile Java code for a debugger."
        echo "    --gdb                 Run under gdb; incompatible with some tests."
+        echo "    --gdb-arg             Pass an option to gdb."
        echo "    --build-only          Build test files only (off by default)."
        echo "    --build-with-javac-dx Build test files with javac and dx (on by default)."
        echo "    --build-with-jack     Build test files with jack and jill (off by default)."
@@ -553,6 +554,8 @@ if [ "$usage" = "yes" ]; then
        echo "                          the image and oat files be relocated to a random"
        echo "                          address before running. (default)"
        echo "    --no-relocate         Force the use of no relocating in the test"
+        echo "    --image               Run the test using a precompiled boot image. (default)"
+        echo "    --no-image            Run the test without a precompiled boot image."
        echo "    --host                Use the host-mode virtual machine."
        echo "    --invoke-with         Pass --invoke-with option to runtime."
        echo "    --dalvik              Use Dalvik (off by default)."
@@ -564,6 +567,7 @@ if [ "$usage" = "yes" ]; then
             "files."
        echo "    --64                  Run the test in 64-bit mode"
        echo "    --trace               Run with method tracing"
+        echo "    --strace              Run with syscall tracing from strace."
        echo "    --stream              Run method tracing in streaming mode (requires --trace)"
        echo "    --gcstress            Run with gc stress testing"
        echo "    --gcverify            Run with gc verification"
@@ -573,6 +577,9 @@ if [ "$usage" = "yes" ]; then
        echo "    --dex2oat-swap        Use a dex2oat swap file."
        echo "    --instruction-set-features [string]"
        echo "                          Set instruction-set-features for compilation."
+        echo "    --pic-image           Use an image compiled with position independent code for the"
+        echo "                          boot class path."
+        echo "    --pic-test            Compile the test code position independent."
    ) 1>&2
    exit 1
 fi
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 9aed271c82..de27a6faaa 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -30,8 +30,6 @@ fi
 
 art="/data/local/tmp/system/bin/art"
 art_debugee="sh /data/local/tmp/system/bin/art"
-# We use Quick's image on target because optimizing's image is not compiled debuggable.
-image="-Ximage:/data/art-test/core.art"
 args=$@
 debuggee_args="-Xcompiler-option --debuggable"
 device_dir="--device-dir=/data/local/tmp"
@@ -41,6 +39,8 @@ vm_command="--vm-command=$art"
 image_compiler_option=""
 debug="no"
 verbose="no"
+image="-Ximage:/data/art-test/core-jit.art"
+vm_args=""
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
 
@@ -88,7 +88,10 @@ while true; do
  fi
 done
-vm_args="--vm-arg $image --vm-arg -Xusejit:true"
+if [[ "$image" != "" ]]; then
+  vm_args="--vm-arg $image"
+fi
+vm_args="$vm_args --vm-arg -Xusejit:true"
 debuggee_args="$debuggee_args -Xusejit:true"
 
 if [[ $debug == "yes" ]]; then
   art="$art -d"