 build/Android.common_build.mk                  |   2
 compiler/Android.mk                            |   3
 compiler/optimizing/intrinsics_mips.cc         |  20
 runtime/arch/arm/quick_entrypoints_arm.S       |  34
 runtime/arch/arm64/quick_entrypoints_arm64.S   |  45
 runtime/arch/x86/quick_entrypoints_x86.S       |  39
 runtime/arch/x86_64/quick_entrypoints_x86_64.S |  31
 runtime/base/arena_allocator_test.cc           | 136
 runtime/debugger.cc                            |   6
 runtime/gc/collector/concurrent_copying.cc     |   2
 runtime/gc/heap.cc                             |  10
 runtime/simulator/Android.mk                   |   4
 runtime/thread-inl.h                           |   1
 runtime/thread.cc                              |  12
 runtime/thread.h                               |  38
 runtime/thread_list.cc                         |  36
16 files changed, 367 insertions, 52 deletions
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 1e2cfa3e97..c8e3654633 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -152,7 +152,7 @@ art_cflags += \
 
 # The architectures the compiled tools are able to run on. Setting this to 'all' will cause all
 # architectures to be included.
-ART_TARGET_CODEGEN_ARCHS ?= all
+ART_TARGET_CODEGEN_ARCHS ?= svelte
 ART_HOST_CODEGEN_ARCHS ?= all
 
 ifeq ($(ART_TARGET_CODEGEN_ARCHS),all)
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 410b2d05f2..16c6a7b2ce 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -92,6 +92,8 @@ LIBART_COMPILER_SRC_FILES_arm := \
   linker/arm/relative_patcher_thumb2.cc \
   optimizing/code_generator_arm.cc \
   optimizing/dex_cache_array_fixups_arm.cc \
+  optimizing/instruction_simplifier_arm.cc \
+  optimizing/instruction_simplifier_shared.cc \
   optimizing/intrinsics_arm.cc \
   utils/arm/assembler_arm.cc \
   utils/arm/assembler_arm32.cc \
@@ -109,7 +111,6 @@ LIBART_COMPILER_SRC_FILES_arm64 := \
   linker/arm64/relative_patcher_arm64.cc \
   optimizing/nodes_arm64.cc \
   optimizing/code_generator_arm64.cc \
-  optimizing/instruction_simplifier_arm.cc \
   optimizing/instruction_simplifier_arm64.cc \
   optimizing/instruction_simplifier_shared.cc \
   optimizing/intrinsics_arm64.cc \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 55e1ab2451..6e5eb6622b 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2456,16 +2456,18 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
 
   __ FloorWS(FTMP, in);
   __ Mfc1(out, FTMP);
 
-  __ LoadConst32(TMP, 1);
+  if (!IsR6()) {
+    __ LoadConst32(TMP, -1);
+  }
 
-  // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0;
+  // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0;
   __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
   __ Bne(AT, out, &finite);
   __ Mtc1(ZERO, FTMP);
   if (IsR6()) {
     __ CmpLtS(FTMP, in, FTMP);
-    __ Mfc1(AT, FTMP);
+    __ Mfc1(TMP, FTMP);
   } else {
     __ ColtS(in, FTMP);
   }
@@ -2474,28 +2476,26 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
 
   __ Bind(&finite);
 
-  // TMP = (0.5f <= (in - out)) ? 1 : 0;
+  // TMP = (0.5f <= (in - out)) ? -1 : 0;
   __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
   __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
   __ SubS(FTMP, in, FTMP);
   __ Mtc1(AT, half);
   if (IsR6()) {
     __ CmpLeS(FTMP, half, FTMP);
-    __ Mfc1(AT, FTMP);
+    __ Mfc1(TMP, FTMP);
   } else {
     __ ColeS(half, FTMP);
   }
 
   __ Bind(&add);
 
-  if (IsR6()) {
-    __ Selnez(TMP, TMP, AT);
-  } else {
+  if (!IsR6()) {
     __ Movf(TMP, ZERO);
   }
 
-  // Return out += TMP.
-  __ Addu(out, out, TMP);
+  // Return out -= TMP.
+  __ Subu(out, out, TMP);
 
   __ Bind(&done);
 }
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 11357b5596..881bebe002 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1030,11 +1030,37 @@ ENTRY art_quick_set64_instance
 END art_quick_set64_instance
 
     /*
-     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. R0 holds the string index. The fast
-     * path check for hit in strings cache has already been performed.
+     * Entry from managed code to resolve a string, this stub will
+     * check the dex cache for a matching string (the fast path), and if not found,
+     * it will allocate a String and deliver an exception on error.
+     * On success the String is returned. R0 holds the string index.
      */
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+ENTRY art_quick_resolve_string
+    ldr    r1, [sp]                                            @ load referrer
+    ldr    r1, [r1, #ART_METHOD_DECLARING_CLASS_OFFSET]        @ load declaring class
+    ldr    r1, [r1, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
+    ubfx   r2, r0, #0, #STRING_DEX_CACHE_HASH_BITS
+    add    r1, r1, r2, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
+    ldrd   r2, r3, [r1]                                        @ load index into r3 and pointer into r2
+    cmp    r0, r3
+    bne    .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+    ldr    r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tst    r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    beq    .Lart_quick_resolve_string_slow_path
+#endif
+    mov    r0, r2
+    bx     lr
+
+.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME r2               @ save callee saves in case of GC
+    mov    r1, r9                               @ pass Thread::Current
+    mov    r3, sp
+    bl     artResolveStringFromCode             @ (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_resolve_string
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 3e6fbaf64b..202846a679 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1786,11 +1786,48 @@ ENTRY art_quick_set64_static
 END art_quick_set64_static
 
     /*
-     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. w0 holds the string index. The fast
-     * path check for hit in strings cache has already been performed.
+     * Entry from managed code to resolve a string, this stub will
+     * check the dex cache for a matching string (the fast path), and if not found,
+     * it will allocate a String and deliver an exception on error.
+     * On success the String is returned. R0 holds the string index.
      */
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+ENTRY art_quick_resolve_string
+    ldr   x1, [sp]                                               // load referrer
+    ldr   w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET]           // load declaring class
+    ldr   x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]    // load string dex cache
+    and   x2, x0, #STRING_DEX_CACHE_SIZE_MINUS_ONE               // get masked string index into x2
+    ldr   x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x2
+    cmp   x0, x2, lsr #32                                        // compare against upper 32 bits
+    bne   .Lart_quick_resolve_string_slow_path
+    ubfx  x0, x2, #0, #32                                        // extract lower 32 bits into x0
+#ifdef USE_READ_BARRIER
+    // Most common case: GC is not marking.
+    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x3, .Lart_quick_resolve_string_marking
+#endif
+    ret
+
+// Slow path case, the index did not match.
+.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME                  // save callee saves in case of GC
+    mov   x1, xSELF                             // pass Thread::Current
+    bl    artResolveStringFromCode              // (int32_t string_idx, Thread* self)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+// GC is marking case, need to check the mark bit.
+.Lart_quick_resolve_string_marking:
+    ldr   x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbnz  x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
+    // Save LR so that we can return, also x1 for alignment purposes.
+    stp   x1, xLR, [sp, #-16]!                  // Save x1, LR.
+    bl    artReadBarrierMark                    // Get the marked string back.
+    ldp   x1, xLR, [sp], #16                    // Restore registers.
+.Lart_quick_resolve_string_no_rb:
+    ret
+
+END art_quick_resolve_string
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2e9682e563..d685ace90f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1108,7 +1108,44 @@ DEFINE_FUNCTION art_quick_alloc_object_region_tlab
   ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
 END_FUNCTION art_quick_alloc_object_region_tlab
 
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+DEFINE_FUNCTION art_quick_resolve_string
+    SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx
+    movl FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx                // get referrer
+    movl ART_METHOD_DECLARING_CLASS_OFFSET(%ecx), %ecx        // get declaring class
+    movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %ecx // get string dex cache
+    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %edx
+    andl %eax, %edx
+    shl LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %edx
+    addl %ecx, %edx
+    movlps (%edx), %xmm0                                      // load string idx and pointer to xmm0
+    movd %xmm0, %ecx                                          // extract pointer
+    pshufd LITERAL(0x55), %xmm0, %xmm0                        // shuffle index into lowest bits
+    movd %xmm0, %edx                                          // extract index
+    cmp %edx, %eax
+    jne .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jz .Lart_quick_resolve_string_slow_path
+#endif
+    movl %ecx, %eax
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    ret
+
+.Lart_quick_resolve_string_slow_path:
+    // Outgoing argument set up
+    subl LITERAL(8), %esp                                     // push padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET                              // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                                                  // pass arg1
+    call SYMBOL(artResolveStringFromCode)
+    addl LITERAL(16), %esp                                    // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END_FUNCTION art_quick_resolve_string
+
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 32768b0263..647fe1d660 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1330,7 +1330,36 @@ DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
   ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
 END_FUNCTION art_quick_alloc_object_initialized_region_tlab
 
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+DEFINE_FUNCTION art_quick_resolve_string
+    movq 8(%rsp), %rcx                                        // get referrer
+    movl ART_METHOD_DECLARING_CLASS_OFFSET(%rcx), %ecx        // get declaring class
+    movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %rcx // get string dex cache
+    movq LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %rdx
+    andq %rdi, %rdx
+    shlq LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %rdx
+    addq %rcx, %rdx
+    movq %rax, %rcx
+    movq (%rdx), %rdx
+    movq %rdx, %rax
+    movl %eax, %eax
+    shrq LITERAL(32), %rdx
+    cmp %rdx, %rdi
+    jne .Lart_quick_resolve_string_slow_path
+#ifdef USE_READ_BARRIER
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%rax)
+    jz .Lart_quick_resolve_string_slow_path
+#endif
+    ret
+.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME
+    movq %rcx, %rax
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
+    call SYMBOL(artResolveStringFromCode)       // artResolveStringFromCode(arg0, referrer, Thread*)
+    RESTORE_SAVE_REFS_ONLY_FRAME                // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END_FUNCTION art_quick_resolve_string
+
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/base/arena_allocator_test.cc b/runtime/base/arena_allocator_test.cc
index 9de3cc4312..9932586ed9 100644
--- a/runtime/base/arena_allocator_test.cc
+++ b/runtime/base/arena_allocator_test.cc
@@ -124,4 +124,140 @@ TEST_F(ArenaAllocatorTest, LargeAllocations) {
   }
 }
 
+TEST_F(ArenaAllocatorTest, AllocAlignment) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  for (size_t iterations = 0; iterations <= 10; ++iterations) {
+    for (size_t size = 1; size <= ArenaAllocator::kAlignment + 1; ++size) {
+      void* allocation = arena.Alloc(size);
+      EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(allocation))
+          << reinterpret_cast<uintptr_t>(allocation);
+    }
+  }
+}
+
+TEST_F(ArenaAllocatorTest, ReallocAlignment) {
+  {
+    // Case 1: small aligned allocation, aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 3;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect the same buffer.
+    EXPECT_EQ(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 2: small aligned allocation, non-aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect the same buffer.
+    EXPECT_EQ(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 3: small non-aligned allocation, aligned extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 4;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect the same buffer.
+    EXPECT_EQ(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 4: small non-aligned allocation, aligned non-extend inside arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2);
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = ArenaAllocator::kAlignment * 3;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect the same buffer.
+    EXPECT_EQ(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  // The next part is brittle, as the default size for an arena is variable, and we don't know about
+  // sanitization.
+
+  {
+    // Case 5: large allocation, aligned extend into next arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect new buffer.
+    EXPECT_NE(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+
+  {
+    // Case 6: large allocation, non-aligned extend into next arena.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
+    const size_t original_size = Arena::kDefaultSize -
+        ArenaAllocator::kAlignment * 4 -
+        ArenaAllocator::kAlignment / 2;
+    void* original_allocation = arena.Alloc(original_size);
+    ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation));
+
+    const size_t new_size = Arena::kDefaultSize +
+        ArenaAllocator::kAlignment * 2 +
+        ArenaAllocator::kAlignment / 2;
+    void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation));
+    // Secondary: expect new buffer.
+    EXPECT_NE(original_allocation, realloc_allocation);
+
+    void* after_alloc = arena.Alloc(1);
+    EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc));
+  }
+}
+
+
 }  // namespace art
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index b4acc27c48..89bebb4ccb 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4059,7 +4059,7 @@ void Dbg::ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInv
   // Prepare JDWP ids for the reply.
   JDWP::JdwpTag result_tag = BasicTagFromDescriptor(m->GetShorty());
   const bool is_object_result = (result_tag == JDWP::JT_OBJECT);
-  StackHandleScope<2> hs(soa.Self());
+  StackHandleScope<3> hs(soa.Self());
   Handle<mirror::Object> object_result = hs.NewHandle(is_object_result ? result.GetL() : nullptr);
   Handle<mirror::Throwable> exception = hs.NewHandle(soa.Self()->GetException());
   soa.Self()->ClearException();
@@ -4100,9 +4100,9 @@ void Dbg::ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInv
   if (exceptionObjectId == 0) {
     if (m->GetDeclaringClass()->IsStringClass()) {
       // For string constructors, the new string is remapped to the receiver (stored in ref).
-      mirror::Object* decoded_ref = soa.Self()->DecodeJObject(ref.get());
+      Handle<mirror::Object> decoded_ref = hs.NewHandle(soa.Self()->DecodeJObject(ref.get()));
       result_value = gRegistry->Add(decoded_ref);
-      result_tag = TagFromObject(soa, decoded_ref);
+      result_tag = TagFromObject(soa, decoded_ref.Get());
     } else {
       // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the
       // object registry.
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 7afe6f9ab4..42816a04f1 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -435,10 +435,8 @@ void ConcurrentCopying::FlipThreadRoots() {
   gc_barrier_->Init(self, 0);
   ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_);
   FlipCallback flip_callback(this);
-  heap_->ThreadFlipBegin(self);  // Sync with JNI critical calls.
   size_t barrier_count = Runtime::Current()->FlipThreadRoots(
       &thread_flip_visitor, &flip_callback, this);
-  heap_->ThreadFlipEnd(self);
   {
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
     gc_barrier_->Increment(self, barrier_count);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 39f26e7fe2..638c1d841a 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -878,9 +878,13 @@ void Heap::IncrementDisableThreadFlip(Thread* self) {
   MutexLock mu(self, *thread_flip_lock_);
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
-  while (thread_flip_running_) {
-    has_waited = true;
-    thread_flip_cond_->Wait(self);
+  if (thread_flip_running_) {
+    TimingLogger::ScopedTiming split("IncrementDisableThreadFlip",
+                                     GetCurrentGcIteration()->GetTimings());
+    while (thread_flip_running_) {
+      has_waited = true;
+      thread_flip_cond_->Wait(self);
+    }
   }
   ++disable_thread_flip_count_;
   if (has_waited) {
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
index a34a84100a..e39af2dcd9 100644
--- a/runtime/simulator/Android.mk
+++ b/runtime/simulator/Android.mk
@@ -22,6 +22,9 @@ LIBART_SIMULATOR_SRC_FILES := \
   code_simulator.cc \
   code_simulator_arm64.cc
 
+LIBART_SIMULATOR_CFLAGS := \
+  -DVIXL_INCLUDE_SIMULATOR_AARCH64
+
 # $(1): target or host
 # $(2): ndebug or debug
 define build-libart-simulator
@@ -54,6 +57,7 @@ define build-libart-simulator
 
   LOCAL_MODULE_CLASS := SHARED_LIBRARIES
   LOCAL_SRC_FILES := $$(LIBART_SIMULATOR_SRC_FILES)
+  LOCAL_CFLAGS := $$(LIBART_SIMULATOR_CFLAGS)
 
   ifeq ($$(art_target_or_host),target)
     $(call set-target-local-clang-vars)
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 3aa1fc256d..216d8a7194 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -224,6 +224,7 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
         thread_to_pass = this;
       }
       MutexLock mu(thread_to_pass, *Locks::thread_suspend_count_lock_);
+      ScopedTransitioningToRunnable scoped_transitioning_to_runnable(this);
       old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
       DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
       while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index b35a614e99..79b9f02991 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1217,10 +1217,8 @@ void Thread::FullSuspendCheck() {
   ScopedTrace trace(__FUNCTION__);
   VLOG(threads) << this << " self-suspending";
   // Make thread appear suspended to other threads, release mutator_lock_.
-  tls32_.suspended_at_suspend_check = true;
   // Transition to suspended and back to runnable, re-acquire share on mutator_lock_.
   ScopedThreadSuspension(this, kSuspended);
-  tls32_.suspended_at_suspend_check = false;
   VLOG(threads) << this << " self-reviving";
 }
 
@@ -1433,6 +1431,12 @@ struct StackDumpVisitor : public StackVisitor {
       if (o == nullptr) {
         os << "an unknown object";
       } else {
+        if (kUseReadBarrier && Thread::Current()->GetIsGcMarking()) {
+          // We may call Thread::Dump() in the middle of the CC thread flip and this thread's stack
+          // may have not been flipped yet and "o" may be a from-space (stale) ref, in which case the
+          // IdentityHashCode call below will crash. So explicitly mark/forward it here.
+          o = ReadBarrier::Mark(o);
+        }
         if ((o->GetLockWord(false).GetState() == LockWord::kThinLocked) &&
             Locks::mutator_lock_->IsExclusiveHeld(Thread::Current())) {
           // Getting the identity hashcode here would result in lock inflation and suspension of the
@@ -1635,7 +1639,7 @@ Thread::Thread(bool daemon) : tls32_(daemon), wait_monitor_(nullptr), interrupte
   }
   tlsPtr_.flip_function = nullptr;
   tlsPtr_.thread_local_mark_stack = nullptr;
-  tls32_.suspended_at_suspend_check = false;
+  tls32_.is_transitioning_to_runnable = false;
 }
 
 bool Thread::IsStillStarting() const {
@@ -1773,7 +1777,7 @@ Thread::~Thread() {
   CHECK(tlsPtr_.checkpoint_function == nullptr);
   CHECK_EQ(checkpoint_overflow_.size(), 0u);
   CHECK(tlsPtr_.flip_function == nullptr);
-  CHECK_EQ(tls32_.suspended_at_suspend_check, false);
+  CHECK_EQ(tls32_.is_transitioning_to_runnable, false);
 
   // Make sure we processed all deoptimization requests.
   CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization";
diff --git a/runtime/thread.h b/runtime/thread.h
index 840b7817f8..1c2d4ab533 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1085,8 +1085,12 @@ class Thread {
     return tlsPtr_.nested_signal_state;
   }
 
-  bool IsSuspendedAtSuspendCheck() const {
-    return tls32_.suspended_at_suspend_check;
+  bool IsTransitioningToRunnable() const {
+    return tls32_.is_transitioning_to_runnable;
+  }
+
+  void SetIsTransitioningToRunnable(bool value) {
+    tls32_.is_transitioning_to_runnable = value;
   }
 
   void PushVerifier(verifier::MethodVerifier* verifier);
@@ -1264,7 +1268,7 @@ class Thread {
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false),
-      suspended_at_suspend_check(false), ready_for_debug_invoke(false),
+      is_transitioning_to_runnable(false), ready_for_debug_invoke(false),
       debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true),
       disable_thread_flip_count(0) {
     }
@@ -1306,10 +1310,10 @@ class Thread {
     // True if signal is being handled by this thread.
     bool32_t handling_signal_;
 
-    // True if the thread is suspended in FullSuspendCheck(). This is
-    // used to distinguish runnable threads that are suspended due to
-    // a normal suspend check from other threads.
-    bool32_t suspended_at_suspend_check;
+    // True if the thread is in TransitionFromSuspendedToRunnable(). This is used to distinguish the
+    // non-runnable threads (eg. kNative, kWaiting) that are about to transition to runnable from
+    // the rest of them.
+    bool32_t is_transitioning_to_runnable;
 
    // True if the thread has been suspended by a debugger event. This is
    // used to invoke method from the debugger which is only allowed when
@@ -1588,6 +1592,26 @@ class ScopedDebugDisallowReadBarriers {
   Thread* const self_;
 };
 
+class ScopedTransitioningToRunnable : public ValueObject {
+ public:
+  explicit ScopedTransitioningToRunnable(Thread* self)
+      : self_(self) {
+    DCHECK_EQ(self, Thread::Current());
+    if (kUseReadBarrier) {
+      self_->SetIsTransitioningToRunnable(true);
+    }
+  }
+
+  ~ScopedTransitioningToRunnable() {
+    if (kUseReadBarrier) {
+      self_->SetIsTransitioningToRunnable(false);
+    }
+  }
+
+ private:
+  Thread* const self_;
+};
+
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
 std::ostream& operator<<(std::ostream& os, const StackedShadowFrameType& thread);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 419ecec696..688514cd76 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -405,6 +405,8 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
   CHECK_NE(self->GetState(), kRunnable);
 
+  collector->GetHeap()->ThreadFlipBegin(self);  // Sync with JNI critical calls.
+
   SuspendAllInternal(self, self, nullptr);
 
   // Run the flip callback for the collector.
@@ -414,26 +416,31 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
   collector->RegisterPause(NanoTime() - start_time);
 
   // Resume runnable threads.
-  std::vector<Thread*> runnable_threads;
+  size_t runnable_thread_count = 0;
   std::vector<Thread*> other_threads;
   {
+    TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
     MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     --suspend_all_count_;
     for (const auto& thread : list_) {
+      // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked by
+      // a checkpoint) may cause the flip function to be run for a runnable/suspended thread before
+      // a runnable thread runs it for itself or we run it for a suspended thread below.
+      thread->SetFlipFunction(thread_flip_visitor);
       if (thread == self) {
        continue;
      }
-      // Set the flip function for both runnable and suspended threads
-      // because Thread::DumpState/DumpJavaStack() (invoked by a
-      // checkpoint) may cause the flip function to be run for a
-      // runnable/suspended thread before a runnable threads runs it
-      // for itself or we run it for a suspended thread below.
-      thread->SetFlipFunction(thread_flip_visitor);
-      if (thread->IsSuspendedAtSuspendCheck()) {
+      // Resume early the threads that were runnable but are suspended just for this thread flip or
+      // about to transition from non-runnable (eg. kNative at the SOA entry in a JNI function) to
+      // runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable), or waiting
+      // for the thread flip to end at the JNI critical section entry (kWaitingForGcThreadFlip),
+      ThreadState state = thread->GetState();
+      if (state == kWaitingForGcThreadFlip ||
+          thread->IsTransitioningToRunnable()) {
         // The thread will resume right after the broadcast.
         thread->ModifySuspendCount(self, -1, nullptr, false);
-        runnable_threads.push_back(thread);
+        ++runnable_thread_count;
       } else {
         other_threads.push_back(thread);
       }
@@ -441,8 +448,11 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
     Thread::resume_cond_->Broadcast(self);
   }
 
+  collector->GetHeap()->ThreadFlipEnd(self);
+
   // Run the closure on the other threads and let them resume.
   {
+    TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
     for (const auto& thread : other_threads) {
       Closure* flip_func = thread->GetFlipFunction();
@@ -451,11 +461,15 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
       }
     }
     // Run it for self.
-    thread_flip_visitor->Run(self);
+    Closure* flip_func = self->GetFlipFunction();
+    if (flip_func != nullptr) {
+      flip_func->Run(self);
+    }
   }
 
   // Resume other threads.
   {
+    TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : other_threads) {
       thread->ModifySuspendCount(self, -1, nullptr, false);
@@ -463,7 +477,7 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
     Thread::resume_cond_->Broadcast(self);
   }
 
-  return runnable_threads.size() + other_threads.size() + 1;  // +1 for self.
+  return runnable_thread_count + other_threads.size() + 1;  // +1 for self.
 }
 
 void ThreadList::SuspendAll(const char* cause, bool long_suspend) {