Change one read barrier bit to mark bit
Optimization to improve read barrier slow path performance. When the GC
marks an object through the read barrier slow path, it now sets the mark
bit in the lock word of that reference. The assembly entrypoints check
this bit first; in the common case it is already set, so the read barrier
knows the object is already marked and there is no work to do.
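
As an illustration, here is a minimal C++ sketch of the fast-path logic; the
Obj type, lock_word field, and MarkSlowPath stub are assumptions made for the
sketch, and only the bit position mirrors the LOCK_WORD_MARK_BIT_SHIFT
constant introduced below (the real fast path is the assembly in the
READ_BARRIER_MARK_REG macros):

// Illustrative sketch only, not ART code.
#include <cstdint>
#include <cstdio>

constexpr uint32_t kMarkBitShift = 29;                         // LOCK_WORD_MARK_BIT_SHIFT
constexpr uint32_t kMarkBitMaskShifted = 1u << kMarkBitShift;  // LOCK_WORD_MARK_BIT_MASK_SHIFTED

struct Obj {
  uint32_t lock_word;  // stands in for the word at MIRROR_OBJECT_LOCK_WORD_OFFSET
};

// Stub for the existing slow path; with this change it also sets the mark bit,
// so later barriers on the same object stay on the fast path.
Obj* MarkSlowPath(Obj* ref) {
  ref->lock_word |= kMarkBitMaskShifted;
  return ref;
}

Obj* ReadBarrierMark(Obj* ref) {
  if (ref == nullptr) {
    return ref;                                // null check before touching the lock word
  }
  if (ref->lock_word & kMarkBitMaskShifted) {
    return ref;                                // already marked: nothing to do (common case)
  }
  return MarkSlowPath(ref);                    // uncommon case: take the slow path
}

int main() {
  Obj o{0};
  ReadBarrierMark(&o);  // first barrier takes the slow path and sets the bit
  std::printf("mark bit = %u\n", (o.lock_word >> kMarkBitShift) & 1u);
  return 0;
}
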
To avoid creating dirty pages in the zygote and image spaces, the bit is
pre-set by the image writer and during zygote space creation.
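
A minimal sketch of that pre-marking step, under similar assumptions (the Obj
type and the CAS loop are invented for the sketch; the actual hunks in
image_writer.cc and heap.cc below call AtomicSetMarkBit(0, 1) while walking
the image objects and the zygote live bitmap):

// Illustrative sketch only, not ART code.
#include <atomic>
#include <cstdint>
#include <vector>

constexpr uint32_t kMarkBitMaskShifted = 1u << 29;  // LOCK_WORD_MARK_BIT_MASK_SHIFTED
constexpr bool kUseBakerReadBarrier = true;         // build-time flag
constexpr bool kGrayDirtyImmuneObjects = true;      // ConcurrentCopying::kGrayDirtyImmuneObjects

struct Obj {
  std::atomic<uint32_t> lock_word{0};

  // Set the mark bit if it is currently clear; return false if it was already set.
  bool AtomicSetMarkBit() {
    uint32_t old = lock_word.load(std::memory_order_relaxed);
    do {
      if (old & kMarkBitMaskShifted) {
        return false;
      }
    } while (!lock_word.compare_exchange_weak(old, old | kMarkBitMaskShifted,
                                              std::memory_order_relaxed));
    return true;
  }
};

// Safe because immune objects that may reference non-immune objects are grayed
// during the GC pause, so treating them as already marked hides nothing.
void PreMarkImmuneObjects(std::vector<Obj>& immune_objects) {
  if (!kUseBakerReadBarrier || !kGrayDirtyImmuneObjects) {
    return;
  }
  for (Obj& obj : immune_objects) {
    obj.AtomicSetMarkBit();  // the runtime CHECKs that this succeeds
  }
}

int main() {
  std::vector<Obj> immune(4);
  PreMarkImmuneObjects(immune);
  return 0;
}
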
EAAC score (lower is better):
N9: 777 -> 700 (average of 31 runs)
N6P (960000 mhz): 1737.48 -> 1442.31 (average of 25 runs)
Bug: 30162165
Bug: 12687968
Test: N9, N6P booting, test-art-host, test-art-target all with CC
Change-Id: Iae0cacfae221e33151d3c0ab65338d1c822ab63d
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 7a34683..c787dc5 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -39,6 +39,7 @@
#include "gc/accounting/card_table-inl.h"
#include "gc/accounting/heap_bitmap.h"
#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/collector/concurrent_copying.h"
#include "gc/heap.h"
#include "gc/space/large_object_space.h"
#include "gc/space/space-inl.h"
@@ -1823,6 +1824,11 @@
const auto it = saved_hashcode_map_.find(obj);
dst->SetLockWord(it != saved_hashcode_map_.end() ?
LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false);
+ if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+ // Treat all of the objects in the image as marked to avoid unnecessary dirty pages. This is
+ // safe since we mark all of the objects that may reference non-immune objects as gray.
+ CHECK(dst->AtomicSetMarkBit(0, 1));
+ }
FixupObject(obj, dst);
}
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 626a975..7d13656 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -217,8 +217,7 @@
// uint32 = typeof(lockword_)
// Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK
// failures due to invalid read barrier bits during object field reads.
- static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits -
- LockWord::kReadBarrierStateSize;
+ static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits - LockWord::kGCStateSize;
// 111000.....0
static const size_t kBinMask = ((static_cast<size_t>(1) << kBinBits) - 1) << kBinShift;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index f3a09fd..bccde49 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -609,6 +609,8 @@
DCHECK_NE(obj_.reg(), LR);
DCHECK_NE(obj_.reg(), WSP);
DCHECK_NE(obj_.reg(), WZR);
+ // WIP0 is used by the slow path as a temp; it cannot be the object register.
+ DCHECK_NE(obj_.reg(), IP0);
DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
// "Compact" slow path, saving two moves.
//
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 34d3158..8828dce 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -520,7 +520,7 @@
ldr r2, [r9, #THREAD_ID_OFFSET]
ldrex r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
mov r3, r1
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits
cbnz r3, .Lnot_unlocked @ already thin locked
@ unlocked case - r1: original lock word that's zero except for the read barrier bits.
orr r2, r1, r2 @ r2 holds thread id with count of 0 with preserved read barrier bits
@@ -536,9 +536,9 @@
cbnz r2, .Lslow_lock @ lock word and self thread id's match -> recursive lock
@ else contention, go to slow path
mov r3, r1 @ copy the lock word to check count overflow.
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits.
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits.
add r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count in lock word placing in r2 to check overflow
- lsr r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT @ if either of the upper two bits (28-29) are set, we overflowed.
+ lsr r3, r2, #LOCK_WORD_GC_STATE_SHIFT @ if the first gc state bit is set, we overflowed.
cbnz r3, .Lslow_lock @ if we overflow the count go slow path
add r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count for real
strex r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
@@ -581,17 +581,17 @@
cbnz r2, .Lslow_unlock @ if either of the top two bits are set, go slow path
ldr r2, [r9, #THREAD_ID_OFFSET]
mov r3, r1 @ copy lock word to check thread id equality
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits
eor r3, r3, r2 @ lock_word.ThreadId() ^ self->ThreadId()
uxth r3, r3 @ zero top 16 bits
cbnz r3, .Lslow_unlock @ do lock word and self thread id's match?
mov r3, r1 @ copy lock word to detect transition to unlocked
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits
cmp r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
bpl .Lrecursive_thin_unlock
@ transition to unlocked
mov r3, r1
- and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK @ r3: zero except for the preserved read barrier bits
+ and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED @ r3: zero except for the preserved gc bits
dmb ish @ full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
@@ -1772,6 +1772,20 @@
*/
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
+ // Null check so that we can load the lock word.
+ cmp \reg, #0
+ beq .Lret_rb_\name
+ // Check the lock word for the mark bit; if marked, return.
+ push {r0}
+ ldr r0, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ and r0, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+ cbz r0, .Lslow_rb_\name
+ // Restore r0 and return.
+ pop {r0}
+ bx lr
+
+.Lslow_rb_\name:
+ pop {r0}
push {r0-r4, r9, r12, lr} @ save return address and core caller-save registers
.cfi_adjust_cfa_offset 32
.cfi_rel_offset r0, 0
@@ -1831,6 +1845,8 @@
.endif
.endif
pop {r0-r4, r9, r12, pc} @ restore caller-save registers and return
+.Lret_rb_\name:
+ bx lr
END \name
.endm
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index a5be52d..51094d5 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1090,7 +1090,7 @@
ldr w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
ldxr w1, [x4]
mov x3, x1
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits
cbnz w3, .Lnot_unlocked // already thin locked
// unlocked case - x1: original lock word that's zero except for the read barrier bits.
orr x2, x1, x2 // x2 holds thread id with count of 0 with preserved read barrier bits
@@ -1106,9 +1106,9 @@
cbnz w2, .Lslow_lock // lock word and self thread id's match -> recursive lock
// else contention, go to slow path
mov x3, x1 // copy the lock word to check count overflow.
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits.
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits.
add w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count in lock word placing in w2 to check overflow
- lsr w3, w2, LOCK_WORD_READ_BARRIER_STATE_SHIFT // if either of the upper two bits (28-29) are set, we overflowed.
+ lsr w3, w2, #LOCK_WORD_GC_STATE_SHIFT // if the first gc state bit is set, we overflowed.
cbnz w3, .Lslow_lock // if we overflow the count go slow path
add w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count for real
stxr w3, w2, [x4]
@@ -1152,17 +1152,17 @@
cbnz w2, .Lslow_unlock // if either of the top two bits are set, go slow path
ldr w2, [xSELF, #THREAD_ID_OFFSET]
mov x3, x1 // copy lock word to check thread id equality
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits
eor w3, w3, w2 // lock_word.ThreadId() ^ self->ThreadId()
uxth w3, w3 // zero top 16 bits
cbnz w3, .Lslow_unlock // do lock word and self thread id's match?
mov x3, x1 // copy lock word to detect transition to unlocked
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits
cmp w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
bpl .Lrecursive_thin_unlock
// transition to unlocked
mov x3, x1
- and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK // w3: zero except for the preserved read barrier bits
+ and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED // w3: zero except for the preserved gc bits
dmb ish // full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
str w3, [x4]
@@ -1791,12 +1791,20 @@
ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array
// Load the class (x2)
ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
- // Read barrier for class load.
+
+ // Most common case: GC is not marking.
ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
- cbnz x3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+ cbnz x3, .Lart_quick_alloc_object_region_tlab_marking
+.Lart_quick_alloc_object_region_tlab_do_allocation:
ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+.Lart_quick_alloc_object_region_tlab_marking:
+ // GC is marking, check the lock word of the class for the mark bit.
+ // If the class is null, go slow path. The check is required to read the lock word.
+ cbz w2, .Lart_quick_alloc_object_region_tlab_slow_path
+ // Class is not null, check mark bit in lock word.
+ ldr w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ // If the bit is not zero, do the allocation.
+ tbnz w3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_alloc_object_region_tlab_do_allocation
// The read barrier slow path. Mark
// the class.
stp x0, x1, [sp, #-32]! // Save registers (x0, x1, lr).
@@ -1807,7 +1815,7 @@
ldp x0, x1, [sp, #0] // Restore registers.
ldr xLR, [sp, #16]
add sp, sp, #32
- b .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ b .Lart_quick_alloc_object_region_tlab_do_allocation
.Lart_quick_alloc_object_region_tlab_slow_path:
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // Save callee saves in case of GC.
mov x2, xSELF // Pass Thread::Current.
@@ -2265,6 +2273,8 @@
*/
.macro READ_BARRIER_MARK_REG name, wreg, xreg
ENTRY \name
+ // Reference is null, no work to do at all.
+ cbz \wreg, .Lret_rb_\name
/*
* Allocate 46 stack slots * 8 = 368 bytes:
* - 20 slots for core registers X0-X19
@@ -2272,6 +2282,11 @@
* - 1 slot for return address register XLR
* - 1 padding slot for 16-byte stack alignment
*/
+ // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
+ ldr wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name
+ ret
+.Lslow_path_rb_\name:
// Save all potentially live caller-save core registers.
stp x0, x1, [sp, #-368]!
.cfi_adjust_cfa_offset 368
@@ -2360,6 +2375,7 @@
.cfi_restore x30
add sp, sp, #368
.cfi_adjust_cfa_offset -368
+.Lret_rb_\name:
ret
END \name
.endm
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 77e04e7..6c9239f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1028,7 +1028,13 @@
movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
// Read barrier for class load.
cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ // Null check so that we can load the lock word.
+ testl %edx, %edx
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ // Check the mark bit; if it is set, fall through and allocate.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
@@ -1065,7 +1071,7 @@
test LITERAL(LOCK_WORD_STATE_MASK), %ecx // test the 2 high bits.
jne .Lslow_lock // slow path if either of the two high bits are set.
movl %ecx, %edx // save lock word (edx) to keep read barrier bits.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
test %ecx, %ecx
jnz .Lalready_thin // lock word contains a thin lock
// unlocked case - edx: original lock word, eax: obj.
@@ -1081,9 +1087,9 @@
cmpw %cx, %dx // do we hold the lock already?
jne .Lslow_lock
movl %edx, %ecx // copy the lock word to check count overflow.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count for overflow check.
- test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set.
+ test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // overflowed if the first gc state bit is set.
jne .Lslow_lock // count overflowed so go slow
movl %eax, %ecx // save obj to use eax for cmpxchg.
movl %edx, %eax // copy the lock word as the old val for cmpxchg.
@@ -1137,13 +1143,13 @@
cmpw %cx, %dx // does the thread id match?
jne .Lslow_unlock
movl %ecx, %edx // copy the lock word to detect new count of 0.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits.
cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
jae .Lrecursive_thin_unlock
// update lockword, cmpxchg necessary for read barrier bits.
movl %eax, %edx // edx: obj
movl %ecx, %eax // eax: old lock word.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original gc bits.
#ifndef USE_READ_BARRIER
movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
#else
@@ -1923,6 +1929,14 @@
// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
+ // Null check so that we can load the lock word.
+ test REG_VAR(reg), REG_VAR(reg)
+ jz .Lret_rb_\name
+ // Check the mark bit; if it is 1, return.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+ jz .Lslow_rb_\name
+ ret
+.Lslow_rb_\name:
// Save all potentially live caller-save core registers.
PUSH eax
PUSH ecx
@@ -1970,6 +1984,7 @@
POP_REG_NE edx, RAW_VAR(reg)
POP_REG_NE ecx, RAW_VAR(reg)
POP_REG_NE eax, RAW_VAR(reg)
+.Lret_rb_\name:
ret
END_FUNCTION VAR(name)
END_MACRO
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 784ec39..127d7db 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -989,7 +989,13 @@
// Load the class
movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ // Null check so that we can load the lock word.
+ testl %edx, %edx
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+ // Check the mark bit; if it is set, fall through and allocate.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+ jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
@@ -1022,7 +1028,7 @@
test LITERAL(LOCK_WORD_STATE_MASK), %ecx // Test the 2 high bits.
jne .Lslow_lock // Slow path if either of the two high bits are set.
movl %ecx, %edx // save lock word (edx) to keep read barrier bits.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
test %ecx, %ecx
jnz .Lalready_thin // Lock word contains a thin lock.
// unlocked case - edx: original lock word, edi: obj.
@@ -1037,9 +1043,9 @@
cmpw %cx, %dx // do we hold the lock already?
jne .Lslow_lock
movl %edx, %ecx // copy the lock word to check count overflow.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count
- test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set
+ test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if the upper bit (28) is set
jne .Lslow_lock // count overflowed so go slow
movl %edx, %eax // copy the lock word as the old val for cmpxchg.
addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real.
@@ -1074,12 +1080,12 @@
cmpw %cx, %dx // does the thread id match?
jne .Lslow_unlock
movl %ecx, %edx // copy the lock word to detect new count of 0.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits.
cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
jae .Lrecursive_thin_unlock
// update lockword, cmpxchg necessary for read barrier bits.
movl %ecx, %eax // eax: old lock word.
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits.
+ andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original gc bits.
#ifndef USE_READ_BARRIER
movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
@@ -1833,6 +1839,14 @@
// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
+ // Null check so that we can load the lock word.
+ testq REG_VAR(reg), REG_VAR(reg)
+ jz .Lret_rb_\name
+ // Check the mark bit; if it is 1, return.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
+ jz .Lslow_rb_\name
+ ret
+.Lslow_rb_\name:
// Save all potentially live caller-save core registers.
PUSH rax
PUSH rcx
@@ -1897,6 +1911,7 @@
POP_REG_NE rdx, RAW_VAR(reg)
POP_REG_NE rcx, RAW_VAR(reg)
POP_REG_NE rax, RAW_VAR(reg)
+.Lret_rb_\name:
ret
END_FUNCTION VAR(name)
END_MACRO
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 4019a5b..1a7cb36 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -154,11 +154,29 @@
}
inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
+ mirror::Object* ret;
+ // TODO: Delete GetMarkBit check when all of the callers properly check the bit. The remaining
+ // caller is array allocation.
+ if (from_ref == nullptr || from_ref->GetMarkBit()) {
+ return from_ref;
+ }
// TODO: Consider removing this check when we are done investigating slow paths. b/30162165
if (UNLIKELY(mark_from_read_barrier_measurements_)) {
- return MarkFromReadBarrierWithMeasurements(from_ref);
+ ret = MarkFromReadBarrierWithMeasurements(from_ref);
+ } else {
+ ret = Mark(from_ref);
}
- return Mark(from_ref);
+ if (LIKELY(!rb_mark_bit_stack_full_ && ret->AtomicSetMarkBit(0, 1))) {
+ // If the mark stack is full, we may temporarily go to mark and back to unmarked. Seeing both
+ // values is OK since the only race is doing an unnecessary Mark.
+ if (!rb_mark_bit_stack_->AtomicPushBack(ret)) {
+ // Mark stack is full, set the bit back to zero.
+ CHECK(ret->AtomicSetMarkBit(1, 0));
+ // Set rb_mark_bit_stack_full_; this is racy but OK since AtomicPushBack is thread safe.
+ rb_mark_bit_stack_full_ = true;
+ }
+ }
+ return ret;
}
inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d7221e4..071537d 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -42,9 +42,6 @@
namespace collector {
static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
-// If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
-// pages.
-static constexpr bool kGrayDirtyImmuneObjects = true;
// If kFilterModUnionCards then we attempt to filter cards that don't need to be dirty in the mod
// union table. Disabled since it does not seem to help the pause much.
static constexpr bool kFilterModUnionCards = kIsDebugBuild;
@@ -52,6 +49,9 @@
// ConcurrentCopying::Scan. May be used to diagnose possibly unnecessary read barriers.
// Only enabled for kIsDebugBuild to avoid performance hit.
static constexpr bool kDisallowReadBarrierDuringScan = kIsDebugBuild;
+// Slow path mark stack size; increase this if the stack is getting full and it is causing
+// performance problems.
+static constexpr size_t kReadBarrierMarkStackSize = 512 * KB;
ConcurrentCopying::ConcurrentCopying(Heap* heap,
const std::string& name_prefix,
@@ -63,6 +63,10 @@
gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack",
kDefaultGcMarkStackSize,
kDefaultGcMarkStackSize)),
+ rb_mark_bit_stack_(accounting::ObjectStack::Create("rb copying gc mark stack",
+ kReadBarrierMarkStackSize,
+ kReadBarrierMarkStackSize)),
+ rb_mark_bit_stack_full_(false),
mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
thread_running_gc_(nullptr),
is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
@@ -187,6 +191,7 @@
CHECK(false_gray_stack_.empty());
}
+ rb_mark_bit_stack_full_ = false;
mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
if (measure_read_barrier_slow_path_) {
rb_slow_path_ns_.StoreRelaxed(0);
@@ -914,9 +919,9 @@
}
collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
if (kUseBakerReadBarrier) {
- CHECK(ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+ CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
<< "Ref " << ref << " " << PrettyTypeOf(ref)
- << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
+ << " has non-white rb_ptr ";
}
}
@@ -982,7 +987,7 @@
VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
obj->VisitReferences(visitor, visitor);
if (kUseBakerReadBarrier) {
- CHECK(obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+ CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
<< "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
}
}
@@ -2243,6 +2248,15 @@
}
}
}
+ if (kUseBakerReadBarrier) {
+ TimingLogger::ScopedTiming split("EmptyRBMarkBitStack", GetTimings());
+ DCHECK(rb_mark_bit_stack_.get() != nullptr);
+ const auto* limit = rb_mark_bit_stack_->End();
+ for (StackReference<mirror::Object>* it = rb_mark_bit_stack_->Begin(); it != limit; ++it) {
+ CHECK(it->AsMirrorPtr()->AtomicSetMarkBit(1, 0));
+ }
+ rb_mark_bit_stack_->Reset();
+ }
}
if (measure_read_barrier_slow_path_) {
MutexLock mu(self, rb_slow_path_histogram_lock_);
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 72112fa..a862802 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -57,6 +57,9 @@
static constexpr bool kEnableFromSpaceAccountingCheck = kIsDebugBuild;
// Enable verbose mode.
static constexpr bool kVerboseMode = false;
+ // If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
+ // pages.
+ static constexpr bool kGrayDirtyImmuneObjects = true;
ConcurrentCopying(Heap* heap,
const std::string& name_prefix = "",
@@ -230,6 +233,8 @@
space::RegionSpace* region_space_; // The underlying region space.
std::unique_ptr<Barrier> gc_barrier_;
std::unique_ptr<accounting::ObjectStack> gc_mark_stack_;
+ std::unique_ptr<accounting::ObjectStack> rb_mark_bit_stack_;
+ bool rb_mark_bit_stack_full_;
std::vector<mirror::Object*> false_gray_stack_ GUARDED_BY(mark_stack_lock_);
Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
std::vector<accounting::ObjectStack*> revoked_mark_stacks_
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index a92cb24..5485cd2 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2538,6 +2538,17 @@
AddSpace(zygote_space_);
non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
AddSpace(non_moving_space_);
+ if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) {
+ // Treat all of the objects in the zygote as marked to avoid unnecessary dirty pages. This is
+ // safe since we mark all of the objects that may reference non-immune objects as gray.
+ zygote_space_->GetLiveBitmap()->VisitMarkedRange(
+ reinterpret_cast<uintptr_t>(zygote_space_->Begin()),
+ reinterpret_cast<uintptr_t>(zygote_space_->Limit()),
+ [](mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+ CHECK(obj->AtomicSetMarkBit(0, 1));
+ });
+ }
+
// Create the zygote space mod union table.
accounting::ModUnionTable* mod_union_table =
new accounting::ModUnionTableCardCache("zygote space mod-union table", this,
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 5d62b59..f5d5181 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -74,12 +74,22 @@
DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kStateMaskShifted)))
#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28
DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_READ_BARRIER_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kReadBarrierStateShift)))
-#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000
+#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x10000000
DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShifted)))
-#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xcfffffff
+#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xefffffff
DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted)))
+#define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShiftedToggled)))
+#define LOCK_WORD_GC_STATE_SHIFT 28
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_GC_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kGCStateShift)))
+#define LOCK_WORD_MARK_BIT_SHIFT 29
+DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_MARK_BIT_SHIFT), (static_cast<int32_t>(art::LockWord::kMarkBitStateShift)))
+#define LOCK_WORD_MARK_BIT_MASK_SHIFTED 0x20000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_MARK_BIT_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kMarkBitStateMaskShifted)))
#define OBJECT_ALIGNMENT_MASK 0x7
DEFINE_CHECK_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), (static_cast<size_t>(art::kObjectAlignment - 1)))
#define OBJECT_ALIGNMENT_MASK_TOGGLED 0xfffffff8
diff --git a/runtime/globals.h b/runtime/globals.h
index 0b44c47..9045d40 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -47,7 +47,8 @@
}
// Required object alignment
-static constexpr size_t kObjectAlignment = 8;
+static constexpr size_t kObjectAlignmentShift = 3;
+static constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift;
static constexpr size_t kLargeObjectAlignment = kPageSize;
// Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index 341501b..4a2a293 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -43,17 +43,15 @@
inline size_t LockWord::ForwardingAddress() const {
DCHECK_EQ(GetState(), kForwardingAddress);
- return value_ << kStateSize;
+ return value_ << kForwardingAddressShift;
}
inline LockWord::LockWord() : value_(0) {
DCHECK_EQ(GetState(), kUnlocked);
}
-inline LockWord::LockWord(Monitor* mon, uint32_t rb_state)
- : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) |
- (kStateFat << kStateShift)) {
- DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+inline LockWord::LockWord(Monitor* mon, uint32_t gc_state)
+ : value_(mon->GetMonitorId() | (gc_state << kGCStateShift) | (kStateFat << kStateShift)) {
#ifndef __LP64__
DCHECK_ALIGNED(mon, kMonitorIdAlignment);
#endif
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 5d0d204..538b6eb 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -35,27 +35,27 @@
* the state. The four possible states are fat locked, thin/unlocked, hash code, and forwarding
* address. When the lock word is in the "thin" state and its bits are formatted as follows:
*
- * |33|22|222222221111|1111110000000000|
- * |10|98|765432109876|5432109876543210|
- * |00|rb| lock count |thread id owner |
+ * |33|2|2|222222221111|1111110000000000|
+ * |10|9|8|765432109876|5432109876543210|
+ * |00|m|r| lock count |thread id owner |
*
* When the lock word is in the "fat" state and its bits are formatted as follows:
*
- * |33|22|2222222211111111110000000000|
- * |10|98|7654321098765432109876543210|
- * |01|rb| MonitorId |
+ * |33|2|2|2222222211111111110000000000|
+ * |10|9|8|7654321098765432109876543210|
+ * |01|m|r| MonitorId |
*
* When the lock word is in hash state and its bits are formatted as follows:
*
- * |33|22|2222222211111111110000000000|
- * |10|98|7654321098765432109876543210|
- * |10|rb| HashCode |
+ * |33|2|2|2222222211111111110000000000|
+ * |10|9|8|7654321098765432109876543210|
+ * |10|m|r| HashCode |
*
- * When the lock word is in fowarding address state and its bits are formatted as follows:
+ * When the lock word is in forwarding address state and its bits are formatted as follows:
*
- * |33|22|2222222211111111110000000000|
- * |10|98|7654321098765432109876543210|
- * |11| ForwardingAddress |
+ * |33|2|22222222211111111110000000000|
+ * |10|9|87654321098765432109876543210|
+ * |11|0| ForwardingAddress |
*
* The rb bits store the read barrier state.
*/
@@ -64,11 +64,13 @@
enum SizeShiftsAndMasks { // private marker to avoid generate-operator-out.py from processing.
// Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
kStateSize = 2,
- kReadBarrierStateSize = 2,
+ kReadBarrierStateSize = 1,
+ kMarkBitStateSize = 1,
// Number of bits to encode the thin lock owner.
kThinLockOwnerSize = 16,
// Remaining bits are the recursive lock count.
- kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize,
+ kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize -
+ kMarkBitStateSize,
// Thin lock bits. Owner in lowest bits.
kThinLockOwnerShift = 0,
@@ -81,25 +83,43 @@
kThinLockCountOne = 1 << kThinLockCountShift, // == 65536 (0x10000)
// State in the highest bits.
- kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift,
+ kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift +
+ kMarkBitStateSize,
kStateMask = (1 << kStateSize) - 1,
kStateMaskShifted = kStateMask << kStateShift,
kStateThinOrUnlocked = 0,
kStateFat = 1,
kStateHash = 2,
kStateForwardingAddress = 3,
+
+ // Read barrier bit.
kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1,
kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift,
kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
+ // Mark bit.
+ kMarkBitStateShift = kReadBarrierStateSize + kReadBarrierStateShift,
+ kMarkBitStateMask = (1 << kMarkBitStateSize) - 1,
+ kMarkBitStateMaskShifted = kMarkBitStateMask << kMarkBitStateShift,
+ kMarkBitStateMaskShiftedToggled = ~kMarkBitStateMaskShifted,
+
+ // GC state is mark bit and read barrier state.
+ kGCStateSize = kReadBarrierStateSize + kMarkBitStateSize,
+ kGCStateShift = kReadBarrierStateShift,
+ kGCStateMaskShifted = kReadBarrierStateMaskShifted | kMarkBitStateMaskShifted,
+ kGCStateMaskShiftedToggled = ~kGCStateMaskShifted,
+
// When the state is kHashCode, the non-state bits hold the hashcode.
// Note Object.hashCode() has the hash code layout hardcoded.
kHashShift = 0,
- kHashSize = 32 - kStateSize - kReadBarrierStateSize,
+ kHashSize = 32 - kStateSize - kReadBarrierStateSize - kMarkBitStateSize,
kHashMask = (1 << kHashSize) - 1,
kMaxHash = kHashMask,
+ // Forwarding address shift.
+ kForwardingAddressShift = kObjectAlignmentShift,
+
kMonitorIdShift = kHashShift,
kMonitorIdSize = kHashSize,
kMonitorIdMask = kHashMask,
@@ -108,31 +128,31 @@
kMaxMonitorId = kMaxHash
};
- static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) {
+ static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t gc_state) {
CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner));
CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount));
- DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
- return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) |
- (rb_state << kReadBarrierStateShift) |
+ // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
+ return LockWord((thread_id << kThinLockOwnerShift) |
+ (count << kThinLockCountShift) |
+ (gc_state << kGCStateShift) |
(kStateThinOrUnlocked << kStateShift));
}
static LockWord FromForwardingAddress(size_t target) {
DCHECK_ALIGNED(target, (1 << kStateSize));
- return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift));
+ return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
}
- static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) {
+ static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash));
- DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+ // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U);
return LockWord((hash_code << kHashShift) |
- (rb_state << kReadBarrierStateShift) |
+ (gc_state << kGCStateShift) |
(kStateHash << kStateShift));
}
- static LockWord FromDefault(uint32_t rb_state) {
- DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
- return LockWord(rb_state << kReadBarrierStateShift);
+ static LockWord FromDefault(uint32_t gc_state) {
+ return LockWord(gc_state << kGCStateShift);
}
static bool IsDefault(LockWord lw) {
@@ -154,7 +174,7 @@
LockState GetState() const {
CheckReadBarrierState();
if ((!kUseReadBarrier && UNLIKELY(value_ == 0)) ||
- (kUseReadBarrier && UNLIKELY((value_ & kReadBarrierStateMaskShiftedToggled) == 0))) {
+ (kUseReadBarrier && UNLIKELY((value_ & kGCStateMaskShiftedToggled) == 0))) {
return kUnlocked;
} else {
uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
@@ -176,6 +196,10 @@
return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask;
}
+ uint32_t GCState() const {
+ return (value_ & kGCStateMaskShifted) >> kGCStateShift;
+ }
+
void SetReadBarrierState(uint32_t rb_state) {
DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
@@ -184,6 +208,19 @@
value_ |= (rb_state & kReadBarrierStateMask) << kReadBarrierStateShift;
}
+
+ uint32_t MarkBitState() const {
+ return (value_ >> kMarkBitStateShift) & kMarkBitStateMask;
+ }
+
+ void SetMarkBitState(uint32_t mark_bit) {
+ DCHECK_EQ(mark_bit & ~kMarkBitStateMask, 0U);
+ DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
+ // Clear and or the bits.
+ value_ &= kMarkBitStateMaskShiftedToggled;
+ value_ |= mark_bit << kMarkBitStateShift;
+ }
+
// Return the owner thin lock thread id.
uint32_t ThinLockOwner() const;
@@ -197,7 +234,7 @@
size_t ForwardingAddress() const;
// Constructor a lock word for inflation to use a Monitor.
- LockWord(Monitor* mon, uint32_t rb_state);
+ LockWord(Monitor* mon, uint32_t gc_state);
// Return the hash code stored in the lock word, must be kHashCode state.
int32_t GetHashCode() const;
@@ -207,7 +244,7 @@
if (kIncludeReadBarrierState) {
return lw1.GetValue() == lw2.GetValue();
}
- return lw1.GetValueWithoutReadBarrierState() == lw2.GetValueWithoutReadBarrierState();
+ return lw1.GetValueWithoutGCState() == lw2.GetValueWithoutGCState();
}
void Dump(std::ostream& os) {
@@ -248,9 +285,9 @@
return value_;
}
- uint32_t GetValueWithoutReadBarrierState() const {
+ uint32_t GetValueWithoutGCState() const {
CheckReadBarrierState();
- return value_ & ~(kReadBarrierStateMask << kReadBarrierStateShift);
+ return value_ & kGCStateMaskShiftedToggled;
}
// Only Object should be converting LockWords to/from uints.
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 0592c6c..061bee1 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -147,10 +147,21 @@
#endif
}
+inline uint32_t Object::GetMarkBit() {
+#ifdef USE_READ_BARRIER
+ DCHECK(kUseBakerReadBarrier);
+ return GetLockWord(false).MarkBitState();
+#else
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+#endif
+}
+
inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
#ifdef USE_BAKER_READ_BARRIER
DCHECK(kUseBakerReadBarrier);
DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+ DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
LockWord lw = GetLockWord(false);
lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
SetLockWord(lw, false);
@@ -173,6 +184,8 @@
DCHECK(kUseBakerReadBarrier);
DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+ DCHECK_NE(expected_rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+ DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
LockWord expected_lw;
LockWord new_lw;
do {
@@ -216,6 +229,24 @@
#endif
}
+inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) {
+ LockWord expected_lw;
+ LockWord new_lw;
+ do {
+ LockWord lw = GetLockWord(false);
+ if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) {
+ // Lost the race.
+ return false;
+ }
+ expected_lw = lw;
+ new_lw = lw;
+ new_lw.SetMarkBitState(mark_bit);
+ // Since this is only set from the mutator, we can use the non release Cas.
+ } while (!CasLockWordWeakRelaxed(expected_lw, new_lw));
+ return true;
+}
+
+
inline void Object::AssertReadBarrierPointer() const {
if (kUseBakerReadBarrier) {
Object* obj = const_cast<Object*>(this);
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 701c600..13c536e 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -163,8 +163,7 @@
case LockWord::kUnlocked: {
// Try to compare and swap in a new hash, if we succeed we will return the hash on the next
// loop iteration.
- LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(),
- lw.ReadBarrierState());
+ LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(), lw.GCState());
DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode);
if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) {
return hash_word.GetHashCode();
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index a4bdbad..5b129bf 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -93,6 +93,7 @@
template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_);
+ // TODO: Clean this up and change to return int32_t
Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_);
#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
@@ -103,6 +104,12 @@
template<bool kCasRelease = false>
ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
SHARED_REQUIRES(Locks::mutator_lock_);
+
+ ALWAYS_INLINE uint32_t GetMarkBit() SHARED_REQUIRES(Locks::mutator_lock_);
+
+ ALWAYS_INLINE bool AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit)
+ SHARED_REQUIRES(Locks::mutator_lock_);
+
void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
// The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index bf9f931..e863ea9 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -155,7 +155,7 @@
return false;
}
}
- LockWord fat(this, lw.ReadBarrierState());
+ LockWord fat(this, lw.GCState());
// Publish the updated lock word, which may race with other threads.
bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat);
// Lock profiling.
@@ -774,20 +774,21 @@
return false;
}
// Deflate to a thin lock.
- LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_,
- lw.ReadBarrierState());
+ LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(),
+ monitor->lock_count_,
+ lw.GCState());
// Assume no concurrent read barrier state changes as mutators are suspended.
obj->SetLockWord(new_lw, false);
VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / "
<< monitor->lock_count_;
} else if (monitor->HasHashCode()) {
- LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.ReadBarrierState());
+ LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.GCState());
// Assume no concurrent read barrier state changes as mutators are suspended.
obj->SetLockWord(new_lw, false);
VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode();
} else {
// No lock and no hash, just put an empty lock word inside the object.
- LockWord new_lw = LockWord::FromDefault(lw.ReadBarrierState());
+ LockWord new_lw = LockWord::FromDefault(lw.GCState());
// Assume no concurrent read barrier state changes as mutators are suspended.
obj->SetLockWord(new_lw, false);
VLOG(monitor) << "Deflated" << obj << " to empty lock word";
@@ -876,7 +877,7 @@
LockWord lock_word = h_obj->GetLockWord(true);
switch (lock_word.GetState()) {
case LockWord::kUnlocked: {
- LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.ReadBarrierState()));
+ LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.GCState()));
if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) {
AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
// CasLockWord enforces more than the acquire ordering we need here.
@@ -890,8 +891,9 @@
// We own the lock, increase the recursion count.
uint32_t new_count = lock_word.ThinLockCount() + 1;
if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
- LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count,
- lock_word.ReadBarrierState()));
+ LockWord thin_locked(LockWord::FromThinLockId(thread_id,
+ new_count,
+ lock_word.GCState()));
if (!kUseReadBarrier) {
h_obj->SetLockWord(thin_locked, true);
AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */);
@@ -975,9 +977,9 @@
LockWord new_lw = LockWord::Default();
if (lock_word.ThinLockCount() != 0) {
uint32_t new_count = lock_word.ThinLockCount() - 1;
- new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.ReadBarrierState());
+ new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.GCState());
} else {
- new_lw = LockWord::FromDefault(lock_word.ReadBarrierState());
+ new_lw = LockWord::FromDefault(lock_word.GCState());
}
if (!kUseReadBarrier) {
DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 42e959c..5d32c09 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -99,8 +99,9 @@
// Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them.
static constexpr uintptr_t white_ptr_ = 0x0; // Not marked.
static constexpr uintptr_t gray_ptr_ = 0x1; // Marked, but not marked through. On mark stack.
+ // TODO: black_ptr_ is unused, we should remove it.
static constexpr uintptr_t black_ptr_ = 0x2; // Marked through. Used for non-moving objects.
- static constexpr uintptr_t rb_ptr_mask_ = 0x3; // The low 2 bits for white|gray|black.
+ static constexpr uintptr_t rb_ptr_mask_ = 0x1; // The low bit for white|gray.
};
} // namespace art
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
index c1e6099..67ed5b5 100644
--- a/tools/cpp-define-generator/constant_lockword.def
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -30,5 +30,12 @@
DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE, int32_t, kThinLockCountOne)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED, uint32_t, kGCStateMaskShifted)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED, uint32_t, kGCStateMaskShiftedToggled)
+DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT, int32_t, kGCStateShift)
+
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_SHIFT, int32_t, kMarkBitStateShift)
+DEFINE_LOCK_WORD_EXPR(MARK_BIT_MASK_SHIFTED, uint32_t, kMarkBitStateMaskShifted)
+
#undef DEFINE_LOCK_WORD_EXPR