Check for forwarding address in READ_BARRIER_MARK_REG
When the object is in the from-space, the mark bit is not set.
In this case, we can also check whether the lock word holds a forwarding
address. The forwarding-address case happens around 25% of the time.
This CL adds handling of forwarding-address lock words to
READ_BARRIER_MARK_REG.
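A minimal C++ sketch of the check being added, assuming the lock word layout
from runtime/lock_word.h below (IsForwardingAddress and the standalone
constants are illustrative only, not ART code):

    #include <cstddef>
    #include <cstdint>

    // Values match the generated constants used by the assembly below:
    // the lock word state lives in the top two bits of the 32-bit word.
    constexpr uint32_t kStateShift = 30;
    constexpr uint32_t kStateMask = 0x3;
    constexpr uint32_t kStateForwardingAddress = 3;
    constexpr uint32_t kStateForwardingAddressOverflow =
        (1 + kStateMask - kStateForwardingAddress) << kStateShift;  // 0x40000000
    constexpr uint32_t kForwardingAddressShift = 3;  // object alignment shift

    // Adding the overflow constant wraps past 2^32 exactly when the state
    // bits encode a forwarding address; this is what the addl + jnae pair
    // in READ_BARRIER_MARK_REG checks via the carry flag.
    inline bool IsForwardingAddress(uint32_t lock_word, size_t* out_address) {
      uint64_t sum = static_cast<uint64_t>(lock_word) + kStateForwardingAddressOverflow;
      if (sum > UINT32_MAX) {
        // The wrap-around cleared the two state bits; what remains is the
        // forwarding address shifted right by the alignment shift.
        *out_address = static_cast<size_t>(static_cast<uint32_t>(sum)) << kForwardingAddressShift;
        return true;
      }
      return false;
    }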
Reduces total read barriers reaching runtime on ritzperf:
Slow paths: 20758783 -> 15457783
Deleted the mark bit check in MarkFromReadBarrier since most of the
callers check the bit now.
Perf:
ReadBarrier::Mark: 2.59% -> 2.12%
art_quick_read_barrier_mark_reg01: 0.79% -> 0.78%
art_quick_read_barrier_mark_reg00: 0.54% -> 0.50%
art_quick_read_barrier_mark_reg02: 0.31% -> 0.25%
Only X86_64 for now; other architectures will follow.
Bug: 30162165
Test: test-art-host
Change-Id: Ie7289d684d0e37a887943d77710092e380457860
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 2856766..e3f39cd 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2301,8 +2301,15 @@
jz .Lslow_rb_\name
ret
.Lslow_rb_\name:
- // Save all potentially live caller-save core registers.
PUSH rax
+ movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
+ addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
+ // Jump if the addition carried out of 32 bits; the only state that overflows is the forwarding address.
+ // Taken ~25% of the time.
+ jnae .Lret_overflow\name
+
+ // Save all potentially live caller-save core registers.
+ movq 0(%rsp), %rax
PUSH rcx
PUSH rdx
PUSH rsi
@@ -2367,6 +2374,12 @@
POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
ret
+.Lret_overflow\name:
+ // The overflow cleared the top (state) bits; shift to recover the forwarding address.
+ sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
+ movq %rax, REG_VAR(reg)
+ POP_REG_NE rax, RAW_VAR(reg)
+ ret
END_FUNCTION VAR(name)
END_MACRO
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 76f500c..b23897b 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -155,7 +155,7 @@
mirror::Object* ret;
// TODO: Delete GetMarkBit check when all of the callers properly check the bit. Remaining caller
// is array allocations.
- if (from_ref == nullptr || from_ref->GetMarkBit()) {
+ if (from_ref == nullptr) {
return from_ref;
}
// TODO: Consider removing this check when we are done investigating slow paths. b/30162165
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 6c189b0..f938c9f 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -98,6 +98,10 @@
DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW 0x40000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), (static_cast<uint32_t>(art::LockWord::kStateForwardingAddressOverflow)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 0x3
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), (static_cast<uint32_t>(art::LockWord::kForwardingAddressShift)))
#define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000
DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted)))
#define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 538b6eb..e9d06b3 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -61,7 +61,7 @@
*/
class LockWord {
public:
- enum SizeShiftsAndMasks { // private marker to avoid generate-operator-out.py from processing.
+ enum SizeShiftsAndMasks : uint32_t { // private marker to avoid generate-operator-out.py from processing.
// Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
kStateSize = 2,
kReadBarrierStateSize = 1,
@@ -91,6 +91,8 @@
kStateFat = 1,
kStateHash = 2,
kStateForwardingAddress = 3,
+ kStateForwardingAddressShifted = kStateForwardingAddress << kStateShift,
+ kStateForwardingAddressOverflow = (1 + kStateMask - kStateForwardingAddress) << kStateShift,
// Read barrier bit.
kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
@@ -140,7 +142,7 @@
static LockWord FromForwardingAddress(size_t target) {
DCHECK_ALIGNED(target, (1 << kStateSize));
- return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
+ return LockWord((target >> kForwardingAddressShift) | kStateForwardingAddressShifted);
}
static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
@@ -256,6 +258,11 @@
LockWord();
explicit LockWord(uint32_t val) : value_(val) {
+ // Check that adding the overflow constant to the shifted forwarding-address state wraps past 32 bits.
+ constexpr uint64_t overflow = static_cast<uint64_t>(kStateForwardingAddressShifted) +
+ static_cast<uint64_t>(kStateForwardingAddressOverflow);
+ constexpr bool is_larger = overflow > static_cast<uint64_t>(0xFFFFFFFF);
+ static_assert(is_larger, "should have overflowed");
CheckReadBarrierState();
}
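With the concrete values exported to asm_support_gen.h above (state in the top
two bits, kStateForwardingAddress = 3, kStateMask = 0x3), a standalone
compile-time sketch (not part of the CL; the constexpr names shadow the
LockWord enumerators for illustration) of why only the forwarding-address
state can overflow:

    #include <cstdint>

    constexpr uint64_t kShifted  = 3u << 30;              // kStateForwardingAddressShifted  == 0xC0000000
    constexpr uint64_t kOverflow = (1u + 3u - 3u) << 30;  // kStateForwardingAddressOverflow == 0x40000000
    static_assert(kShifted + kOverflow == 0x100000000ull,
                  "forwarding-address state wraps past 32 bits");
    // Any other state (0, 1 or 2) in the top two bits keeps the 32-bit sum
    // below 2^32, so the addl in the fast path sets the carry flag only for
    // forwarding-address lock words.
    static_assert((2u << 30) + 0x3FFFFFFFu + kOverflow <= 0xFFFFFFFFull,
                  "no carry for non-forwarding-address states");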
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index eb74fcf..f583167 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -771,7 +771,7 @@
return false;
}
// Can't deflate if our lock count is too high.
- if (monitor->lock_count_ > LockWord::kThinLockMaxCount) {
+ if (static_cast<uint32_t>(monitor->lock_count_) > LockWord::kThinLockMaxCount) {
return false;
}
// Deflate to a thin lock.
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
index 67ed5b5..f9b6b19 100644
--- a/tools/cpp-define-generator/constant_lockword.def
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -30,6 +30,9 @@
DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE, int32_t, kThinLockCountOne)
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS_OVERFLOW, uint32_t, kStateForwardingAddressOverflow)
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS_SHIFT, uint32_t, kForwardingAddressShift)
+
DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED, uint32_t, kGCStateMaskShifted)
DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED, uint32_t, kGCStateMaskShiftedToggled)
DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT, int32_t, kGCStateShift)