-rw-r--r--  compiler/optimizing/code_generator_arm.cc      |   4
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc    |   4
-rw-r--r--  compiler/optimizing/code_generator_x86.cc      | 110
-rw-r--r--  compiler/optimizing/code_generator_x86.h       |   6
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc   | 124
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h    |   6
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc          |  50
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc       |  64
-rw-r--r--  compiler/utils/x86/assembler_x86.cc            |  17
-rw-r--r--  compiler/utils/x86/assembler_x86.h             |   3
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc       |  36
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc      |  19
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h       |   3
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc |  42
-rw-r--r--  test/537-checker-arraycopy/src/Main.java       |   4
15 files changed, 212 insertions, 280 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6d9c55cd75..404f044cef 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -6377,7 +6377,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path marking the GC root `root`.
+ // Slow path used to mark the GC root `root`.
SlowPathCode* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
codegen_->AddSlowPath(slow_path);
@@ -6518,7 +6518,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Object* ref = ref_addr->AsMirrorPtr()
__ MaybeUnpoisonHeapReference(ref_reg);
- // Slow path marking the object `ref` when it is gray.
+ // Slow path used to mark the object `ref` when it is gray.
SlowPathCode* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
AddSlowPath(slow_path);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index cc8985d0b0..122c174eae 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -5041,7 +5041,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path marking the GC root `root`.
+ // Slow path used to mark the GC root `root`.
SlowPathCodeARM64* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
codegen_->AddSlowPath(slow_path);
@@ -5239,7 +5239,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// Object* ref = ref_addr->AsMirrorPtr()
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
- // Slow path marking the object `ref` when it is gray.
+ // Slow path used to mark the object `ref` when it is gray.
SlowPathCodeARM64* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
AddSlowPath(slow_path);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f50eb5cb7e..7aca16f867 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -445,8 +445,8 @@ class ArraySetSlowPathX86 : public SlowPathCode {
// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison)
- : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
+ ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj)
+ : SlowPathCode(instruction), obj_(obj) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -470,10 +470,6 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- if (unpoison_) {
- // Object* ref = ref_addr->AsMirrorPtr()
- __ MaybeUnpoisonHeapReference(reg);
- }
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
@@ -503,7 +499,6 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
private:
const Location obj_;
- const bool unpoison_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
};
@@ -4636,6 +4631,10 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
// load the temp into the XMM and then copy the XMM into the
// output, 32 bits at a time).
locations->AddTemp(Location::RequiresFpuRegister());
+ } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
}
@@ -4679,10 +4678,11 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
case Primitive::kPrimNot: {
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, /* needs_null_check */ true);
+ instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -5093,6 +5093,11 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
Location::kOutputOverlap :
Location::kNoOutputOverlap);
}
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -5167,10 +5172,11 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
} else {
Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
@@ -6275,8 +6281,8 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
return kEmitCompilerReadBarrier &&
- !kUseBakerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
type_check_kind == TypeCheckKind::kArrayObjectCheck);
}
@@ -6337,7 +6343,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
@@ -6559,7 +6565,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
@@ -6595,7 +6601,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -6634,7 +6641,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -6666,7 +6674,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -6674,7 +6683,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -6865,17 +6875,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
Location maybe_temp) {
Register out_reg = out.AsRegister<Register>();
if (kEmitCompilerReadBarrier) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, out_reg, offset, /* needs_null_check */ false);
+ instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
// in the following move operation, as we will need it for the
// read barrier below.
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
__ movl(maybe_temp.AsRegister<Register>(), out_reg);
// /* HeapReference<Object> */ out = *(out + offset)
__ movl(out_reg, Address(out_reg, offset));
@@ -6892,15 +6902,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset) {
+ uint32_t offset,
+ Location maybe_temp) {
Register out_reg = out.AsRegister<Register>();
Register obj_reg = obj.AsRegister<Register>();
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, obj_reg, offset, /* needs_null_check */ false);
+ instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6943,9 +6955,9 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path marking the GC root `root`.
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
- instruction, root, /* unpoison */ false);
+ // Slow path used to mark the GC root `root`.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root);
codegen_->AddSlowPath(slow_path);
__ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
@@ -6979,13 +6991,14 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
Location ref,
Register obj,
uint32_t offset,
+ Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
Address src(obj, offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
}
void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6993,6 +7006,7 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
Register obj,
uint32_t data_offset,
Location index,
+ Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -7005,13 +7019,14 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
Address src = index.IsConstant() ?
Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
}
void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
const Address& src,
+ Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -7041,23 +7056,17 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// performance reasons.
Register ref_reg = ref.AsRegister<Register>();
+ Register temp_reg = temp.AsRegister<Register>();
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
- static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
- constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
- constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
- constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
-
- // if (rb_state == ReadBarrier::gray_ptr_)
- // ref = ReadBarrier::Mark(ref);
- // At this point, just do the "if" and make sure that flags are preserved until the branch.
- __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
+ // /* int32_t */ monitor = obj->monitor_
+ __ movl(temp_reg, Address(obj, monitor_offset));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86 memory model.
@@ -7065,20 +7074,25 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// The actual reference load.
// /* HeapReference<Object> */ ref = *src
- __ movl(ref_reg, src); // Flags are unaffected.
-
- // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
- // Slow path marking the object `ref` when it is gray.
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
- instruction, ref, /* unpoison */ true);
- AddSlowPath(slow_path);
-
- // We have done the "if" of the gray bit check above, now branch based on the flags.
- __ j(kNotZero, slow_path->GetEntryLabel());
+ __ movl(ref_reg, src);
// Object* ref = ref_addr->AsMirrorPtr()
__ MaybeUnpoisonHeapReference(ref_reg);
+ // Slow path used to mark the object `ref` when it is gray.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref);
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
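
For reference, a minimal sketch (not part of the patch; register names are illustrative) of how this change reshapes the Baker read barrier fast path on x86, using only assembler calls that appear in the hunks above:

  // Before: the gray bit is tested directly in memory, and the condition
  // flags must survive the reference load, so unpoisoning is deferred to
  // the slow path (the `unpoison` flag on ReadBarrierMarkSlowPathX86).
  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
  __ movl(ref_reg, src);  // Flags are unaffected.
  __ j(kNotZero, slow_path->GetEntryLabel());

  // After: the lock word is first loaded into a temporary, so there is no
  // flag-lifetime constraint; the reference can be unpoisoned inline, and
  // SHR shifts the low rb_state bit into the carry flag for the branch.
  __ movl(temp_reg, Address(obj, monitor_offset));
  __ movl(ref_reg, src);
  __ MaybeUnpoisonHeapReference(ref_reg);
  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
  __ j(kCarrySet, slow_path->GetEntryLabel());

Loading the lock word into a temporary removes the old requirement that the flags be preserved across the load and the unpoisoning, which is why the `unpoison` flag on the slow path can be dropped; the cost is the extra temporary register reserved in the LocationsBuilder changes above.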
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index c644e401ff..894f2e8f40 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -254,7 +254,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset);
+ uint32_t offset,
+ Location maybe_temp);
// Generate a GC root reference load:
//
// root <- *address
@@ -486,6 +487,7 @@ class CodeGeneratorX86 : public CodeGenerator {
Location ref,
Register obj,
uint32_t offset,
+ Location temp,
bool needs_null_check);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
@@ -494,6 +496,7 @@ class CodeGeneratorX86 : public CodeGenerator {
Register obj,
uint32_t data_offset,
Location index,
+ Location temp,
bool needs_null_check);
// Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
@@ -501,6 +504,7 @@ class CodeGeneratorX86 : public CodeGenerator {
Location ref,
Register obj,
const Address& src,
+ Location temp,
bool needs_null_check);
// Generate a read barrier for a heap reference within `instruction`
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ec37e5db22..0c55ae44de 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -466,8 +466,8 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison)
- : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
+ ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj)
+ : SlowPathCode(instruction), obj_(obj) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -491,10 +491,6 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- if (unpoison_) {
- // Object* ref = ref_addr->AsMirrorPtr()
- __ MaybeUnpoisonHeapReference(obj_.AsRegister<CpuRegister>());
- }
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
@@ -524,7 +520,6 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
private:
const Location obj_;
- const bool unpoison_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};
@@ -4157,6 +4152,11 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
Location::RequiresRegister(),
object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -4200,10 +4200,11 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
case Primitive::kPrimNot: {
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, /* needs_null_check */ true);
+ instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -4587,6 +4588,11 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4661,10 +4667,11 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
} else {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
@@ -5680,8 +5687,8 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
return kEmitCompilerReadBarrier &&
- !kUseBakerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
type_check_kind == TypeCheckKind::kArrayObjectCheck);
}
@@ -5742,7 +5749,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
@@ -5972,7 +5979,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<CpuRegister>());
@@ -5996,7 +6004,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
@@ -6016,7 +6025,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -6040,7 +6050,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
// Walk over the class hierarchy to find a match.
NearLabel loop;
@@ -6066,7 +6077,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&done);
break;
@@ -6085,7 +6097,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
// Do an exact check.
NearLabel check_non_primitive_component_type;
@@ -6113,7 +6126,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -6121,7 +6135,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&done);
break;
@@ -6137,7 +6152,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
// We always go into the type check slow path for the unresolved
// and interface check cases.
@@ -6305,17 +6321,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
Location maybe_temp) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
if (kEmitCompilerReadBarrier) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, out_reg, offset, /* needs_null_check */ false);
+ instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
// in the following move operation, as we will need it for the
// read barrier below.
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
__ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
// /* HeapReference<Object> */ out = *(out + offset)
__ movl(out_reg, Address(out_reg, offset));
@@ -6332,15 +6348,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset) {
+ uint32_t offset,
+ Location maybe_temp) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, obj_reg, offset, /* needs_null_check */ false);
+ instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6383,9 +6401,9 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path marking the GC root `root`.
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
- instruction, root, /* unpoison */ false);
+ // Slow path used to mark the GC root `root`.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root);
codegen_->AddSlowPath(slow_path);
__ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
@@ -6420,13 +6438,14 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
Location ref,
CpuRegister obj,
uint32_t offset,
+ Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
Address src(obj, offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
}
void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6434,6 +6453,7 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
CpuRegister obj,
uint32_t data_offset,
Location index,
+ Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6446,13 +6466,14 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
Address src = index.IsConstant() ?
Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
}
void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
CpuRegister obj,
const Address& src,
+ Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6482,23 +6503,17 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// performance reasons.
CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
+ CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
- static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
- constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
- constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
- constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
-
- // if (rb_state == ReadBarrier::gray_ptr_)
- // ref = ReadBarrier::Mark(ref);
- // At this point, just do the "if" and make sure that flags are preserved until the branch.
- __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
+ // /* int32_t */ monitor = obj->monitor_
+ __ movl(temp_reg, Address(obj, monitor_offset));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6506,20 +6521,25 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// The actual reference load.
// /* HeapReference<Object> */ ref = *src
- __ movl(ref_reg, src); // Flags are unaffected.
-
- // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
- // Slow path marking the object `ref` when it is gray.
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
- instruction, ref, /* unpoison */ true);
- AddSlowPath(slow_path);
-
- // We have done the "if" of the gray bit check above, now branch based on the flags.
- __ j(kNotZero, slow_path->GetEntryLabel());
+ __ movl(ref_reg, src);
// Object* ref = ref_addr->AsMirrorPtr()
__ MaybeUnpoisonHeapReference(ref_reg);
+ // Slow path used to mark the object `ref` when it is gray.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref);
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
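
The x86-64 path gets the same restructuring. As a standalone check of the bit arithmetic, here is a self-contained C++ sketch, under the assumptions the patch asserts (white == 0, gray == 1, black == 2, rb_state starting at LockWord::kReadBarrierStateShift); the shift value below is an assumed placeholder, not the real LockWord constant:

  #include <cassert>
  #include <cstdint>

  constexpr uint32_t kReadBarrierStateShift = 28;  // Illustrative value only.

  // Old scheme: testb on the byte of the lock word containing the gray bit.
  bool GrayViaByteTest(uint32_t monitor) {
    uint32_t gray_byte_position = kReadBarrierStateShift / 8;
    uint32_t gray_bit_position = kReadBarrierStateShift % 8;
    uint8_t byte = static_cast<uint8_t>(monitor >> (gray_byte_position * 8));
    return (byte & (1u << gray_bit_position)) != 0;
  }

  // New scheme: SHR by (shift + 1) moves the gray bit into the carry flag;
  // plain C++ simply inspects that bit directly.
  bool GrayViaShift(uint32_t monitor) {
    return ((monitor >> kReadBarrierStateShift) & 1u) != 0;
  }

  int main() {
    for (uint32_t rb_state = 0; rb_state < 3; ++rb_state) {  // white, gray, black
      uint32_t monitor = rb_state << kReadBarrierStateShift;
      assert(GrayViaByteTest(monitor) == GrayViaShift(monitor));
    }
    return 0;
  }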
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 44844ac67a..4e0e34ce38 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -248,7 +248,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset);
+ uint32_t offset,
+ Location maybe_temp);
// Generate a GC root reference load:
//
// root <- *address
@@ -426,6 +427,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location ref,
CpuRegister obj,
uint32_t offset,
+ Location temp,
bool needs_null_check);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
@@ -434,6 +436,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
CpuRegister obj,
uint32_t data_offset,
Location index,
+ Location temp,
bool needs_null_check);
// Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
@@ -441,6 +444,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location ref,
CpuRegister obj,
const Address& src,
+ Location temp,
bool needs_null_check);
// Generate a read barrier for a heap reference within `instruction`
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index cf4a040551..49d6c1952c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1934,9 +1934,10 @@ static void GenUnsafeGet(HInvoke* invoke,
Register output = output_loc.AsRegister<Register>();
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, output_loc, base, src, /* needs_null_check */ false);
+ invoke, output_loc, base, src, temp, /* needs_null_check */ false);
} else {
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
codegen->GenerateReadBarrierSlow(
@@ -1999,6 +2000,11 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
locations->SetOut(Location::RequiresRegister(),
can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
@@ -2927,11 +2933,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
// Bail out if the source is not a non primitive array.
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
__ testl(temp1, temp1);
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
@@ -2964,7 +2970,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// /* HeapReference<Class> */ temp1 = dest->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
@@ -2976,7 +2982,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// temporaries such as `temp1`.
// /* HeapReference<Class> */ temp2 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
__ testl(temp2, temp2);
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
@@ -2989,7 +2995,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
// /* HeapReference<Class> */ temp2 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
// Note: if heap poisoning is on, we are comparing two unpoisoned references here.
__ cmpl(temp1, temp2);
@@ -2998,7 +3004,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kEqual, &do_copy);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
// We do not need to emit a read barrier for the following
// heap reference load, as `temp1` is only used in a
// comparison with null below, and this reference is not
@@ -3052,10 +3058,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
__ testl(temp1, temp1);
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
@@ -3133,18 +3139,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ cmpl(temp1, temp3);
__ j(kEqual, &done);
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
- static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
- constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
- constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
- constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
-
- // if (rb_state == ReadBarrier::gray_ptr_)
- // goto slow_path;
- // At this point, just do the "if" and make sure that flags are preserved until the branch.
- __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+ // /* int32_t */ monitor = src->monitor_
+ __ movl(temp2, Address(src, monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86 memory model.
@@ -3155,8 +3154,13 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
codegen_->AddSlowPath(read_barrier_slow_path);
- // We have done the "if" of the gray bit check above, now branch based on the flags.
- __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp2, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
// Fast-path copy.
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index a4ee546237..311e1cd6eb 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1241,7 +1241,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = dest->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, dest, class_offset, temp3_loc, /* needs_null_check */ false);
// Register `temp1` is not trashed by the read barrier emitted
// by GenerateFieldLoadWithBakerReadBarrier below, as that
// method produces a call to a ReadBarrierMarkRegX entry point,
@@ -1249,7 +1249,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// temporaries such as `temp1`.
// /* HeapReference<Class> */ temp2 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
// If heap poisoning is enabled, `temp1` and `temp2` have been
// unpoisoned by the the previous calls to
// GenerateFieldLoadWithBakerReadBarrier.
@@ -1273,7 +1273,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ TMP = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
__ testl(CpuRegister(TMP), CpuRegister(TMP));
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `TMP` has been unpoisoned by
@@ -1296,7 +1296,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
// /* HeapReference<Class> */ TMP = temp2->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
+ invoke, TMP_loc, temp2, component_offset, temp3_loc, /* needs_null_check */ false);
__ testl(CpuRegister(TMP), CpuRegister(TMP));
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `TMP` has been unpoisoned by
@@ -1320,7 +1320,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
// We do not need to emit a read barrier for the following
// heap reference load, as `temp1` is only used in a
// comparison with null below, and this reference is not
@@ -1348,10 +1348,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
// /* HeapReference<Class> */ TMP = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
__ testl(CpuRegister(TMP), CpuRegister(TMP));
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
} else {
@@ -1421,18 +1421,11 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ cmpl(temp1, temp3);
__ j(kEqual, &done);
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
- static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
- constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
- constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
- constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
-
- // if (rb_state == ReadBarrier::gray_ptr_)
- // goto slow_path;
- // At this point, just do the "if" and make sure that flags are preserved until the branch.
- __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+ // /* int32_t */ monitor = src->monitor_
+ __ movl(CpuRegister(TMP), Address(src, monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86-64 memory model.
@@ -1443,8 +1436,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
codegen_->AddSlowPath(read_barrier_slow_path);
- // We have done the "if" of the gray bit check above, now branch based on the flags.
- __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(CpuRegister(TMP), Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
// Fast-path copy.
// Iterate over the arrays and do a raw copy of the objects. We don't need to
@@ -2089,9 +2087,10 @@ static void GenUnsafeGet(HInvoke* invoke,
case Primitive::kPrimNot: {
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, output_loc, base, src, /* needs_null_check */ false);
+ invoke, output_loc, base, src, temp, /* needs_null_check */ false);
} else {
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
codegen->GenerateReadBarrierSlow(
@@ -2114,7 +2113,9 @@ static void GenUnsafeGet(HInvoke* invoke,
}
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type) {
bool can_call = kEmitCompilerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -2128,25 +2129,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index f2ef41f400..f1a991574b 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1148,23 +1148,6 @@ void X86Assembler::testl(Register reg, const Immediate& immediate) {
}
-void X86Assembler::testb(const Address& dst, const Immediate& imm) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0xF6);
- EmitOperand(EAX, dst);
- CHECK(imm.is_int8());
- EmitUint8(imm.value() & 0xFF);
-}
-
-
-void X86Assembler::testl(const Address& dst, const Immediate& imm) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0xF7);
- EmitOperand(0, dst);
- EmitImmediate(imm);
-}
-
-
void X86Assembler::andl(Register dst, Register src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x23);
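
With the gray-bit check no longer reading memory directly, these Address/Immediate overloads of testb/testl lose their only users and are dropped, along with their assembler tests below. For the record, a sketch of the bytes the removed testb overload emitted for the first case of the deleted TestbAddressImmediate test, assuming the standard TEST r/m8, imm8 encoding (F6 /0 ib):

  #include <cstdint>

  // testb $1, 0xc(%EDI,%EBX,4)
  const uint8_t kExpectedBytes[] = {
      0xF6,  // Opcode: TEST r/m8, imm8.
      0x44,  // ModRM: mod=01 (disp8), reg=/0, rm=100 (SIB follows).
      0x9F,  // SIB: scale=4 (TIMES_4), index=EBX, base=EDI.
      0x0C,  // disp8 = 12.
      0x01,  // imm8 = 1.
  };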
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2ddcd760dd..63aa4a4b8f 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -496,9 +496,6 @@ class X86Assembler FINAL : public Assembler {
void testl(Register reg, const Immediate& imm);
void testl(Register reg1, const Address& address);
- void testb(const Address& dst, const Immediate& imm);
- void testl(const Address& dst, const Immediate& imm);
-
void andl(Register dst, const Immediate& imm);
void andl(Register dst, Register src);
void andl(Register dst, const Address& address);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 61d70d714a..307e034b76 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -375,42 +375,6 @@ TEST_F(AssemblerX86Test, CmovlAddress) {
DriverStr(expected, "cmovl_address");
}
-TEST_F(AssemblerX86Test, TestbAddressImmediate) {
- GetAssembler()->testb(
- x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
- x86::Immediate(1));
- GetAssembler()->testb(
- x86::Address(x86::Register(x86::ESP), FrameOffset(7)),
- x86::Immediate(-128));
- GetAssembler()->testb(
- x86::Address(x86::Register(x86::EBX), MemberOffset(130)),
- x86::Immediate(127));
- const char* expected =
- "testb $1, 0xc(%EDI,%EBX,4)\n"
- "testb $-128, 0x7(%ESP)\n"
- "testb $127, 0x82(%EBX)\n";
-
- DriverStr(expected, "TestbAddressImmediate");
-}
-
-TEST_F(AssemblerX86Test, TestlAddressImmediate) {
- GetAssembler()->testl(
- x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
- x86::Immediate(1));
- GetAssembler()->testl(
- x86::Address(x86::Register(x86::ESP), FrameOffset(7)),
- x86::Immediate(-100000));
- GetAssembler()->testl(
- x86::Address(x86::Register(x86::EBX), MemberOffset(130)),
- x86::Immediate(77777777));
- const char* expected =
- "testl $1, 0xc(%EDI,%EBX,4)\n"
- "testl $-100000, 0x7(%ESP)\n"
- "testl $77777777, 0x82(%EBX)\n";
-
- DriverStr(expected, "TestlAddressImmediate");
-}
-
/////////////////
// Near labels //
/////////////////
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1f73aa7374..ddc824425e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1389,25 +1389,6 @@ void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
}
-void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitOptionalRex32(dst);
- EmitUint8(0xF6);
- EmitOperand(Register::RAX, dst);
- CHECK(imm.is_int8());
- EmitUint8(imm.value() & 0xFF);
-}
-
-
-void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitOptionalRex32(dst);
- EmitUint8(0xF7);
- EmitOperand(0, dst);
- EmitImmediate(imm);
-}
-
-
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex32(dst, src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 3a4bfca6b0..a4166f965d 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -528,9 +528,6 @@ class X86_64Assembler FINAL : public Assembler {
void testq(CpuRegister reg1, CpuRegister reg2);
void testq(CpuRegister reg, const Address& address);
- void testb(const Address& address, const Immediate& imm);
- void testl(const Address& address, const Immediate& imm);
-
void andl(CpuRegister dst, const Immediate& imm);
void andl(CpuRegister dst, CpuRegister src);
void andl(CpuRegister reg, const Address& address);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 48a18760f1..36c966b3cf 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1526,48 +1526,6 @@ TEST_F(AssemblerX86_64Test, Cmpb) {
DriverStr(expected, "cmpb");
}
-TEST_F(AssemblerX86_64Test, TestbAddressImmediate) {
- GetAssembler()->testb(
- x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
- x86_64::CpuRegister(x86_64::RBX),
- x86_64::TIMES_4,
- 12),
- x86_64::Immediate(1));
- GetAssembler()->testb(
- x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
- x86_64::Immediate(-128));
- GetAssembler()->testb(
- x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
- x86_64::Immediate(127));
- const char* expected =
- "testb $1, 0xc(%RDI,%RBX,4)\n"
- "testb $-128, 0x7(%RSP)\n"
- "testb $127, 0x82(%RBX)\n";
-
- DriverStr(expected, "TestbAddressImmediate");
-}
-
-TEST_F(AssemblerX86_64Test, TestlAddressImmediate) {
- GetAssembler()->testl(
- x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
- x86_64::CpuRegister(x86_64::RBX),
- x86_64::TIMES_4,
- 12),
- x86_64::Immediate(1));
- GetAssembler()->testl(
- x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
- x86_64::Immediate(-100000));
- GetAssembler()->testl(
- x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
- x86_64::Immediate(77777777));
- const char* expected =
- "testl $1, 0xc(%RDI,%RBX,4)\n"
- "testl $-100000, 0x7(%RSP)\n"
- "testl $77777777, 0x82(%RBX)\n";
-
- DriverStr(expected, "TestlAddressImmediate");
-}
-
class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> {
public:
using Base = JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler>;
diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java
index 95a11ca010..7c124caa8e 100644
--- a/test/537-checker-arraycopy/src/Main.java
+++ b/test/537-checker-arraycopy/src/Main.java
@@ -51,10 +51,10 @@ public class Main {
/// CHECK-START-X86_64: void Main.arraycopy() disassembly (after)
/// CHECK: InvokeStaticOrDirect intrinsic:SystemArrayCopy
- /// CHECK-NOT: test {{^[^\[].*}}, {{^[^\[].*}}
+ /// CHECK-NOT: test
/// CHECK-NOT: call
/// CHECK: ReturnVoid
- // Checks that the call is intrinsified and that there is no register test instruction
+ // Checks that the call is intrinsified and that there is no test instruction
// when we know the source and destination are not null.
public static void arraycopy() {
Object[] obj = new Object[4];