author		2016-08-23 17:48:38 +0000
committer	2016-08-23 17:48:38 +0000
commit		ccf15bca330f9a23337b1a4b5850f7fcc6c1bf15 (patch)
tree		8e271269eb0f3e40388311478fe441bfeb47ab47
parent		ccf06d8f19a37432de4a3b768747090adfbd18ec (diff)
Revert "x86/x86-64: Avoid temporary for read barrier field load."
The fault handler does not recognize the instruction

    F6 /0 ib    TEST r/m8, imm8

so we get crashes instead of NPEs.
Bug: 29966877
Bug: 12687968
This reverts commit ccf06d8f19a37432de4a3b768747090adfbd18ec.
Change-Id: Ib7db3b59f44c0d3ed5e24a20b6c6ee596a89d709
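For context, a condensed sketch of the two code sequences involved, reconstructed from the x86-64 hunks below (register names such as `obj` and `temp_reg` stand in for whatever registers the code generator actually allocates):

    // Reverted fast path: one TEST of the gray byte in the lock word.
    // TEST r/m8, imm8 encodes as F6 /0 ib; when `obj` is null the load
    // faults inside this instruction, and because the fault handler cannot
    // decode that encoding, the SIGSEGV is not converted into a
    // NullPointerException -- the process crashes instead.
    __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
    __ j(kNotZero, slow_path->GetEntryLabel());

    // Restored sequence: load the monitor word with a plain MOV (which the
    // fault handler does recognize, so an implicit null check still becomes
    // an NPE), then shift the rb_state bit into the carry flag and branch.
    __ movl(temp_reg, Address(obj, monitor_offset));                     // may fault -> NPE
    __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));  // low rb_state bit -> CF
    __ j(kCarrySet, slow_path->GetEntryLabel());

The cost of the restored sequence is the extra temporary register that most of the hunks below thread through the code generators and intrinsics.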
-rw-r--r--	compiler/optimizing/code_generator_arm.cc	|   4
-rw-r--r--	compiler/optimizing/code_generator_arm64.cc	|   4
-rw-r--r--	compiler/optimizing/code_generator_x86.cc	| 110
-rw-r--r--	compiler/optimizing/code_generator_x86.h	|   6
-rw-r--r--	compiler/optimizing/code_generator_x86_64.cc	| 124
-rw-r--r--	compiler/optimizing/code_generator_x86_64.h	|   6
-rw-r--r--	compiler/optimizing/intrinsics_x86.cc	|  50
-rw-r--r--	compiler/optimizing/intrinsics_x86_64.cc	|  64
-rw-r--r--	compiler/utils/x86/assembler_x86.cc	|  17
-rw-r--r--	compiler/utils/x86/assembler_x86.h	|   3
-rw-r--r--	compiler/utils/x86/assembler_x86_test.cc	|  36
-rw-r--r--	compiler/utils/x86_64/assembler_x86_64.cc	|  19
-rw-r--r--	compiler/utils/x86_64/assembler_x86_64.h	|   3
-rw-r--r--	compiler/utils/x86_64/assembler_x86_64_test.cc	|  42
-rw-r--r--	test/537-checker-arraycopy/src/Main.java	|   4
15 files changed, 212 insertions(+), 280 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6d9c55cd75..404f044cef 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -6377,7 +6377,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path marking the GC root `root`.
+      // Slow path used to mark the GC root `root`.
       SlowPathCode* slow_path =
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
       codegen_->AddSlowPath(slow_path);
@@ -6518,7 +6518,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
   //   Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path marking the object `ref` when it is gray.
+  // Slow path used to mark the object `ref` when it is gray.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
   AddSlowPath(slow_path);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index cc8985d0b0..122c174eae 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -5041,7 +5041,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path marking the GC root `root`.
+      // Slow path used to mark the GC root `root`.
       SlowPathCodeARM64* slow_path =
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
       codegen_->AddSlowPath(slow_path);
@@ -5239,7 +5239,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
   //   Object* ref = ref_addr->AsMirrorPtr()
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path marking the object `ref` when it is gray.
+  // Slow path used to mark the object `ref` when it is gray.
   SlowPathCodeARM64* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
   AddSlowPath(slow_path);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f50eb5cb7e..7aca16f867 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -445,8 +445,8 @@ class ArraySetSlowPathX86 : public SlowPathCode {
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison)
-      : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -470,10 +470,6 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
         << instruction_->DebugName();
     __ Bind(GetEntryLabel());
-    if (unpoison_) {
-      // Object* ref = ref_addr->AsMirrorPtr()
-      __ MaybeUnpoisonHeapReference(reg);
-    }
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
@@ -503,7 +499,6 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
 
  private:
   const Location obj_;
-  const bool unpoison_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
 };
@@ -4636,6 +4631,10 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
       // load the temp into the XMM and then copy the XMM into the
       // output, 32 bits at a time).
       locations->AddTemp(Location::RequiresFpuRegister());
+    } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+      // We need a temporary register for the read barrier marking slow
+      // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+      locations->AddTemp(Location::RequiresRegister());
     }
   }
 
@@ -4679,10 +4678,11 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
     case Primitive::kPrimNot: {
       // /* HeapReference<Object> */ out = *(base + offset)
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
-            instruction, out, base, offset, /* needs_null_check */ true);
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
         if (is_volatile) {
           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
         }
@@ -5093,6 +5093,11 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
                       object_array_get_with_read_barrier ?
                       Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -5167,10 +5172,11 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
       // /* HeapReference<Object> */ out =
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
         codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
       } else {
         Register out = out_loc.AsRegister<Register>();
         if (index.IsConstant()) {
@@ -6275,8 +6281,8 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
 
 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   return kEmitCompilerReadBarrier &&
-      !kUseBakerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
       type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
@@ -6337,7 +6343,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -6559,7 +6565,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -6595,7 +6601,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6634,7 +6641,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6666,7 +6674,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6674,7 +6683,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6865,17 +6875,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
                                                                    Location maybe_temp) {
   Register out_reg = out.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, out_reg, offset, /* needs_null_check */ false);
+          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // Save the value of `out` into `maybe_temp` before overwriting it
       // in the following move operation, as we will need it for the
       // read barrier below.
-      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       __ movl(maybe_temp.AsRegister<Register>(), out_reg);
       // /* HeapReference<Object> */ out = *(out + offset)
       __ movl(out_reg, Address(out_reg, offset));
@@ -6892,15 +6902,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                                                     Location out,
                                                                     Location obj,
-                                                                    uint32_t offset) {
+                                                                    uint32_t offset,
+                                                                    Location maybe_temp) {
   Register out_reg = out.AsRegister<Register>();
   Register obj_reg = obj.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, obj_reg, offset, /* needs_null_check */ false);
+          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6943,9 +6955,9 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path marking the GC root `root`.
-      SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
-          instruction, root, /* unpoison */ false);
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root);
       codegen_->AddSlowPath(slow_path);
 
       __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
@@ -6979,13 +6991,14 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
                                                              Location ref,
                                                              Register obj,
                                                              uint32_t offset,
+                                                             Location temp,
                                                              bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Address src(obj, offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
 }
 
 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6993,6 +7006,7 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
                                                              Register obj,
                                                              uint32_t data_offset,
                                                              Location index,
+                                                             Location temp,
                                                              bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -7005,13 +7019,14 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
   Address src = index.IsConstant() ?
       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
       Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
 }
 
 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                  Location ref,
                                                                  Register obj,
                                                                  const Address& src,
+                                                                 Location temp,
                                                                  bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -7041,23 +7056,17 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
   // performance reasons.
 
   Register ref_reg = ref.AsRegister<Register>();
+  Register temp_reg = temp.AsRegister<Register>();
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
-  // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
-  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
-  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
-  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
-
-  // if (rb_state == ReadBarrier::gray_ptr_)
-  //   ref = ReadBarrier::Mark(ref);
-  // At this point, just do the "if" and make sure that flags are preserved until the branch.
-  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
   if (needs_null_check) {
     MaybeRecordImplicitNullCheck(instruction);
   }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
 
   // Load fence to prevent load-load reordering.
   // Note that this is a no-op, thanks to the x86 memory model.
@@ -7065,20 +7074,25 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
 
   // The actual reference load.
   // /* HeapReference<Object> */ ref = *src
-  __ movl(ref_reg, src);  // Flags are unaffected.
-
-  // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
-  // Slow path marking the object `ref` when it is gray.
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
-      instruction, ref, /* unpoison */ true);
-  AddSlowPath(slow_path);
-
-  // We have done the "if" of the gray bit check above, now branch based on the flags.
-  __ j(kNotZero, slow_path->GetEntryLabel());
+  __ movl(ref_reg, src);
 
   // Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
 
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with SHR.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+  __ j(kCarrySet, slow_path->GetEntryLabel());
+
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index c644e401ff..894f2e8f40 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -254,7 +254,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset);
+                                         uint32_t offset,
+                                         Location maybe_temp);
   // Generate a GC root reference load:
   //
   //   root <- *address
@@ -486,6 +487,7 @@ class CodeGeneratorX86 : public CodeGenerator {
                                              Location ref,
                                              Register obj,
                                              uint32_t offset,
+                                             Location temp,
                                              bool needs_null_check);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
@@ -494,6 +496,7 @@ class CodeGeneratorX86 : public CodeGenerator {
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
+                                             Location temp,
                                              bool needs_null_check);
   // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
@@ -501,6 +504,7 @@ class CodeGeneratorX86 : public CodeGenerator {
                                              Location ref,
                                              Register obj,
                                              const Address& src,
+                                             Location temp,
                                              bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ec37e5db22..0c55ae44de 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -466,8 +466,8 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison)
-      : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -491,10 +491,6 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
         << instruction_->DebugName();
     __ Bind(GetEntryLabel());
-    if (unpoison_) {
-      // Object* ref = ref_addr->AsMirrorPtr()
-      __ MaybeUnpoisonHeapReference(obj_.AsRegister<CpuRegister>());
-    }
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
@@ -524,7 +520,6 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 
  private:
   const Location obj_;
-  const bool unpoison_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
 };
@@ -4157,6 +4152,11 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
                       Location::RequiresRegister(),
                       object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -4200,10 +4200,11 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
     case Primitive::kPrimNot: {
      // /* HeapReference<Object> */ out = *(base + offset)
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
         codegen_->GenerateFieldLoadWithBakerReadBarrier(
-            instruction, out, base, offset, /* needs_null_check */ true);
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
         if (is_volatile) {
           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
         }
@@ -4587,6 +4588,11 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
                       Location::RequiresRegister(),
                       object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4661,10 +4667,11 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
       // /* HeapReference<Object> */ out =
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
         codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
       } else {
         CpuRegister out = out_loc.AsRegister<CpuRegister>();
         if (index.IsConstant()) {
@@ -5680,8 +5687,8 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
 
 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   return kEmitCompilerReadBarrier &&
-      !kUseBakerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
       type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
@@ -5742,7 +5749,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -5972,7 +5979,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(
+      instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
   if (cls.IsRegister()) {
     __ cmpl(temp, cls.AsRegister<CpuRegister>());
@@ -5996,7 +6004,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
@@ -6016,7 +6025,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6040,7 +6050,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
@@ -6066,7 +6077,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&done);
      break;
@@ -6085,7 +6097,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
       // Do an exact check.
       NearLabel check_non_primitive_component_type;
@@ -6113,7 +6126,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6121,7 +6135,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       __ Bind(&done);
       break;
@@ -6137,7 +6152,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
 
       // We always go into the type check slow path for the unresolved
       // and interface check cases.
@@ -6305,17 +6321,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
                                                                       Location maybe_temp) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   if (kEmitCompilerReadBarrier) {
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, out_reg, offset, /* needs_null_check */ false);
+          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // Save the value of `out` into `maybe_temp` before overwriting it
       // in the following move operation, as we will need it for the
       // read barrier below.
-      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
       // /* HeapReference<Object> */ out = *(out + offset)
       __ movl(out_reg, Address(out_reg, offset));
@@ -6332,15 +6348,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                                                        Location out,
                                                                        Location obj,
-                                                                       uint32_t offset) {
+                                                                       uint32_t offset,
+                                                                       Location maybe_temp) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, obj_reg, offset, /* needs_null_check */ false);
+          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6383,9 +6401,9 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstr
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path marking the GC root `root`.
-      SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
-          instruction, root, /* unpoison */ false);
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root);
       codegen_->AddSlowPath(slow_path);
 
       __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
@@ -6420,13 +6438,14 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
                                                                 Location ref,
                                                                 CpuRegister obj,
                                                                 uint32_t offset,
+                                                                Location temp,
                                                                 bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Address src(obj, offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
 }
 
 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6434,6 +6453,7 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
                                                                 CpuRegister obj,
                                                                 uint32_t data_offset,
                                                                 Location index,
+                                                                Location temp,
                                                                 bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -6446,13 +6466,14 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
   Address src = index.IsConstant() ?
       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
       Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
 }
 
 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                     Location ref,
                                                                     CpuRegister obj,
                                                                     const Address& src,
+                                                                    Location temp,
                                                                     bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -6482,23 +6503,17 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
   // performance reasons.
 
   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
+  CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
-  // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
-  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
-  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
-  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
-
-  // if (rb_state == ReadBarrier::gray_ptr_)
-  //   ref = ReadBarrier::Mark(ref);
-  // At this point, just do the "if" and make sure that flags are preserved until the branch.
-  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
   if (needs_null_check) {
     MaybeRecordImplicitNullCheck(instruction);
   }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
 
   // Load fence to prevent load-load reordering.
   // Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6506,20 +6521,25 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
 
   // The actual reference load.
   // /* HeapReference<Object> */ ref = *src
-  __ movl(ref_reg, src);  // Flags are unaffected.
-
-  // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
-  // Slow path marking the object `ref` when it is gray.
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
-      instruction, ref, /* unpoison */ true);
-  AddSlowPath(slow_path);
-
-  // We have done the "if" of the gray bit check above, now branch based on the flags.
-  __ j(kNotZero, slow_path->GetEntryLabel());
+  __ movl(ref_reg, src);
 
   // Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
 
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with SHR.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+  __ j(kCarrySet, slow_path->GetEntryLabel());
+
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 44844ac67a..4e0e34ce38 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -248,7 +248,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset);
+                                         uint32_t offset,
+                                         Location maybe_temp);
   // Generate a GC root reference load:
   //
   //   root <- *address
@@ -426,6 +427,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
                                              Location ref,
                                              CpuRegister obj,
                                              uint32_t offset,
+                                             Location temp,
                                              bool needs_null_check);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
@@ -434,6 +436,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
                                              CpuRegister obj,
                                              uint32_t data_offset,
                                              Location index,
+                                             Location temp,
                                              bool needs_null_check);
   // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
@@ -441,6 +444,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
                                              Location ref,
                                              CpuRegister obj,
                                              const Address& src,
+                                             Location temp,
                                              bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index cf4a040551..49d6c1952c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1934,9 +1934,10 @@ static void GenUnsafeGet(HInvoke* invoke,
       Register output = output_loc.AsRegister<Register>();
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
           Address src(base, offset, ScaleFactor::TIMES_1, 0);
           codegen->GenerateReferenceLoadWithBakerReadBarrier(
-              invoke, output_loc, base, src, /* needs_null_check */ false);
+              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -1999,6 +2000,11 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
     locations->SetOut(Location::RequiresRegister(),
                       can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
@@ -2927,11 +2933,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // /* HeapReference<Class> */ temp1 = src->klass_
     codegen_->GenerateFieldLoadWithBakerReadBarrier(
-        invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+        invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
     // Bail out if the source is not a non primitive array.
     // /* HeapReference<Class> */ temp1 = temp1->component_type_
     codegen_->GenerateFieldLoadWithBakerReadBarrier(
-        invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+        invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
     __ testl(temp1, temp1);
     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
     // If heap poisoning is enabled, `temp1` has been unpoisoned
@@ -2964,7 +2970,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
 
     // /* HeapReference<Class> */ temp1 = dest->klass_
     codegen_->GenerateFieldLoadWithBakerReadBarrier(
-        invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+        invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
 
     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
       // Bail out if the destination is not a non primitive array.
@@ -2976,7 +2982,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
       // temporaries such a `temp1`.
       // /* HeapReference<Class> */ temp2 = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
+          invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
       __ testl(temp2, temp2);
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
       // If heap poisoning is enabled, `temp2` has been unpoisoned
@@ -2989,7 +2995,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
       // /* HeapReference<Class> */ temp2 = src->klass_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+          invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
 
       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
       __ cmpl(temp1, temp2);
@@ -2998,7 +3004,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
       __ j(kEqual, &do_copy);
       // /* HeapReference<Class> */ temp1 = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+          invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
       // We do not need to emit a read barrier for the following
       // heap reference load, as `temp1` is only used in a
       // comparison with null below, and this reference is not
@@ -3052,10 +3058,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // /* HeapReference<Class> */ temp1 = src->klass_
     codegen_->GenerateFieldLoadWithBakerReadBarrier(
-        invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+        invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
     // /* HeapReference<Class> */ temp1 = temp1->component_type_
     codegen_->GenerateFieldLoadWithBakerReadBarrier(
-        invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+        invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
     __ testl(temp1, temp1);
     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
     // If heap poisoning is enabled, `temp1` has been unpoisoned
@@ -3133,18 +3139,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
     __ cmpl(temp1, temp3);
     __ j(kEqual, &done);
 
-    // Given the numeric representation, it's enough to check the low bit of the rb_state.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
-    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
-    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
-    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
-
-    // if (rb_state == ReadBarrier::gray_ptr_)
-    //   goto slow_path;
-    // At this point, just do the "if" and make sure that flags are preserved until the branch.
-    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+    // /* int32_t */ monitor = src->monitor_
+    __ movl(temp2, Address(src, monitor_offset));
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
 
     // Load fence to prevent load-load reordering.
     // Note that this is a no-op, thanks to the x86 memory model.
@@ -3155,8 +3154,13 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
         new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
     codegen_->AddSlowPath(read_barrier_slow_path);
 
-    // We have done the "if" of the gray bit check above, now branch based on the flags.
-    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with SHR.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    __ shrl(temp2, Immediate(LockWord::kReadBarrierStateShift + 1));
+    __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
 
     // Fast-path copy.
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index a4ee546237..311e1cd6eb 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1241,7 +1241,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // /* HeapReference<Class> */ temp1 = dest->klass_
     codegen_->GenerateFieldLoadWithBakerReadBarrier(
-        invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+        invoke, temp1_loc, dest, class_offset, temp3_loc, /* needs_null_check */ false);
     // Register `temp1` is not trashed by the read barrier emitted
     // by GenerateFieldLoadWithBakerReadBarrier below, as that
     // method produces a call to a ReadBarrierMarkRegX entry point,
@@ -1249,7 +1249,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     // temporaries such a `temp1`.
     // /* HeapReference<Class> */ temp2 = src->klass_
     codegen_->GenerateFieldLoadWithBakerReadBarrier(
-        invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+        invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
     // If heap poisoning is enabled, `temp1` and `temp2` have been
     // unpoisoned by the the previous calls to
     // GenerateFieldLoadWithBakerReadBarrier.
@@ -1273,7 +1273,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // /* HeapReference<Class> */ TMP = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+          invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
       __ testl(CpuRegister(TMP), CpuRegister(TMP));
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
       // If heap poisoning is enabled, `TMP` has been unpoisoned by
@@ -1296,7 +1296,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
      // /* HeapReference<Class> */ TMP = temp2->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
+          invoke, TMP_loc, temp2, component_offset, temp3_loc, /* needs_null_check */ false);
       __ testl(CpuRegister(TMP), CpuRegister(TMP));
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
       // If heap poisoning is enabled, `TMP` has been unpoisoned by
@@ -1320,7 +1320,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // /* HeapReference<Class> */ temp1 = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+          invoke, temp1_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
       // We do not need to emit a read barrier for the following
       // heap reference load, as `temp1` is only used in a
       // comparison with null below, and this reference is not
@@ -1348,10 +1348,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // /* HeapReference<Class> */ temp1 = src->klass_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+          invoke, temp1_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
       // /* HeapReference<Class> */ TMP = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+          invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
       __ testl(CpuRegister(TMP), CpuRegister(TMP));
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
     } else {
@@ -1421,18 +1421,11 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     __ cmpl(temp1, temp3);
     __ j(kEqual, &done);
 
-    // Given the numeric representation, it's enough to check the low bit of the rb_state.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
-    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
-    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
-    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
-
-    // if (rb_state == ReadBarrier::gray_ptr_)
-    //   goto slow_path;
-    // At this point, just do the "if" and make sure that flags are preserved until the branch.
-    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+    // /* int32_t */ monitor = src->monitor_
+    __ movl(CpuRegister(TMP), Address(src, monitor_offset));
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
 
     // Load fence to prevent load-load reordering.
     // Note that this is a no-op, thanks to the x86-64 memory model.
@@ -1443,8 +1436,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
         new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
     codegen_->AddSlowPath(read_barrier_slow_path);
 
-    // We have done the "if" of the gray bit check above, now branch based on the flags.
-    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with SHR.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    __ shrl(CpuRegister(TMP), Immediate(LockWord::kReadBarrierStateShift + 1));
+    __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
 
     // Fast-path copy.
     // Iterate over the arrays and do a raw copy of the objects. We don't need to
@@ -2089,9 +2087,10 @@ static void GenUnsafeGet(HInvoke* invoke,
     case Primitive::kPrimNot: {
       if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
           Address src(base, offset, ScaleFactor::TIMES_1, 0);
           codegen->GenerateReferenceLoadWithBakerReadBarrier(
-              invoke, output_loc, base, src, /* needs_null_check */ false);
+              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -2114,7 +2113,9 @@ static void GenUnsafeGet(HInvoke* invoke,
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -2128,25 +2129,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(),
                     can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index f2ef41f400..f1a991574b 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1148,23 +1148,6 @@ void X86Assembler::testl(Register reg, const Immediate& immediate) {
 }
 
 
-void X86Assembler::testb(const Address& dst, const Immediate& imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0xF6);
-  EmitOperand(EAX, dst);
-  CHECK(imm.is_int8());
-  EmitUint8(imm.value() & 0xFF);
-}
-
-
-void X86Assembler::testl(const Address& dst, const Immediate& imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0xF7);
-  EmitOperand(0, dst);
-  EmitImmediate(imm);
-}
-
-
 void X86Assembler::andl(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x23);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2ddcd760dd..63aa4a4b8f 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -496,9 +496,6 @@ class X86Assembler FINAL : public Assembler {
   void testl(Register reg, const Immediate& imm);
   void testl(Register reg1, const Address& address);
 
-  void testb(const Address& dst, const Immediate& imm);
-  void testl(const Address& dst, const Immediate& imm);
-
   void andl(Register dst, const Immediate& imm);
   void andl(Register dst, Register src);
   void andl(Register dst, const Address& address);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 61d70d714a..307e034b76 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -375,42 +375,6 @@ TEST_F(AssemblerX86Test, CmovlAddress) {
   DriverStr(expected, "cmovl_address");
 }
 
-TEST_F(AssemblerX86Test, TestbAddressImmediate) {
-  GetAssembler()->testb(
-      x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
-      x86::Immediate(1));
-  GetAssembler()->testb(
-      x86::Address(x86::Register(x86::ESP), FrameOffset(7)),
-      x86::Immediate(-128));
-  GetAssembler()->testb(
-      x86::Address(x86::Register(x86::EBX), MemberOffset(130)),
-      x86::Immediate(127));
-  const char* expected =
-      "testb $1, 0xc(%EDI,%EBX,4)\n"
-      "testb $-128, 0x7(%ESP)\n"
-      "testb $127, 0x82(%EBX)\n";
-
-  DriverStr(expected, "TestbAddressImmediate");
-}
-
-TEST_F(AssemblerX86Test, TestlAddressImmediate) {
-  GetAssembler()->testl(
-      x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
-      x86::Immediate(1));
-  GetAssembler()->testl(
-      x86::Address(x86::Register(x86::ESP), FrameOffset(7)),
-      x86::Immediate(-100000));
-  GetAssembler()->testl(
-      x86::Address(x86::Register(x86::EBX), MemberOffset(130)),
-      x86::Immediate(77777777));
-  const char* expected =
-      "testl $1, 0xc(%EDI,%EBX,4)\n"
-      "testl $-100000, 0x7(%ESP)\n"
-      "testl $77777777, 0x82(%EBX)\n";
-
-  DriverStr(expected, "TestlAddressImmediate");
-}
-
 /////////////////
 // Near labels //
 /////////////////
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1f73aa7374..ddc824425e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1389,25 +1389,6 @@ void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
 }
 
 
-void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(dst);
-  EmitUint8(0xF6);
-  EmitOperand(Register::RAX, dst);
-  CHECK(imm.is_int8());
-  EmitUint8(imm.value() & 0xFF);
-}
-
-
-void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(dst);
-  EmitUint8(0xF7);
-  EmitOperand(0, dst);
-  EmitImmediate(imm);
-}
-
-
 void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 3a4bfca6b0..a4166f965d 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -528,9 +528,6 @@ class X86_64Assembler FINAL : public Assembler {
   void testq(CpuRegister reg1, CpuRegister reg2);
   void testq(CpuRegister reg, const Address& address);
 
-  void testb(const Address& address, const Immediate& imm);
-  void testl(const Address& address, const Immediate& imm);
-
   void andl(CpuRegister dst, const Immediate& imm);
   void andl(CpuRegister dst, CpuRegister src);
   void andl(CpuRegister reg, const Address& address);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 48a18760f1..36c966b3cf 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1526,48 +1526,6 @@ TEST_F(AssemblerX86_64Test, Cmpb) {
   DriverStr(expected, "cmpb");
 }
 
-TEST_F(AssemblerX86_64Test, TestbAddressImmediate) {
-  GetAssembler()->testb(
-      x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
-                      x86_64::CpuRegister(x86_64::RBX),
-                      x86_64::TIMES_4,
-                      12),
-      x86_64::Immediate(1));
-  GetAssembler()->testb(
-      x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
-      x86_64::Immediate(-128));
-  GetAssembler()->testb(
-      x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
-      x86_64::Immediate(127));
-  const char* expected =
-      "testb $1, 0xc(%RDI,%RBX,4)\n"
-      "testb $-128, 0x7(%RSP)\n"
-      "testb $127, 0x82(%RBX)\n";
-
-  DriverStr(expected, "TestbAddressImmediate");
-}
-
-TEST_F(AssemblerX86_64Test, TestlAddressImmediate) {
-  GetAssembler()->testl(
-      x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
-                      x86_64::CpuRegister(x86_64::RBX),
-                      x86_64::TIMES_4,
-                      12),
-      x86_64::Immediate(1));
-  GetAssembler()->testl(
-      x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
-      x86_64::Immediate(-100000));
-  GetAssembler()->testl(
-      x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
-      x86_64::Immediate(77777777));
-  const char* expected =
-      "testl $1, 0xc(%RDI,%RBX,4)\n"
-      "testl $-100000, 0x7(%RSP)\n"
-      "testl $77777777, 0x82(%RBX)\n";
-
-  DriverStr(expected, "TestlAddressImmediate");
-}
-
 class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> {
  public:
   using Base = JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler>;
diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java
index 95a11ca010..7c124caa8e 100644
--- a/test/537-checker-arraycopy/src/Main.java
+++ b/test/537-checker-arraycopy/src/Main.java
@@ -51,10 +51,10 @@ public class Main {
 
   /// CHECK-START-X86_64: void Main.arraycopy() disassembly (after)
   /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
-  /// CHECK-NOT:      test {{^[^\[].*}}, {{^[^\[].*}}
+  /// CHECK-NOT:      test
   /// CHECK-NOT:      call
   /// CHECK:          ReturnVoid
-  // Checks that the call is intrinsified and that there is no register test instruction
+  // Checks that the call is intrinsified and that there is no test instruction
   // when we know the source and destination are not null.
   public static void arraycopy() {
     Object[] obj = new Object[4];