author      2015-11-13 10:07:31 +0000
committer   2015-11-15 12:16:41 +0000
commit      0d5a281c671444bfa75d63caf1427a8c0e6e1177 (patch)
tree        fd9bbe0f1c581bcc7c05bbfb2643ffe0b1fb014e
parent      dd4cbcc924c8ba2a578914a4a366996693bdcd74 (diff)
x86/x86-64 read barrier support for concurrent GC in Optimizing.
This first implementation uses slow paths to instrument heap
reference loads and GC root loads for the concurrent copying
collector, calling the artReadBarrierSlow and the new
artReadBarrierForRootSlow runtime entry points, respectively.
Notes:
- This implementation instruments neither HInvokeVirtual
nor HInvokeInterface instructions (for class reference
loads), as the corresponding read barriers are not strictly
required with the current concurrent copying collector.
- Intrinsics which may eventually make a call (on their
slow path) are disabled when read barriers are enabled, as
the current slow path infrastructure does not support this
case.
- When read barriers are enabled, the code generated for an
HArraySet instruction always goes into the array set slow
path for object arrays (delegating the operation to the
runtime), as we lack a mechanism to keep a temporary
register live across a runtime call (which is needed to
instrument the type checking code, as it requires two
successive read barriers).
Bug: 12687968
Change-Id: I14cd6107233c326389120336f93955b28ffbb329
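For readers skimming the patch, here is a standalone C++ sketch of the
scheme the generated code implements. All names below are illustrative
stand-ins, not part of this change: the compiled code actually branches
to an out-of-line slow path that calls the artReadBarrierSlow or
artReadBarrierForRootSlow entry point. The sketch models a from-space
reference being forwarded to its to-space copy by a moving concurrent
copying collector.

#include <cstdio>
#include <unordered_map>

struct Object { int payload; };

// Stand-in for the collector's from-space -> to-space forwarding state.
static std::unordered_map<const Object*, Object*> forwarding;

// Models the artReadBarrierSlow entry point: given a just-loaded heap
// reference, return the up-to-date (possibly forwarded) reference.
static Object* ReadBarrierSlow(Object* ref) {
  auto it = forwarding.find(ref);
  return it != forwarding.end() ? it->second : ref;
}

// Models the artReadBarrierForRootSlow entry point: a GC root is the
// *address* of a reference (e.g. a dex cache slot), so load through it
// and fix up the result.
static Object* ReadBarrierForRootSlow(Object** root) {
  return ReadBarrierSlow(*root);
}

int main() {
  Object from_space{1};
  Object to_space{2};
  forwarding[&from_space] = &to_space;  // pretend the GC moved the object

  Object* field = &from_space;          // a stale heap reference
  std::printf("%d\n", ReadBarrierSlow(field)->payload);          // prints 2
  std::printf("%d\n", ReadBarrierForRootSlow(&field)->payload);  // prints 2
  return 0;
}

In the patch itself, this split shows up as the new SlowPathCode classes
(ReadBarrierForHeapReferenceSlowPathX86 and ReadBarrierForRootSlowPathX86
below), which save live registers, marshal the (ref, obj, offset/index)
arguments into the runtime calling convention, invoke the entry point,
and move the result from EAX back into the output location.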
46 files changed, 2042 insertions(+), 521 deletions(-)
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index c37cecaeac..e6cc50cc5e 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -223,6 +223,11 @@ void CommonCompilerTest::SetCompilerKind(Compiler::Kind compiler_kind) { compiler_kind_ = compiler_kind; } +InstructionSet CommonCompilerTest::GetInstructionSet() const { + DCHECK(compiler_driver_.get() != nullptr); + return compiler_driver_->GetInstructionSet(); +} + void CommonCompilerTest::TearDown() { timer_.reset(); compiler_driver_.reset(); diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 67b4428324..a121f8b7a0 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -61,6 +61,8 @@ class CommonCompilerTest : public CommonRuntimeTest { Compiler::Kind GetCompilerKind() const; void SetCompilerKind(Compiler::Kind compiler_kind); + InstructionSet GetInstructionSet() const; + // Get the set of image classes given to the compiler-driver in SetUp. Note: the compiler // driver assumes ownership of the set, so the test should properly release the set. virtual std::unordered_set<std::string>* GetImageClasses(); @@ -115,6 +117,30 @@ class CommonCompilerTest : public CommonRuntimeTest { return; \ } +// TODO: When read barrier works with all compilers in use, get rid of this. +#define TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK() \ + if (kUseReadBarrier && GetCompilerKind() == Compiler::kQuick) { \ + printf("WARNING: TEST DISABLED FOR READ BARRIER WITH QUICK\n"); \ + return; \ + } + +// TODO: When read barrier works with all Optimizing back ends, get rid of this. +#define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \ + if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) { \ + switch (GetInstructionSet()) { \ + case kX86: \ + case kX86_64: \ + /* Instruction set has read barrier support. */ \ + break; \ + \ + default: \ + /* Instruction set does not have barrier support. */ \ + printf("WARNING: TEST DISABLED FOR READ BARRIER WITH OPTIMIZING " \ + "FOR THIS INSTRUCTION SET\n"); \ + return; \ + } \ + } + // TODO: When non-PIC works with all compilers in use, get rid of this. #define TEST_DISABLED_FOR_NON_PIC_COMPILING_WITH_OPTIMIZING() \ if (GetCompilerKind() == Compiler::kOptimizing) { \ diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 6673ea8ac5..05dde9f649 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -673,6 +673,12 @@ CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } + if (kEmitCompilerReadBarrier) { + VLOG(compiler) << "Skipping method : " << PrettyMethod(method_idx, dex_file) + << " Reason = Quick does not support read barrier."; + return nullptr; + } + // TODO: check method fingerprint here to determine appropriate backend type. Until then, use // build default. 
CompilerDriver* driver = GetCompilerDriver(); diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 1107599779..f8de9fa4a1 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -147,6 +147,8 @@ TEST_F(CompilerDriverTest, DISABLED_LARGE_CompileDexLibCore) { TEST_F(CompilerDriverTest, AbstractMethodErrorStub) { TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK(); + TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK(); + TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS(); jobject class_loader; { ScopedObjectAccess soa(Thread::Current()); @@ -193,6 +195,8 @@ class CompilerDriverMethodsTest : public CompilerDriverTest { TEST_F(CompilerDriverMethodsTest, Selection) { TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK(); + TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK(); + TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS(); Thread* self = Thread::Current(); jobject class_loader; { diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 16f641ab56..030451c1cb 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -232,7 +232,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(28U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(113 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(114 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, OatHeaderIsValid) { diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 5188e115e0..77d53fcd8f 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -310,7 +310,7 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fp_registers, + size_t maximum_number_of_live_fpu_registers, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order) { block_order_ = &block_order; @@ -324,14 +324,14 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, && IsLeafMethod() && !RequiresCurrentMethod()) { DCHECK_EQ(maximum_number_of_live_core_registers, 0u); - DCHECK_EQ(maximum_number_of_live_fp_registers, 0u); + DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u); SetFrameSize(CallPushesPC() ? GetWordSize() : 0); } else { SetFrameSize(RoundUp( number_of_spill_slots * kVRegSize + number_of_out_slots * kVRegSize + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize() + FrameEntrySpillSize(), kStackAlignment)); } @@ -547,15 +547,19 @@ void CodeGenerator::GenerateUnresolvedFieldAccess( } } +// TODO: Remove argument `code_generator_supports_read_barrier` when +// all code generators have read barrier support. void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls, Location runtime_type_index_location, - Location runtime_return_location) { + Location runtime_return_location, + bool code_generator_supports_read_barrier) { ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena(); LocationSummary::CallKind call_kind = cls->NeedsAccessCheck() ? LocationSummary::kCall - : (cls->CanCallRuntime() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall); + : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) || + cls->CanCallRuntime()) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind); if (cls->NeedsAccessCheck()) { locations->SetInAt(0, Location::NoLocation()); @@ -1320,21 +1324,38 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod // coherent with the runtime call generated, and that the GC side effect is // set when required. if (slow_path == nullptr) { - DCHECK(instruction->GetLocations()->WillCall()) << instruction->DebugName(); + DCHECK(instruction->GetLocations()->WillCall()) + << "instruction->DebugName()=" << instruction->DebugName(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) - << instruction->DebugName() << instruction->GetSideEffects().ToString(); + << "instruction->DebugName()=" << instruction->DebugName() + << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString(); } else { DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal()) - << instruction->DebugName() << slow_path->GetDescription(); + << "instruction->DebugName()=" << instruction->DebugName() + << " slow_path->GetDescription()=" << slow_path->GetDescription(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || // Control flow would not come back into the code if a fatal slow // path is taken, so we do not care if it triggers GC. slow_path->IsFatal() || // HDeoptimize is a special case: we know we are not coming back from // it into the code. - instruction->IsDeoptimize()) - << instruction->DebugName() << instruction->GetSideEffects().ToString() - << slow_path->GetDescription(); + instruction->IsDeoptimize() || + // When read barriers are enabled, some instructions use a + // slow path to emit a read barrier, which does not trigger + // GC, is not fatal, nor is emitted by HDeoptimize + // instructions. + (kEmitCompilerReadBarrier && + (instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArraySet() || + instruction->IsArrayGet() || + instruction->IsLoadClass() || + instruction->IsLoadString() || + instruction->IsInstanceOf() || + instruction->IsCheckCast()))) + << "instruction->DebugName()=" << instruction->DebugName() + << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString() + << " slow_path->GetDescription()=" << slow_path->GetDescription(); } // Check the coherency of leaf information. @@ -1346,11 +1367,12 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod } void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* register_set = locations->GetLiveRegisters(); + RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (register_set->ContainsCoreRegister(i)) { + if (live_registers->ContainsCoreRegister(i)) { // If the register holds an object, update the stack mask. 
if (locations->RegisterContainsObject(i)) { locations->SetStackBit(stack_offset / kVRegSize); @@ -1365,7 +1387,7 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (register_set->ContainsFloatingPointRegister(i)) { + if (live_registers->ContainsFloatingPointRegister(i)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); saved_fpu_stack_offsets_[i] = stack_offset; @@ -1376,12 +1398,14 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo } void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* register_set = locations->GetLiveRegisters(); + RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (register_set->ContainsCoreRegister(i)) { + if (live_registers->ContainsCoreRegister(i)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); stack_offset += codegen->RestoreCoreRegister(stack_offset, i); } } @@ -1389,8 +1413,9 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (register_set->ContainsFloatingPointRegister(i)) { + if (live_registers->ContainsFloatingPointRegister(i)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i); } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2108abefcc..114d97be94 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -201,7 +201,7 @@ class CodeGenerator { virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; void InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fp_registers, + size_t maximum_number_of_live_fpu_registers, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order); int32_t GetStackSlot(HLocal* local) const; @@ -250,6 +250,15 @@ class CodeGenerator { // Returns whether we should split long moves in parallel moves. virtual bool ShouldSplitLongMoves() const { return false; } + size_t GetNumberOfCoreCalleeSaveRegisters() const { + return POPCOUNT(core_callee_save_mask_); + } + + size_t GetNumberOfCoreCallerSaveRegisters() const { + DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters()); + return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters(); + } + bool IsCoreCalleeSaveRegister(int reg) const { return (core_callee_save_mask_ & (1 << reg)) != 0; } @@ -416,7 +425,8 @@ class CodeGenerator { // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design. 
static void CreateLoadClassLocationSummary(HLoadClass* cls, Location runtime_type_index_location, - Location runtime_return_location); + Location runtime_return_location, + bool code_generator_supports_read_barrier = false); static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 9b78dec6c4..851bced09a 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -420,7 +420,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, : CodeGenerator(graph, kNumberOfGpuRegisters, kNumberOfFpuRegisters, - 0, // kNumberOfRegisterPairs + /* number_of_register_pairs */ 0, ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), arraysize(kCoreCalleeSaves)), ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0147b010f2..32dc636d1d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -35,6 +35,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace x86 { static constexpr int kCurrentMethodStackOffset = 0; @@ -300,15 +303,6 @@ class TypeCheckSlowPathX86 : public SlowPathCode { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. - Register obj = locations->InAt(0).AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -329,12 +323,15 @@ class TypeCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { @@ -425,6 +422,221 @@ class ArraySetSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ movl(out, Address(out, offset)); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = index_.AsRegister<Register>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::x86::X86Assembler::shll and + // art::x86::X86Assembler::AddImmediate below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ movl(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). 
+ __ shll(index_reg, Immediate(TIMES_4)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddImmediate(index_reg, Immediate(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_)); + } + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + x86_codegen->Move32(out_, Location::RegisterLocation(EAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<Register>()); + size_t obj = static_cast<int>(obj_.AsRegister<Register>()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return static_cast<Register>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on x86 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86); +}; + +// Slow path generating a read barrier for a GC root. 
+class ReadBarrierForRootSlowPathX86 : public SlowPathCode { + public: + ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + x86_codegen->Move32(out_, Location::RegisterLocation(EAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86); +}; + #undef __ #define __ down_cast<X86Assembler*>(GetAssembler())-> @@ -513,9 +725,9 @@ void CodeGeneratorX86::InvokeRuntime(int32_t entry_point_offset, } CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, @@ -582,7 +794,7 @@ Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const { @@ -783,7 +995,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type t LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorX86::Move32(Location destination, Location source) { @@ -1767,6 +1979,9 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { + // This call to HandleInvoke allocates a temporary (core) register + // which is also used to transfer the hidden argument from FP to + // core register. HandleInvoke(invoke); // Add the hidden argument. invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7)); @@ -1774,31 +1989,42 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
- Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + LocationSummary* locations = invoke->GetLocations(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // Set the hidden argument. + // Set the hidden argument. This is safe to do this here, as XMM7 + // won't be modified thereafter, before the `call` instruction. + DCHECK_EQ(XMM7, hidden_reg); __ movl(temp, Immediate(invoke->GetDexMethodIndex())); - __ movd(invoke->GetLocations()->GetTemp(1).AsFpuRegister<XmmRegister>(), temp); + __ movd(hidden_reg, temp); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ movl(temp, Address(ESP, receiver.GetStackIndex())); + // /* HeapReference<Class> */ temp = temp->klass_ __ movl(temp, Address(temp, class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86WordSize).Int32Value())); + __ call(Address(temp, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -3871,7 +4097,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, method_reg = reg; __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } - // temp = temp->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ movl(reg, Address(method_reg, ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value())); // temp = temp[index_in_cache] @@ -3915,10 +4141,17 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // temp = object->GetClass(); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. 
+ // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); @@ -3980,18 +4213,29 @@ void CodeGeneratorX86::MarkGCCard(Register temp, void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + kEmitCompilerReadBarrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - // The output overlaps in case of long: we don't want the low move to overwrite - // the object's location. - locations->SetOut(Location::RequiresRegister(), - (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap - : Location::kNoOutputOverlap); + // The output overlaps in case of long: we don't want the low move + // to overwrite the object's location. Likewise, in the case of + // an object field get with read barriers enabled, we do not want + // the move to overwrite the object's location, as we need it to emit + // the read barrier. + locations->SetOut( + Location::RequiresRegister(), + (object_field_get_with_read_barrier || instruction->GetType() == Primitive::kPrimLong) ? + Location::kOutputOverlap : + Location::kNoOutputOverlap); } if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { @@ -4007,7 +4251,8 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - Register base = locations->InAt(0).AsRegister<Register>(); + Location base_loc = locations->InAt(0); + Register base = base_loc.AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); Primitive::Type field_type = field_info.GetFieldType(); @@ -4082,7 +4327,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<Register>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -4410,24 +4655,35 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - // The output overlaps in case of long: we don't want the low move to overwrite - // the array's location. - locations->SetOut(Location::RequiresRegister(), - (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap - : Location::kNoOutputOverlap); + // The output overlaps in case of long: we don't want the low move + // to overwrite the array's location. Likewise, in the case of an + // object array get with read barriers enabled, we do not want the + // move to overwrite the array's location, as we need it to emit + // the read barrier. + locations->SetOut( + Location::RequiresRegister(), + (instruction->GetType() == Primitive::kPrimLong || object_array_get_with_read_barrier) ? + Location::kOutputOverlap : + Location::kNoOutputOverlap); } } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -4482,6 +4738,9 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { @@ -4546,8 +4805,17 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } if (type == Primitive::kPrimNot) { - Register out = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -4558,14 +4826,18 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // optimization. Primitive::Type value_type = instruction->GetComponentType(); + bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); bool is_byte_type = (value_type == Primitive::kPrimBoolean) || (value_type == Primitive::kPrimByte); @@ -4586,20 +4858,21 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + locations->AddTemp(Location::RegisterLocation(ECX)); // Possibly used for read barrier too. } } void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register array = locations->InAt(0).AsRegister<Register>(); + Location array_loc = locations->InAt(0); + Register array = array_loc.AsRegister<Register>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -4639,6 +4912,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<Register>(), TIMES_4, offset); + if (!value.IsRegister()) { // Just setting null. DCHECK(instruction->InputAt(2)->IsNullConstant()); @@ -4646,7 +4920,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { __ movl(address, Immediate(0)); codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4655,7 +4929,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { NearLabel done, not_null, do_put; SlowPathCode* slow_path = nullptr; Register temp = locations->GetTemp(0).AsRegister<Register>(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4667,22 +4941,62 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { __ Bind(¬_null); } - __ movl(temp, Address(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, component_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ cmpl(temp, Address(register_value, class_offset)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - __ j(kEqual, &do_put); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, super_offset)); - // No need to unpoison, we're comparing against null.. 
- __ testl(temp, temp); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ movl(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ movl(temp, Address(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = register_value->klass_ + // __ movl(temp2, Address(register_value, class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); + // + // __ cmpl(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ jmp(slow_path->GetEntryLabel()); } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); + // /* HeapReference<Class> */ temp = array->klass_ + __ movl(temp, Address(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ movl(temp, Address(temp, component_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor the object reference in `register_value->klass`, as + // we are comparing two poisoned references. + __ cmpl(temp, Address(register_value, class_offset)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ movl(temp, Address(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. 
+ __ testl(temp, temp); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); + } } } @@ -4693,7 +5007,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } else { __ movl(address, register_value); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4708,6 +5022,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } + case Primitive::kPrimInt: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = index.IsConstant() @@ -5178,7 +5493,8 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(EAX)); + Location::RegisterLocation(EAX), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { @@ -5192,18 +5508,40 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { return; } - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leal(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } } else { DCHECK(cls->CanCallRuntime()); - __ movl(out, Address( - current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. 
+ // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ movl(out, Address(current_method, + ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ leal(out, Address(out, cache_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ movl(out, Address(out, cache_offset)); + } SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); @@ -5257,12 +5595,35 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leal(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ leal(out, Address(out, cache_offset)); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ movl(out, Address(out, cache_offset)); + } + __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -5306,40 +5667,44 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86 uses this register too. - locations->SetOut(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(EAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86 uses this "out" register too. + locations->SetOut(Location::RequiresRegister()); + // When read barriers are enabled, we need a temporary register for + // some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location cls = locations->InAt(1); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5354,15 +5719,9 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ movl(target, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ movl(out, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -5379,13 +5738,23 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ jmp(&done); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. NearLabel loop; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5402,6 +5771,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop, success; @@ -5413,8 +5783,17 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &success); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -5426,6 +5805,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. NearLabel exact_check; @@ -5436,9 +5816,18 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ movl(out, Address(out, component_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. 
__ j(kEqual, &done); @@ -5449,6 +5838,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ jmp(&done); break; } + case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<Register>()); @@ -5457,8 +5847,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -5467,13 +5857,25 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ jmp(&done); } @@ -5498,75 +5900,73 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - break; - case TypeCheckKind::kInterfaceCheck: - case TypeCheckKind::kUnresolvedCheck: - call_kind = LocationSummary::kCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; case TypeCheckKind::kArrayCheck: + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86 uses this register too. 
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location cls = locations->InAt(1); - Register temp = locations->WillCall() - ? kNoRegister - : locations->GetTemp(0).AsRegister<Register>(); - + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); - NearLabel done, abstract_entry; + NearLabel done; // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { __ testl(obj, obj); __ j(kEqual, &done); } - if (locations->WillCall()) { - __ movl(obj, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { @@ -5577,19 +5977,44 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } // Jump to slow path for throwing the exception or doing a // more involved array check. 
- __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - NearLabel loop, success; + NearLabel loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. __ testl(temp, temp); - // Jump to the slow path to throw the exception. - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // Before doing so, move the object's class back into `temp`, + // as it has been overwritten in the meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); } else { @@ -5599,6 +6024,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ j(kNotEqual, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop; @@ -5610,16 +6036,39 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back to the beginning of the loop. __ testl(temp, temp); __ j(kNotEqual, &loop); - // Jump to the slow path to throw the exception. - __ jmp(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // Before doing so, move the object's class back into `temp`, + // as it has been overwritten in the meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + NearLabel check_non_primitive_component_type; if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); } else { @@ -5627,29 +6076,67 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ movl(temp, Address(temp, component_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. __ testl(temp, temp); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // Before doing so, move the object's class back into `temp`, + // as it has been overwritten in the meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kEqual, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require + // assigning fixed registers for the inputs of this HCheckCast + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method.
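For comparison, the direct-call scheme removed above pinned the inputs of the check to the runtime calling convention. The removed lines in this hunk correspond to a setup of roughly the following shape, and those fixed registers are exactly what an earlier read barrier emission could clobber (a sketch reconstructed from the removed code, not new code in this commit):

    InvokeRuntimeCallingConvention calling_convention;
    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
    // ... later, in the code generator:
    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
                            instruction,
                            instruction->GetDexPc(),
                            nullptr);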
+ __ jmp(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -5800,6 +6287,82 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. + * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<Register>()); + } +} + +void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 177a059df8..cd606f697e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -397,6 +397,51 @@ class CodeGeneratorX86 : public CodeGenerator { void Finalize(CodeAllocator* allocator) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index e2ad6673c7..d55c084618 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -34,6 +34,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace x86_64 { static constexpr int kCurrentMethodStackOffset = 0; @@ -52,16 +55,16 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. 
SaveLiveRegisters(codegen, instruction_->GetLocations()); } - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + instruction_, + instruction_->GetDexPc(), + this); } bool IsFatal() const OVERRIDE { return true; } @@ -78,16 +81,16 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), + instruction_, + instruction_->GetDexPc(), + this); } bool IsFatal() const OVERRIDE { return true; } @@ -139,18 +142,18 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { : instruction_(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), + instruction_, + instruction_->GetDexPc(), + this); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { - __ jmp(x64_codegen->GetLabelOf(successor_)); + __ jmp(x86_64_codegen->GetLabelOf(successor_)); } } @@ -180,7 +183,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. 
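All of the renamed slow paths above share one calling pattern: bind the entry label, save the live registers, call the runtime entry point through InvokeRuntime, restore the registers, and jump to the exit label. A minimal sketch of that pattern (FooSlowPathX86_64 and pFoo are placeholders for illustration, not code from this commit):

    class FooSlowPathX86_64 : public SlowPathCode {
     public:
      explicit FooSlowPathX86_64(HInstruction* instruction) : instruction_(instruction) {}

      void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
        CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
        __ Bind(GetEntryLabel());  // Target of the fast path's branch.
        SaveLiveRegisters(codegen, instruction_->GetLocations());
        x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pFoo),  // Hypothetical entry point.
                                      instruction_,
                                      instruction_->GetDexPc(),
                                      this);
        RestoreLiveRegisters(codegen, instruction_->GetLocations());
        __ jmp(GetExitLabel());  // Resume the fast path.
      }

      const char* GetDescription() const OVERRIDE { return "FooSlowPathX86_64"; }

     private:
      HInstruction* const instruction_;

      DISALLOW_COPY_AND_ASSIGN(FooSlowPathX86_64);
    };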
@@ -196,8 +199,10 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), - instruction_, instruction_->GetDexPc(), this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + instruction_, + instruction_->GetDexPc(), + this); } bool IsFatal() const OVERRIDE { return true; } @@ -222,22 +227,25 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = at_->GetLocations(); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex())); - x64_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) - : QUICK_ENTRY_POINT(pInitializeType), - at_, dex_pc_, this); + x86_64_codegen->InvokeRuntime(do_clinit_ ? + QUICK_ENTRY_POINT(pInitializeStaticStorage) : + QUICK_ENTRY_POINT(pInitializeType), + at_, + dex_pc_, + this); Location out = locations->Out(); // Move the class to the desired location. if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - x64_codegen->Move(out, Location::RegisterLocation(RAX)); + x86_64_codegen->Move(out, Location::RegisterLocation(RAX)); } RestoreLiveRegisters(codegen, locations); @@ -271,18 +279,18 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction_->GetStringIndex())); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), - instruction_, - instruction_->GetDexPc(), - this); - x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), + instruction_, + instruction_->GetDexPc(), + this); + x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -308,18 +316,9 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. 
- CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -336,21 +335,24 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction_, - dex_pc, - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), + instruction_, + dex_pc, + this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction_, - dex_pc, - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), + instruction_, + dex_pc, + this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { if (instruction_->IsInstanceOf()) { - x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); + x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); } RestoreLiveRegisters(codegen, locations); @@ -375,15 +377,15 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); DCHECK(instruction_->IsDeoptimize()); HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - deoptimize, - deoptimize->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + deoptimize, + deoptimize->GetDexPc(), + this); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -421,11 +423,11 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { nullptr); codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction_, - instruction_->GetDexPc(), - this); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction_, + instruction_->GetDexPc(), + this); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -438,6 +440,219 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; +// Slow path generating a read barrier for a heap reference. 
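// It marshals the load's (ref, obj, offset/index) operands into the
// runtime calling convention, calls the artReadBarrierSlow entry point,
// and copies the returned reference from RAX into `out`, matching the
// declaration comments added in code_generator_x86.h above.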
+class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial + // object has been overwritten by (or after) the heap object + // reference load to be instrumented, e.g.: + // + // __ movl(out, Address(out, offset)); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. + DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + CpuRegister reg_out = out_.AsRegister<CpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), we + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the real offset and store it in `index`. + Register index_reg = index_.AsRegister<CpuRegister>().AsRegister(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::x86_64::X86_64Assembler::shll and + // art::x86_64::X86_64Assembler::AddImmediate below), but it + // has not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available.
+ Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister(); + __ movl(CpuRegister(free_reg), CpuRegister(index_reg)); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the + // scale factor (2) cannot overflow in practice, as the + // runtime is unable to allocate object arrays with a size + // larger than 2^26 - 1 (that is, 2^28 - 4 bytes). + __ shll(CpuRegister(index_reg), Immediate(TIMES_4)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddImmediate(CpuRegister(index_reg), Immediate(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegister()); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_)); + } + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierForHeapReferenceSlowPathX86_64"; + } + + private: + CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister()); + size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return static_cast<CpuRegister>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on x86-64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). 
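    // (For orientation: under the x86-64 conventions ART follows, the core
    // caller-save registers are expected to include RAX, RCX, RDX, RSI, RDI
    // and R8-R11; this enumeration is illustrative and not asserted by the
    // surrounding code.)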
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64); +}; + #undef __ #define __ down_cast<X86_64Assembler*>(GetAssembler())-> @@ -533,7 +748,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo method_reg = reg.AsRegister(); __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } - // temp = temp->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ movq(reg, Address(CpuRegister(method_reg), ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue())); @@ -578,10 +793,17 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); - // temp = object->GetClass(); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). 
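    // (A note on the unpoisoning below: MaybeUnpoisonHeapReference is a
    // no-op when kPoisonHeapReferences is off; when it is on, unpoisoning
    // a 32-bit reference on x86-64 amounts to a single register negation,
    // so keeping it on this invoke path is cheap. This describes the
    // assembler helper as understood at the time of this change.)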
__ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); __ movq(temp, Address(temp, method_offset)); @@ -672,9 +894,9 @@ static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, - const X86_64InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, @@ -728,7 +950,7 @@ Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { @@ -1836,7 +2058,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Typ LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { @@ -1907,7 +2129,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1920,31 +2141,41 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); + LocationSummary* locations = invoke->GetLocations(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); - // Set the hidden argument. - CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(); + // Set the hidden argument. This is safe to do here, as RAX + // won't be modified thereafter, before the `call` instruction. + DCHECK_EQ(RAX, hidden_reg.AsRegister()); codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); + // /* HeapReference<Class> */ temp = temp->klass_ __ movl(temp, Address(temp, class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); __ movq(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64WordSize).SizeValue())); + __ call(Address(temp, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -3686,13 +3917,23 @@ void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the move to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } @@ -3701,7 +3942,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location base_loc = locations->InAt(0); + CpuRegister base = base_loc.AsRegister<CpuRegister>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); Primitive::Type field_type = field_info.GetFieldType(); @@ -3761,7 +4003,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -4079,20 +4321,31 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object array get when read barriers + // are enabled: we do not want the move to overwrite the array's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + Location obj_loc = locations->InAt(0); + CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -4147,8 +4400,9 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); if (index.IsConstant()) { @@ -4203,8 +4457,17 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -4214,10 +4477,14 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call || object_array_set_with_read_barrier) ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); @@ -4229,18 +4496,24 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { if (needs_write_barrier) { // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + + // This first temporary register is possibly used for heap + // reference poisoning and/or read barrier emission too. + locations->AddTemp(Location::RequiresRegister()); + // This second temporary register is possibly used for read + // barrier emission too. locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister array = locations->InAt(0).AsRegister<CpuRegister>(); + Location array_loc = locations->InAt(0); + CpuRegister array = array_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -4284,6 +4557,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); + if (!value.IsRegister()) { // Just setting null. DCHECK(instruction->InputAt(2)->IsNullConstant()); @@ -4312,22 +4586,62 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ Bind(¬_null); } - __ movl(temp, Address(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, component_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ cmpl(temp, Address(register_value, class_offset)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - __ j(kEqual, &do_put); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, super_offset)); - // No need to unpoison the result, we're comparing against null. - __ testl(temp, temp); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ movl(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ movl(temp, Address(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = register_value->klass_ + // __ movl(temp2, Address(register_value, class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); + // + // __ cmpl(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). 
So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ jmp(slow_path->GetEntryLabel()); } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); + // /* HeapReference<Class> */ temp = array->klass_ + __ movl(temp, Address(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ movl(temp, Address(temp, component_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor the object reference in `register_value->klass`, as + // we are comparing two poisoned references. + __ cmpl(temp, Address(register_value, class_offset)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ movl(temp, Address(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ testl(temp, temp); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); + } } } @@ -4353,6 +4667,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } + case Primitive::kPrimInt: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = index.IsConstant() @@ -4802,7 +5117,8 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(RAX)); + Location::RegisterLocation(RAX), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { @@ -4816,18 +5132,40 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { return; } - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leaq(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } } else { DCHECK(cls->CanCallRuntime()); - __ movq(out, Address( - current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. 
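The lines replacing this TODO follow the GC-root pattern used throughout this change: with read barriers enabled, the code materializes the address of the GcRoot slot with leaq and hands it to GenerateReadBarrierForRoot, which produces the actual reference; otherwise it loads the root's 32-bit reference directly. Schematically (a condensed restatement of the pattern below, with `base` and `root_offset` standing in for the concrete operands):

    if (kEmitCompilerReadBarrier) {
      // /* GcRoot<mirror::Object>* */ out = &root_slot
      __ leaq(out, Address(base, root_offset));
      // The slow path calls artReadBarrierForRootSlow(out) and moves the
      // result back into `out`.
      codegen_->GenerateReadBarrierForRoot(instruction, out_loc, out_loc);
    } else {
      // /* GcRoot<mirror::Object> */ out = root_slot
      __ movl(out, Address(base, root_offset));
    }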
+ // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ movq(out, Address(current_method, + ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ leaq(out, Address(out, cache_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ movl(out, Address(out, cache_offset)); + } SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); @@ -4872,12 +5210,35 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leaq(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ leaq(out, Address(out, cache_offset)); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ movl(out, Address(out, cache_offset)); + } + __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -4921,40 +5282,44 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86_64 uses this register too. - locations->SetOut(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(RAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86_64 uses this "out" register too. + locations->SetOut(Location::RequiresRegister()); + // When read barriers are enabled, we need a temporary register for + // some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + Location obj_loc = locations->InAt(0); + CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -4969,15 +5334,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - CpuRegister target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ movl(target, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ movl(out, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -4999,13 +5358,23 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. NearLabel loop, success; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5022,6 +5391,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop, success; @@ -5033,8 +5403,17 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &success); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -5046,6 +5425,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. NearLabel exact_check; @@ -5056,9 +5436,18 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ movl(out, Address(out, component_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. 
__ j(kEqual, &done); @@ -5069,6 +5458,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ jmp(&done); break; } + case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<CpuRegister>()); @@ -5077,8 +5467,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -5087,13 +5477,25 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on the slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require assigning fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ jmp(&done); } @@ -5118,58 +5520,60 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86_64 uses this register too.
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + Location obj_loc = locations->InAt(0); + CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); - CpuRegister temp = locations->WillCall() - ? CpuRegister(kNoRegister) - : locations->GetTemp(0).AsRegister<CpuRegister>(); - + Location temp_loc = locations->GetTemp(0); + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); NearLabel done; // Avoid null check if we know obj is not null. @@ -5178,15 +5582,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); } - if (locations->WillCall()) { - __ movl(obj, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { @@ -5197,19 +5597,44 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // Jump to slow path for throwing the exception or doing a // more involved array check. 
- __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - NearLabel loop; + NearLabel loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. __ testl(temp, temp); - // Jump to the slow path to throw the exception. - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // But before doing so, move the object's class back into `temp`, + // as it has been overwritten in the meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { @@ -5219,6 +5644,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kNotEqual, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop; @@ -5230,16 +5656,39 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back to the beginning of the loop. __ testl(temp, temp); __ j(kNotEqual, &loop); - // Jump to the slow path to throw the exception. - __ jmp(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before doing so, move the object's class back into `temp`, + // as it has been overwritten in the meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + NearLabel check_non_primitive_component_type; if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { @@ -5247,29 +5696,67 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ movl(temp, Address(temp, component_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. __ testl(temp, temp); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before doing so, move the object's class back into `temp`, + // as it has been overwritten in the meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kEqual, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require + // assigning fixed registers for the inputs of this HCheckCast + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method.
+ __ jmp(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -5402,6 +5889,82 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } +void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. + * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<CpuRegister>()); + } +} + +void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 708450835d..5791fcd0e6 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -352,6 +352,51 @@ class CodeGeneratorX86_64 : public CodeGenerator { return isa_features_; } + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` provided (i.e., when it is different from + // Location::NoLocation()), the offset value passed to + // artReadBarrierSlow is adjusted to take `index` into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + int ConstantAreaStart() const { return constant_area_start_; } diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 040bf6a45e..371588fc47 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -55,7 +55,23 @@ ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() { bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathX86 slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathX86 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. 
+ invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { @@ -1571,26 +1587,32 @@ void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) { GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>())); } -static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, - bool is_volatile, X86Assembler* assembler) { - Register base = locations->InAt(1).AsRegister<Register>(); - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); - Location output = locations->Out(); +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile, + CodeGeneratorX86* codegen) { + X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + Location base_loc = locations->InAt(1); + Register base = base_loc.AsRegister<Register>(); + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); + Location output_loc = locations->Out(); switch (type) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register output_reg = output.AsRegister<Register>(); - __ movl(output_reg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + Register output = output_loc.AsRegister<Register>(); + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); if (type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(output_reg); + codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); } break; } case Primitive::kPrimLong: { - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); + Register output_lo = output_loc.AsRegisterPairLow<Register>(); + Register output_hi = output_loc.AsRegisterPairHigh<Register>(); if (is_volatile) { // Need to use a XMM to read atomically. XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); @@ -1613,8 +1635,13 @@ static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long, bool is_volatile) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); @@ -1653,22 +1680,22 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); } @@ -1890,13 +1917,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); - // locked cmpxchg has full barrier semantics, and we don't need + // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. // Convert ZF into the boolean result. __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); + // In the case of the `UnsafeCASObject` intrinsic, accessing an + // object in the heap with LOCK CMPXCHG does not require a read + // barrier, as we do not keep a reference to this heap location. + // However, if heap poisoning is enabled, we need to unpoison the + // values that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value` has been moved to a temporary register, no need to @@ -1929,8 +1961,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code LOG(FATAL) << "Unexpected CAS type " << type; } - // locked cmpxchg has full barrier semantics, and we don't need - // scheduling barriers at this time. + // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we + // don't need scheduling barriers at this time. // Convert ZF into the boolean result. 
__ setb(kZero, out.AsRegister<Register>()); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index a29f3ef1d1..2d9f01b821 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -50,8 +50,24 @@ ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() { bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) { Dispatch(invoke); - const LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + LocationSummary* res = invoke->GetLocations(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathX86_64 slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathX86_64 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) { @@ -917,6 +933,10 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); } +// TODO: Implement read barriers in the SystemArrayCopy intrinsic. +// Note that this code path is not used (yet) because we do not +// intrinsify methods that can go into the IntrinsicSlowPathX86_64 +// slow path. void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1698,23 +1718,30 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true)); } -static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, - bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) { - CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); - CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); - CpuRegister trg = locations->Out().AsRegister<CpuRegister>(); +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile ATTRIBUTE_UNUSED, + CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + Location base_loc = locations->InAt(1); + CpuRegister base = base_loc.AsRegister<CpuRegister>(); + Location offset_loc = locations->InAt(2); + CpuRegister offset = offset_loc.AsRegister<CpuRegister>(); + Location output_loc = locations->Out(); + CpuRegister output = locations->Out().AsRegister<CpuRegister>(); switch (type) { case Primitive::kPrimInt: case Primitive::kPrimNot: - __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); if (type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); } break; case Primitive::kPrimLong: - __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); break; default: @@ 
-1724,8 +1751,13 @@ static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1754,22 +1786,22 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invo void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); } @@ -1961,13 +1993,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); - // locked cmpxchg has full barrier semantics, and we don't need + // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. // Convert ZF into the boolean result. __ setcc(kZero, out); __ movzxb(out, out); + // In the case of the `UnsafeCASObject` intrinsic, accessing an + // object in the heap with LOCK CMPXCHG does not require a read + // barrier, as we do not keep a reference to this heap location. + // However, if heap poisoning is enabled, we need to unpoison the + // values that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value_reg` has been moved to a temporary register, no need @@ -1992,7 +2029,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c LOG(FATAL) << "Unexpected CAS type " << type; } - // locked cmpxchg has full barrier semantics, and we don't need + // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. // Convert ZF into the boolean result. 
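Note on the heap-reference instrumentation above: the code generator and UnsafeGet intrinsics changes all wrap a reference load in the same three-way pattern. A minimal C++-level sketch of what the emitted code amounts to follows; kEmitCompilerReadBarrier and kPoisonHeapReferences are the real flags from runtime/globals.h and artReadBarrierSlow is the entry point declared later in this patch, while LoadHeapReference and UnpoisonReference are hypothetical stand-ins for the emitted movl and MaybeUnpoisonHeapReference.

    // Sketch only; not part of this patch. Hypothetical helpers
    // standing in for emitted code:
    extern mirror::Object* LoadHeapReference(mirror::Object* obj, uint32_t offset);
    extern mirror::Object* UnpoisonReference(mirror::Object* ref);

    mirror::Object* EmittedRefLoad(mirror::Object* obj, uint32_t offset) {
      // Original load: /* HeapReference<Object> */ ref = *(obj + offset)
      mirror::Object* ref = LoadHeapReference(obj, offset);
      if (kEmitCompilerReadBarrier) {
        // Slow path inserted after the load; when heap poisoning is
        // enabled, unpoisoning happens inside the runtime call.
        ref = artReadBarrierSlow(ref, obj, offset);
      } else if (kPoisonHeapReferences) {
        // No read barrier: only undo the heap reference poisoning.
        ref = UnpoisonReference(ref);
      }
      return ref;
    }

The GC root loads in VisitLoadClass and VisitLoadString follow the same shape with artReadBarrierForRootSlow, but without the poisoning branch, since GC roots are not affected by heap poisoning.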
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 1181007666..63bbc2cd0a 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -594,6 +594,10 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { return intrinsified_; } + void SetIntrinsified(bool intrinsified) { + intrinsified_ = intrinsified; + } + private: ArenaVector<Location> inputs_; ArenaVector<Location> temps_; @@ -613,7 +617,7 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { RegisterSet live_registers_; // Whether these are locations for an intrinsified call. - const bool intrinsified_; + bool intrinsified_; ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint); ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 2878ac9899..1da2a1dfd0 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1436,7 +1436,7 @@ class SideEffects : public ValueObject { return flags_ == (kAllChangeBits | kAllDependOnBits); } - // Returns true if this may read something written by other. + // Returns true if `this` may read something written by `other`. bool MayDependOn(SideEffects other) const { const uint64_t depends_on_flags = (flags_ & kAllDependOnBits) >> kChangeBits; return (other.flags_ & depends_on_flags); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 2be0680561..27ee47296c 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -383,6 +383,13 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == kX86_64; } +// Read barriers are supported only on x86 and x86-64 at the moment. +// TODO: Add support for other architectures and remove this function. +static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { return instruction_set == kX86 || instruction_set == kX86_64; } + static void RunOptimizations(HOptimization* optimizations[], size_t length, PassObserver* pass_observer) { @@ -673,6 +680,12 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, return nullptr; } + // When read barriers are enabled, do not attempt to compile for + // instruction sets that have no read barrier support. + if (kEmitCompilerReadBarrier && !InstructionSetSupportsReadBarrier(instruction_set)) { + return nullptr; + } + if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) { MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological); return nullptr; } @@ -841,9 +854,14 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, if (kIsDebugBuild && IsCompilingWithCoreImage() && - IsInstructionSetSupported(compiler_driver->GetInstructionSet())) { - // For testing purposes, we put a special marker on method names that should be compiled - // with this compiler. This makes sure we're not regressing. + IsInstructionSetSupported(compiler_driver->GetInstructionSet()) && + (!kEmitCompilerReadBarrier || + InstructionSetSupportsReadBarrier(compiler_driver->GetInstructionSet()))) { + // For testing purposes, we put a special marker on method names + // that should be compiled with this compiler (when the + // instruction set is supported -- and has support for read + // barriers, if they are enabled). This makes sure we're not + // regressing.
std::string method_name = PrettyMethod(method_idx, dex_file); bool shouldCompile = method_name.find("$opt$") != std::string::npos; DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name; diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc index ec45d6b2ca..9bbc354290 100644 --- a/compiler/optimizing/side_effects_test.cc +++ b/compiler/optimizing/side_effects_test.cc @@ -129,13 +129,13 @@ TEST(SideEffectsTest, NoDependences) { TEST(SideEffectsTest, VolatileDependences) { SideEffects volatile_write = - SideEffects::FieldWriteOfType(Primitive::kPrimInt, true); + SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ true); SideEffects any_write = - SideEffects::FieldWriteOfType(Primitive::kPrimInt, false); + SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false); SideEffects volatile_read = - SideEffects::FieldReadOfType(Primitive::kPrimByte, true); + SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ true); SideEffects any_read = - SideEffects::FieldReadOfType(Primitive::kPrimByte, false); + SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ false); EXPECT_FALSE(volatile_write.MayDependOn(any_read)); EXPECT_TRUE(any_read.MayDependOn(volatile_write)); @@ -151,15 +151,15 @@ TEST(SideEffectsTest, VolatileDependences) { TEST(SideEffectsTest, SameWidthTypes) { // Type I/F. testWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), - SideEffects::FieldReadOfType(Primitive::kPrimFloat, false)); + SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false), + SideEffects::FieldReadOfType(Primitive::kPrimFloat, /* is_volatile */ false)); testWriteAndReadDependence( SideEffects::ArrayWriteOfType(Primitive::kPrimInt), SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); // Type L/D. 
testWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimLong, false), - SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); + SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false), + SideEffects::FieldReadOfType(Primitive::kPrimDouble, /* is_volatile */ false)); testWriteAndReadDependence( SideEffects::ArrayWriteOfType(Primitive::kPrimLong), SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); @@ -171,9 +171,9 @@ TEST(SideEffectsTest, AllWritesAndReads) { for (Primitive::Type type = Primitive::kPrimNot; type < Primitive::kPrimVoid; type = Primitive::Type(type + 1)) { - s = s.Union(SideEffects::FieldWriteOfType(type, false)); + s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayWriteOfType(type)); - s = s.Union(SideEffects::FieldReadOfType(type, false)); + s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayReadOfType(type)); } EXPECT_TRUE(s.DoesAllReadWrite()); @@ -225,10 +225,10 @@ TEST(SideEffectsTest, BitStrings) { "||DJ|||||", // note: DJ alias SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str()); SideEffects s = SideEffects::None(); - s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, false)); - s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, false)); + s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort)); - s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, false)); + s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); EXPECT_STREQ( diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index 76c7c4f6f0..be33b0e235 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -164,9 +164,10 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Deoptimization from compiled code. qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; - // Read barrier + // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; qpoints->pReadBarrierSlow = artReadBarrierSlow; + qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; } } // namespace art diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index 371cbb2673..63285a4015 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -147,9 +147,10 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Deoptimization from compiled code. qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; - // Read barrier + // Read barrier. 
qpoints->pReadBarrierJni = ReadBarrierJni; qpoints->pReadBarrierSlow = artReadBarrierSlow; + qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; }; } // namespace art diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index 59421dd8b9..cba427dff8 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -271,10 +271,14 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pA64Store = QuasiAtomic::Write64; static_assert(IsDirectEntrypoint(kQuickA64Store), "Non-direct C stub marked direct."); + // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct."); qpoints->pReadBarrierSlow = artReadBarrierSlow; static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct."); + qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; + static_assert(IsDirectEntrypoint(kQuickReadBarrierForRootSlow), + "Direct C stub not marked direct."); }; } // namespace art diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index 417d5fc632..89f54ddc04 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -178,9 +178,10 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pA64Load = QuasiAtomic::Read64; qpoints->pA64Store = QuasiAtomic::Write64; - // Read barrier + // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; qpoints->pReadBarrierSlow = artReadBarrierSlow; + qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; }; } // namespace art diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 1d10e5db39..afa48cde34 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -2151,4 +2151,39 @@ TEST_F(StubTest, ReadBarrier) { #endif } +TEST_F(StubTest, ReadBarrierForRoot) { +#if defined(ART_USE_READ_BARRIER) && (defined(__i386__) || defined(__arm__) || \ + defined(__aarch64__) || defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__))) + Thread* self = Thread::Current(); + + const uintptr_t readBarrierForRootSlow = + StubTest::GetEntrypoint(self, kQuickReadBarrierForRootSlow); + + // Create an object + ScopedObjectAccess soa(self); + // garbage is created during ClassLinker::Init + + StackHandleScope<1> hs(soa.Self()); + + Handle<mirror::String> obj( + hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!"))); + + EXPECT_FALSE(self->IsExceptionPending()); + + GcRoot<mirror::Class>& root = mirror::String::java_lang_String_; + size_t result = Invoke3(reinterpret_cast<size_t>(&root), 0U, 0U, readBarrierForRootSlow, self); + + EXPECT_FALSE(self->IsExceptionPending()); + EXPECT_NE(reinterpret_cast<size_t>(nullptr), result); + mirror::Class* klass = reinterpret_cast<mirror::Class*>(result); + EXPECT_EQ(klass, obj->GetClass()); + + // Tests done. +#else + LOG(INFO) << "Skipping read_barrier_for_root_slow"; + // Force-print to std::cout so it's also outside the logcat. 
+ std::cout << "Skipping read_barrier_for_root_slow" << std::endl; +#endif +} + } // namespace art diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index 019546f2ce..e20001864d 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -29,6 +29,7 @@ extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass, // Read barrier entrypoints. extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t); +extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*); void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // JNI @@ -136,9 +137,10 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Deoptimize qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; - // Read barrier + // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; qpoints->pReadBarrierSlow = art_quick_read_barrier_slow; + qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow; }; } // namespace art diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 2f485ae644..6eacac1ede 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1588,14 +1588,22 @@ DEFINE_FUNCTION art_nested_signal_return END_FUNCTION art_nested_signal_return DEFINE_FUNCTION art_quick_read_barrier_slow - PUSH edx // pass arg3 - offset - PUSH ecx // pass arg2 - obj - PUSH eax // pass arg1 - ref - call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset) - addl LITERAL(12), %esp // pop arguments + PUSH edx // pass arg3 - offset + PUSH ecx // pass arg2 - obj + PUSH eax // pass arg1 - ref + call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset) + addl LITERAL(12), %esp // pop arguments CFI_ADJUST_CFA_OFFSET(-12) ret END_FUNCTION art_quick_read_barrier_slow +DEFINE_FUNCTION art_quick_read_barrier_for_root_slow + PUSH eax // pass arg1 - root + call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root) + addl LITERAL(4), %esp // pop argument + CFI_ADJUST_CFA_OFFSET(-4) + ret +END_FUNCTION art_quick_read_barrier_for_root_slow + // TODO: implement these! UNIMPLEMENTED art_quick_memcmp16 diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index eae09ee1f4..2b38c9db35 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -30,6 +30,7 @@ extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass, // Read barrier entrypoints. extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t); +extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*); void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { #if defined(__APPLE__) @@ -140,9 +141,10 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Deoptimize qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code; - // Read barrier + // Read barrier. 
qpoints->pReadBarrierJni = ReadBarrierJni; qpoints->pReadBarrierSlow = art_quick_read_barrier_slow; + qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow; #endif // __APPLE__ }; diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 5fd8969248..17d277e1fd 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1722,3 +1722,14 @@ DEFINE_FUNCTION art_quick_read_barrier_slow RESTORE_FP_CALLEE_SAVE_FRAME ret END_FUNCTION art_quick_read_barrier_slow + +DEFINE_FUNCTION art_quick_read_barrier_for_root_slow + SETUP_FP_CALLEE_SAVE_FRAME + subq LITERAL(8), %rsp // Alignment padding. + CFI_ADJUST_CFA_OFFSET(8) + call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root) + addq LITERAL(8), %rsp + CFI_ADJUST_CFA_OFFSET(-8) + RESTORE_FP_CALLEE_SAVE_FRAME + ret +END_FUNCTION art_quick_read_barrier_for_root_slow diff --git a/runtime/asm_support.h b/runtime/asm_support.h index 69f6fe96ff..b548dfb639 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -122,7 +122,7 @@ ADD_TEST_EQ(THREAD_SELF_OFFSET, art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value()) // Offset of field Thread::tlsPtr_.thread_local_pos. -#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 150 * __SIZEOF_POINTER__) +#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 151 * __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET, art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value()) // Offset of field Thread::tlsPtr_.thread_local_end. diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h index 9c78ee59dd..d6a44f7293 100644 --- a/runtime/base/bit_utils.h +++ b/runtime/base/bit_utils.h @@ -53,6 +53,7 @@ static constexpr int CTZ(T x) { : __builtin_ctzll(x); } +// Return the number of 1-bits in `x`. template<typename T> static constexpr int POPCOUNT(T x) { return (sizeof(T) == sizeof(uint32_t)) diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h index a474ae6361..6da2bef214 100644 --- a/runtime/common_runtime_test.h +++ b/runtime/common_runtime_test.h @@ -174,13 +174,6 @@ class CheckJniAbortCatcher { DISALLOW_COPY_AND_ASSIGN(CheckJniAbortCatcher); }; -// TODO: When read barrier works with the compiler, get rid of this. -#define TEST_DISABLED_FOR_READ_BARRIER() \ - if (kUseReadBarrier) { \ - printf("WARNING: TEST DISABLED FOR READ BARRIER\n"); \ - return; \ - } - #define TEST_DISABLED_FOR_MIPS() \ if (kRuntimeISA == kMips) { \ printf("WARNING: TEST DISABLED FOR MIPS\n"); \ diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index 3d3f7a1bdb..27865e3dc4 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -31,12 +31,12 @@ namespace art { namespace mirror { class Array; class Class; +template<class MirrorType> class CompressedReference; class Object; -template<class MirrorType> -class CompressedReference; } // namespace mirror class ArtMethod; +template<class MirrorType> class GcRoot; class Thread; // Pointers to functions that are called by quick compiler generated code via thread-local storage. @@ -72,9 +72,14 @@ extern void ReadBarrierJni(mirror::CompressedReference<mirror::Object>* handle_o Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; + // Read barrier entrypoints. -// Compilers for ARM, ARM64, MIPS, MIPS64 can insert a call to this function directly. 
-// For x86 and x86_64, compilers need a wrapper assembly function, to handle mismatch in ABI. +// +// Compilers for ARM, ARM64, MIPS, MIPS64 can insert a call to these +// functions directly. For x86 and x86-64, compilers need a wrapper +// assembly function to handle the mismatch in ABI. + +// Read barrier entrypoint for heap references. // This is the read barrier slow path for instance and static fields and reference-type arrays. // TODO: Currently the read barrier does not have a fast path for compilers to directly generate. // Ideally the slow path should only take one parameter "ref". @@ -82,6 +87,10 @@ extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref, mirror::Objec uint32_t offset) SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR; +// Read barrier entrypoint for GC roots. +extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root) + SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR; + } // namespace art #endif // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_H_ diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index 73d8ae76ae..ee7b986094 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -146,7 +146,8 @@ V(NewStringFromStringBuilder, void) \ \ V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \ - V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) + V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \ + V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) #endif // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ // #define is only for lint. diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc index 7361d34b6a..7ec5fc50e1 100644 --- a/runtime/entrypoints/quick/quick_field_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc @@ -14,14 +14,16 @@ * limitations under the License. */ +#include <stdint.h> + #include "art_field-inl.h" #include "art_method-inl.h" #include "callee_save_frame.h" #include "dex_file-inl.h" #include "entrypoints/entrypoint_utils-inl.h" +#include "gc_root-inl.h" #include "mirror/class-inl.h" - -#include <stdint.h> +#include "mirror/object_reference.h" namespace art { @@ -560,13 +562,25 @@ extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj // TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only // take one parameter "ref", which is given by the fast path. extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED, - mirror::Object* obj, uint32_t offset) { - DCHECK(kUseReadBarrier); + mirror::Object* obj, + uint32_t offset) { + DCHECK(kEmitCompilerReadBarrier); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset; mirror::HeapReference<mirror::Object>* ref_addr = - reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr); - return ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, true>(obj, MemberOffset(offset), - ref_addr); + reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr); + constexpr ReadBarrierOption kReadBarrierOption = + kUseReadBarrier ?
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 7361d34b6a..7ec5fc50e1 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -14,14 +14,16 @@
 * limitations under the License.
 */

+#include <stdint.h>
+
#include "art_field-inl.h"
#include "art_method-inl.h"
#include "callee_save_frame.h"
#include "dex_file-inl.h"
#include "entrypoints/entrypoint_utils-inl.h"
+#include "gc_root-inl.h"
#include "mirror/class-inl.h"
-
-#include <stdint.h>
+#include "mirror/object_reference.h"

namespace art {

@@ -560,13 +562,25 @@ extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj
// TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only
// take one parameter "ref", which is given by the fast path.
extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
-                                              mirror::Object* obj, uint32_t offset) {
-  DCHECK(kUseReadBarrier);
+                                              mirror::Object* obj,
+                                              uint32_t offset) {
+  DCHECK(kEmitCompilerReadBarrier);
  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset;
  mirror::HeapReference<mirror::Object>* ref_addr =
-      reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr);
-  return ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, true>(obj, MemberOffset(offset),
-                                                                      ref_addr);
+      reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr);
+  constexpr ReadBarrierOption kReadBarrierOption =
+      kUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+  mirror::Object* result =
+      ReadBarrier::Barrier<mirror::Object, kReadBarrierOption, true>(obj,
+                                                                     MemberOffset(offset),
+                                                                     ref_addr);
+  return result;
+}
+
+extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root) {
+  DCHECK(kEmitCompilerReadBarrier);
+  // TODO: Pass a GcRootSource object as second argument to GcRoot::Read?
+  return root->Read();
}

}  // namespace art
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 78f56eef8d..8587edee54 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -302,8 +302,10 @@ class EntrypointsOrderTest : public CommonRuntimeTest {
    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                         sizeof(void*));
    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
+                         sizeof(void*));

-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierSlow)
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierForRootSlow)
            + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all);
  }
};
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 477e67b3c2..3734bcc7e1 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -198,7 +198,7 @@ class GcRoot {
  ALWAYS_INLINE GcRoot(MirrorType* ref = nullptr) SHARED_REQUIRES(Locks::mutator_lock_);

 private:
-  // Root visitors take pointers to root_ and place the min CompressedReference** arrays. We use a
+  // Root visitors take pointers to root_ and place them in CompressedReference** arrays. We use a
  // CompressedReference<mirror::Object> here since it violates strict aliasing requirements to
  // cast CompressedReference<MirrorType>* to CompressedReference<mirror::Object>*.
  mutable mirror::CompressedReference<mirror::Object> root_;
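The reworked artReadBarrierSlow above recomputes the field address because the slow-path ABI passes the holder object and byte offset rather than just the reference (per the TODOs, a fast path that hands over `ref` directly does not exist yet). A simplified conceptual model of what it does, using plain pointers and an identity stand-in for ReadBarrier::Barrier instead of ART's real compressed-reference types:

    #include <cstdint>

    struct ObjectSketch;  // stand-in for mirror::Object

    // Stand-in for ReadBarrier::Barrier / the collector's mark operation;
    // identity models a collector that is not currently copying.
    static ObjectSketch* MarkSketch(ObjectSketch* ref) { return ref; }

    // Conceptual shape of the heap-reference slow path: recompute the field
    // address from the holder and byte offset, reload the reference, and
    // return its up-to-date (to-space) address.
    static ObjectSketch* ReadBarrierSlowSketch(ObjectSketch* obj, uint32_t offset) {
      uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset;
      // The real code reads a 32-bit compressed HeapReference here, not a
      // raw pointer.
      ObjectSketch** ref_addr = reinterpret_cast<ObjectSketch**>(raw_addr);
      return MarkSketch(*ref_addr);
    }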
diff --git a/runtime/globals.h b/runtime/globals.h
index 987a94ea4b..e7ea6f3788 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -87,8 +87,18 @@
static constexpr bool kUseTableLookupReadBarrier = false;
#endif

static constexpr bool kUseBakerOrBrooksReadBarrier = kUseBakerReadBarrier || kUseBrooksReadBarrier;
-static constexpr bool kUseReadBarrier = kUseBakerReadBarrier || kUseBrooksReadBarrier ||
-    kUseTableLookupReadBarrier;
+static constexpr bool kUseReadBarrier =
+    kUseBakerReadBarrier || kUseBrooksReadBarrier || kUseTableLookupReadBarrier;
+
+// Debugging flag that forces the generation of read barriers, but
+// does not trigger the use of the concurrent copying GC.
+//
+// TODO: Remove this flag when the read barriers compiler
+// instrumentation is completed.
+static constexpr bool kForceReadBarrier = false;
+// TODO: Likewise, remove this flag when kForceReadBarrier is removed
+// and replace it with kUseReadBarrier.
+static constexpr bool kEmitCompilerReadBarrier = kForceReadBarrier || kUseReadBarrier;

// If true, references within the heap are poisoned (negated).
#ifdef USE_HEAP_POISONING
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index b27a8849ed..7458424bac 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -122,7 +122,7 @@ class MANAGED PrimitiveArray : public Array {
  T Get(int32_t i) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_);

  T GetWithoutChecks(int32_t i) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(CheckIsValidIndex(i));
+    DCHECK(CheckIsValidIndex(i)) << "i=" << i << " length=" << GetLength();
    return GetData()[i];
  }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index c4339b9230..80e136c2cc 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1185,7 +1185,7 @@ class MANAGED Class FINAL : public Object {
  bool ProxyDescriptorEquals(const char* match) SHARED_REQUIRES(Locks::mutator_lock_);

-  // Check that the pointer size mathces the one in the class linker.
+  // Check that the pointer size matches the one in the class linker.
  ALWAYS_INLINE static void CheckPointerSize(size_t pointer_size);

  static MemberOffset EmbeddedImTableOffset(size_t pointer_size);
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index fbee2d7bf3..80ebd2cf0f 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -27,6 +27,7 @@ namespace art {
template<class T> class Handle;
struct StringOffsets;
class StringPiece;
+class StubTest_ReadBarrierForRoot_Test;

namespace mirror {

@@ -179,7 +180,7 @@ class MANAGED String FINAL : public Object {
  static GcRoot<Class> java_lang_String_;

  friend struct art::StringOffsets;  // for verifying offset information
-  ART_FRIEND_TEST(ObjectTest, StringLength);  // for SetOffset and SetCount
+  ART_FRIEND_TEST(art::StubTest, ReadBarrierForRoot);  // For java_lang_String_.

  DISALLOW_IMPLICIT_CONSTRUCTORS(String);
};
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index c7c270946b..62347203a9 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -507,6 +507,8 @@ class ReflectionTest : public CommonCompilerTest {

TEST_F(ReflectionTest, StaticMainMethod) {
  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
  ScopedObjectAccess soa(Thread::Current());
  jobject jclass_loader = LoadDex("Main");
  StackHandleScope<1> hs(soa.Self());
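The globals.h hunk above separates two concerns: kEmitCompilerReadBarrier gates whether the compiler emits barrier instrumentation, while kUseReadBarrier says whether the collector actually needs it. With kForceReadBarrier set, the artReadBarrierSlow hunk earlier selects kWithoutReadBarrier, so the instrumented load degrades to a plain read. A compressed sketch of that gating (flag values here are illustrative, not ART's defaults):

    // Illustrative flag values for the kForceReadBarrier debugging mode:
    // instrumentation on, concurrent copying collector off.
    constexpr bool kUseReadBarrierSketch = false;
    constexpr bool kForceReadBarrierSketch = true;
    constexpr bool kEmitCompilerReadBarrierSketch =
        kForceReadBarrierSketch || kUseReadBarrierSketch;

    // What the emitted slow path then amounts to: the compiled-in barrier
    // is inert and the load stays a plain read (cf. kWithoutReadBarrier).
    template <typename T>
    T* ReadReferenceSketch(T** address) {
      if (kUseReadBarrierSketch) {
        // A live collector would be consulted here (ReadBarrier::Barrier).
      }
      return *address;
    }

This lets the new instrumentation be exercised and debugged without turning on the concurrent copying GC itself.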
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 7b5bc1ad9a..098bde4e96 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -80,10 +80,7 @@
RUNTIME_OPTIONS_KEY (std::string, PatchOat)
RUNTIME_OPTIONS_KEY (bool, Relocate, kDefaultMustRelocate)
RUNTIME_OPTIONS_KEY (bool, Dex2Oat, true)
RUNTIME_OPTIONS_KEY (bool, ImageDex2Oat, true)
-                    // kUseReadBarrier currently works with
-                    // the interpreter only.
-                    // TODO: make it work with the compiler.
-RUNTIME_OPTIONS_KEY (bool, Interpret, kUseReadBarrier) // -Xint
+RUNTIME_OPTIONS_KEY (bool, Interpret, false) // -Xint
                    // Disable the compiler for CC (for now).
RUNTIME_OPTIONS_KEY (XGcOption, GcOption)  // -Xgc:
RUNTIME_OPTIONS_KEY (gc::space::LargeObjectSpaceType, \
diff --git a/runtime/stack.cc b/runtime/stack.cc
index d7edfade15..88978bf240 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -865,8 +865,8 @@ static void AssertPcIsWithinQuickCode(ArtMethod* method, uintptr_t pc)
  CHECK(code_start <= pc && pc <= (code_start + code_size))
      << PrettyMethod(method)
      << " pc=" << std::hex << pc
-      << " code=" << code
-      << " size=" << code_size;
+      << " code_start=" << code_start
+      << " code_size=" << code_size;
}

void StackVisitor::SanityCheckFrame() const {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 30eb2545f7..63e6326f2f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2517,6 +2517,7 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) {
    QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
    QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
    QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
+    QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
#undef QUICK_ENTRY_POINT_INFO

  os << offset;
diff --git a/test/537-checker-jump-over-jump/src/Main.java b/test/537-checker-jump-over-jump/src/Main.java
index fb666eaaea..cf9a69d28e 100644
--- a/test/537-checker-jump-over-jump/src/Main.java
+++ b/test/537-checker-jump-over-jump/src/Main.java
@@ -20,13 +20,25 @@ public class Main {
  public static int[] fibs;

  /// CHECK-START-X86_64: int Main.test() disassembly (after)
-  /// CHECK: If
-  /// CHECK-NEXT: cmp
-  /// CHECK-NEXT: jnl/ge
-  /// CHECK-NOT: jmp
-  /// CHECK: ArrayGet
-  // Checks that there is no conditional jump over a jmp. The ArrayGet is in
-  // the next block.
+  /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0
+  //
+  /// CHECK: If
+  /// CHECK-NEXT: cmp
+  /// CHECK-NEXT: jnl/ge
+  //
+  /// CHECK-DAG: <<Fibs:l\d+>> StaticFieldGet
+  /// CHECK-DAG: NullCheck [<<Fibs>>]
+  /// CHECK-NOT: jmp
+  /// CHECK-DAG: <<FibsAtZero:i\d+>> ArrayGet [<<Fibs>>,<<Zero>>]
+  /// CHECK-DAG: Return [<<FibsAtZero>>]
+  //
+  // Checks that there is no conditional jump over a `jmp`
+  // instruction. The `ArrayGet` instruction is in the next block.
+  //
+  // Note that the `StaticFieldGet` HIR instruction above (captured as
+  // `Fibs`) can produce a `jmp` x86-64 instruction when read barriers
+  // are enabled (to jump into the read barrier slow path), which is
+  // different from the `jmp` in the `CHECK-NOT` assertion.
  public static int test() {
    for (int i = 1; ; i++) {
      if (i >= FIBCOUNT) {
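The note appended to test 537 above relies on how Optimizing lays out read barrier slow paths: out-of-line code reached by a jump, ending with an unconditional jump back into the main line, which is where the extra x86-64 `jmp` comes from (and in this first implementation the entry into the slow path is itself unconditional, since per the TODOs no fast path exists yet). A C-like shape sketch, not generated code:

    struct Obj {};  // stand-in for a managed object

    // Identity stand-in for the runtime entry point the slow path calls
    // (e.g. art_quick_read_barrier_slow -> artReadBarrierSlow).
    static Obj* ReadBarrierRuntimeStub(Obj* ref) { return ref; }

    // Shape of an instrumented load: the slow path sits out of line, is
    // entered by a jump, and ends with a jump back into the main code;
    // that back-and-forth is the extra `jmp` the test comment
    // distinguishes from the jump-over-jump pattern the assertions rule out.
    static Obj* InstrumentedLoadSketch(Obj** field, bool barrier_needed) {
      Obj* ref = *field;                    // main line: plain load
      if (barrier_needed) {                 // jmp to out-of-line slow path
        ref = ReadBarrierRuntimeStub(ref);  //   call the runtime...
      }                                     //   ...then jmp back
      return ref;
    }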
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 60b65911f9..8bd5f9fb75 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -497,14 +497,20 @@ endif
TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=

# Tests that should fail in the read barrier configuration.
+# 055: Exceeds run time limits due to read barrier instrumentation.
# 137: Read barrier forces interpreter. Cannot run this with the interpreter.
+# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
+#      handled in the read barrier configuration.
TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \
-  137-cfi \
+  055-enum-performance \
+  137-cfi \
+  537-checker-arraycopy

ifeq ($(ART_USE_READ_BARRIER),true)
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+      $(PREBUILD_TYPES),$(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+      $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+      $(TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
endif

TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS :=
diff --git a/test/run-test b/test/run-test
index b2f6546cb7..10ec3103b9 100755
--- a/test/run-test
+++ b/test/run-test
@@ -665,6 +665,15 @@ chmod 755 "$check_cmd"

export TEST_NAME=`basename ${test_dir}`

+# arch_supports_read_barrier ARCH
+# -------------------------------
+# Return whether the Optimizing compiler has read barrier support for ARCH.
+function arch_supports_read_barrier() {
+  # Optimizing has read barrier support for x86 and x86-64 at the
+  # moment.
+  [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
+}
+
# Tests named '<number>-checker-*' will also have their CFGs verified with
# Checker when compiled with Optimizing on host.
if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then
@@ -678,9 +687,24 @@ if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then
    USE_JACK="false"

    if [ "$runtime" = "art" -a "$image_suffix" = "-optimizing" ]; then
+      # Optimizing has read barrier support for certain architectures
+      # only. On other architectures, compiling is disabled when read
+      # barriers are enabled, meaning that we do not produce a CFG file
+      # as a side-effect of compilation, thus the Checker assertions
+      # cannot be checked. Disable Checker for those cases.
+      #
+      # TODO: Enable Checker when read barrier support is added to more
+      # architectures (b/12687968).
+      if [ "x$ART_USE_READ_BARRIER" = xtrue ] \
+         && (([ "x$host_mode" = "xyes" ] \
+              && ! arch_supports_read_barrier "$host_arch_name") \
+             || ([ "x$target_mode" = "xyes" ] \
+                 && ! arch_supports_read_barrier "$target_arch_name")); then
+        run_checker="no"
      # In no-prebuild mode, the compiler is only invoked if both dex2oat and
      # patchoat are available. Disable Checker otherwise (b/22552692).
-      if [ "$prebuild_mode" = "yes" ] || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
+      elif [ "$prebuild_mode" = "yes" ] \
+           || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
        run_checker="yes"

        if [ "$target_mode" = "no" ]; then