diff options
| author | 2016-11-11 00:58:22 +0000 | |
|---|---|---|
| committer | 2016-11-11 00:58:23 +0000 | |
| commit | 1f01a46fdfc5fdda9268cf5d1ce48d6baf8d619d (patch) | |
| tree | 69c08594e2f3f8ef3f3496fd839c407b598d564a /compiler/optimizing | |
| parent | 48dfb4a7f113b3ada0c7e55f0503e692180decc5 (diff) | |
| parent | fe814e89965ddf9a8b603863bd28259f8dd7be35 (diff) | |
Merge "Use entrypoint switching to reduce code size of GcRoot read barrier"
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 46 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 54 |
2 files changed, 71 insertions, 29 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 32642e1868..7c72d00389 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -620,8 +620,10 @@ class ArraySetSlowPathARM : public SlowPathCodeARM { // reference (different from `ref`) in `obj.field`). class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { public: - ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref) - : SlowPathCodeARM(instruction), ref_(ref) { + ReadBarrierMarkSlowPathARM(HInstruction* instruction, + Location ref, + Location entrypoint = Location::NoLocation()) + : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) { DCHECK(kEmitCompilerReadBarrier); } @@ -676,10 +678,15 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { // // rX <- ReadBarrierMarkRegX(rX) // - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); - // This runtime call does not require a stack map. - arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + if (entrypoint_.IsValid()) { + arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + __ blx(entrypoint_.AsRegister<Register>()); + } else { + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + } __ b(GetExitLabel()); } @@ -687,6 +694,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { // The location (register) of the marked object reference. const Location ref_; + // The location of the entrypoint if already loaded. 
+ const Location entrypoint_; + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); }; @@ -6836,8 +6846,9 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct // Baker's read barrier are used: // // root = obj.field; - // if (Thread::Current()->GetIsGcMarking()) { - // root = ReadBarrier::Mark(root) + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) // } // /* GcRoot<mirror::Object> */ root = *(obj + offset) @@ -6851,14 +6862,23 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct "have different sizes."); // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(LR); SlowPathCodeARM* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root); + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( + instruction, + root, + /*entrypoint*/ temp); codegen_->AddSlowPath(slow_path); - // IP = Thread::Current()->GetIsGcMarking() - __ LoadFromOffset( - kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()); - __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel()); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. 
+ __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } else { // GC root loaded through a slow path for read barriers other diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index ef4e511dad..35b16051e5 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -607,10 +607,16 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { // probably still be a from-space reference (unless it gets updated by // another thread, or if another thread installed another object // reference (different from `ref`) in `obj.field`). +// If entrypoint is a valid location it is assumed to already be holding the entrypoint. The case +// where the entrypoint is passed in is for the GcRoot read barrier. class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { public: - ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location ref) - : SlowPathCodeARM64(instruction), ref_(ref) { + ReadBarrierMarkSlowPathARM64(HInstruction* instruction, + Location ref, + Location entrypoint = Location::NoLocation()) + : SlowPathCodeARM64(instruction), + ref_(ref), + entrypoint_(entrypoint) { DCHECK(kEmitCompilerReadBarrier); } @@ -665,10 +671,16 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { // // rX <- ReadBarrierMarkRegX(rX) // - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); - // This runtime call does not require a stack map. - arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + if (entrypoint_.IsValid()) { + arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + __ Blr(XRegisterFrom(entrypoint_)); + } else { + // Entrypoint is not already loaded, load from the thread. 
+ int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); + // This runtime call does not require a stack map. + arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + } __ B(GetExitLabel()); } @@ -676,6 +688,9 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { // The location (register) of the marked object reference. const Location ref_; + // The location of the entrypoint if it is already loaded. + const Location entrypoint_; + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); }; @@ -5379,8 +5394,9 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( // Baker's read barrier are used: // // root = obj.field; - // if (Thread::Current()->GetIsGcMarking()) { - // root = ReadBarrier::Mark(root) + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) // } // /* GcRoot<mirror::Object> */ root = *(obj + offset) @@ -5397,16 +5413,22 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path marking the GC root `root`. + Register temp = lr; + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in temp. 
SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root); + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, + root, + LocationFrom(temp)); codegen_->AddSlowPath(slow_path); - - MacroAssembler* masm = GetVIXLAssembler(); - UseScratchRegisterScope temps(masm); - Register temp = temps.AcquireW(); - // temp = Thread::Current()->GetIsGcMarking() - __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value())); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg()); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ Ldr(temp, MemOperand(tr, entry_point_offset)); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. __ Cbnz(temp, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } else { |