Do not emit stack maps for runtime calls to ReadBarrierMarkRegX.
* Boot image code size variation on Nexus 5X
(aosp_bullhead-userdebug build):
- total ARM64 framework Oat files size change:
115584120 bytes -> 109124728 bytes (-5.59%)
- total ARM framework Oat files size change:
97387728 bytes -> 92517584 bytes (-5.00%)
Test: ART host and target (ARM, ARM64) tests.
Bug: 29506760
Bug: 12687968
Change-Id: I979d9fb2b4e09f4c0c7bf33af2cd91750a67f989
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 9c6dcaa..3269dc6 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1178,19 +1178,19 @@
<< "instruction->DebugName()=" << instruction->DebugName()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
- // When read barriers are enabled, some instructions use a
- // slow path to emit a read barrier, which does not trigger
- // GC, is not fatal, nor is emitted by HDeoptimize
- // instructions.
+ // When (non-Baker) read barriers are enabled, some instructions
+ // use a slow path to emit a read barrier, which does not trigger
+ // GC.
(kEmitCompilerReadBarrier &&
+ !kUseBakerReadBarrier &&
(instruction->IsInstanceFieldGet() ||
instruction->IsStaticFieldGet() ||
- instruction->IsArraySet() ||
instruction->IsArrayGet() ||
instruction->IsLoadClass() ||
instruction->IsLoadString() ||
instruction->IsInstanceOf() ||
- instruction->IsCheckCast())))
+ instruction->IsCheckCast() ||
+ (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))))
<< "instruction->DebugName()=" << instruction->DebugName()
<< " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
@@ -1204,6 +1204,27 @@
<< instruction->DebugName() << ((slow_path != nullptr) ? slow_path->GetDescription() : "");
}
+void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath())
+ << "instruction->DebugName()=" << instruction->DebugName()
+ << " slow_path->GetDescription()=" << slow_path->GetDescription();
+ // Only the Baker read barrier marking slow path used by certain
+ // instructions is expected to invoke the runtime without recording
+ // PC-related information.
+ DCHECK(kUseBakerReadBarrier);
+ DCHECK(instruction->IsInstanceFieldGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsArrayGet() ||
+ instruction->IsLoadClass() ||
+ instruction->IsLoadString() ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast() ||
+ (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))
+ << "instruction->DebugName()=" << instruction->DebugName()
+ << " slow_path->GetDescription()=" << slow_path->GetDescription();
+}
+
void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
RegisterSet* live_registers = locations->GetLiveRegisters();
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 5e6e175..2042ade 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -379,8 +379,14 @@
return type == Primitive::kPrimNot && !value->IsNullConstant();
}
+
+ // Performs checks pertaining to an InvokeRuntime call.
void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);
+ // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
+ static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
+ SlowPathCode* slow_path);
+
void AddAllocatedRegister(Location location) {
allocated_registers_.Add(location);
}
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 236ed20..124a61f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -431,8 +431,7 @@
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -440,7 +439,6 @@
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
- InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
DCHECK_NE(reg, SP);
DCHECK_NE(reg, LR);
@@ -462,11 +460,8 @@
//
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmWordSize>(reg);
- // TODO: Do not emit a stack map for this runtime call.
- arm_codegen->InvokeRuntime(entry_point_offset,
- instruction_,
- instruction_->GetDexPc(),
- this);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ b(GetExitLabel());
}
@@ -516,8 +511,7 @@
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -1240,6 +1234,14 @@
RecordPcInfo(instruction, dex_pc, slow_path);
}
+void CodeGeneratorARM::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
+ __ blx(LR);
+}
+
void InstructionCodeGeneratorARM::HandleGoto(HInstruction* got, HBasicBlock* successor) {
DCHECK(!successor->IsExitBlock());
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index ef7913b..05cb8d1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -394,6 +394,12 @@
uint32_t dex_pc,
SlowPathCode* slow_path);
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
// Emit a write barrier.
void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 76b0797..efeef7b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -597,8 +597,7 @@
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -606,7 +605,6 @@
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
- InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
DCHECK_NE(obj_.reg(), LR);
DCHECK_NE(obj_.reg(), WSP);
@@ -628,11 +626,8 @@
//
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64WordSize>(obj_.reg());
- // TODO: Do not emit a stack map for this runtime call.
- arm64_codegen->InvokeRuntime(entry_point_offset,
- instruction_,
- instruction_->GetDexPc(),
- this);
+ // This runtime call does not require a stack map.
+ arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ B(GetExitLabel());
}
@@ -682,8 +677,7 @@
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
@@ -1502,6 +1496,15 @@
RecordPcInfo(instruction, dex_pc, slow_path);
}
+void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ BlockPoolsScope block_pools(GetVIXLAssembler());
+ __ Ldr(lr, MemOperand(tr, entry_point_offset));
+ __ Blr(lr);
+}
+
void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7201e59..88e8cea 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -499,6 +499,12 @@
uint32_t dex_pc,
SlowPathCode* slow_path);
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 82baaa0..528e94f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -466,8 +466,7 @@
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -475,7 +474,6 @@
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
- InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
DCHECK_NE(reg, ESP);
DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
@@ -495,11 +493,8 @@
//
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86WordSize>(reg);
- // TODO: Do not emit a stack map for this runtime call.
- x86_codegen->InvokeRuntime(entry_point_offset,
- instruction_,
- instruction_->GetDexPc(),
- this);
+ // This runtime call does not require a stack map.
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
}
@@ -549,8 +544,7 @@
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -824,6 +818,13 @@
RecordPcInfo(instruction, dex_pc, slow_path);
}
+void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ __ fs()->call(Address::Absolute(entry_point_offset));
+}
+
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
const X86InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2a9fb80..1290172 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -336,6 +336,12 @@
uint32_t dex_pc,
SlowPathCode* slow_path);
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
size_t GetWordSize() const OVERRIDE {
return kX86WordSize;
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index b6ba30e..0f0129b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -487,8 +487,7 @@
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -496,7 +495,6 @@
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
- InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
DCHECK_NE(reg, RSP);
DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
@@ -516,11 +514,8 @@
//
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64WordSize>(reg);
- // TODO: Do not emit a stack map for this runtime call.
- x86_64_codegen->InvokeRuntime(entry_point_offset,
- instruction_,
- instruction_->GetDexPc(),
- this);
+ // This runtime call does not require a stack map.
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
}
@@ -570,8 +565,7 @@
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
- instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -1052,6 +1046,13 @@
RecordPcInfo(instruction, dex_pc, slow_path);
}
+void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
+}
+
static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d7cfd37..cf92d68 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -318,6 +318,12 @@
uint32_t dex_pc,
SlowPathCode* slow_path);
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
size_t GetWordSize() const OVERRIDE {
return kX86_64WordSize;
}