Add support for Baker read barriers in SystemArrayCopy intrinsics.
ARM64 benchmark score variations on Nexus 5X with CPU
cores clamped at 960000 kHz (aosp_bullhead-userdebug build):
- Ritzperf - average (lower is better): -3.03% (slightly better)
- CaffeineMark - average (higher is better): +1.26% (slightly better)
- DeltaBlue (lower is better): -10.50% (better)
- Richards - average (lower is better): -3.36% (slightly better)
- SciMark2 - average (higher is better): +0.26% (virtually unchanged)
Details about Ritzperf benchmarks with meaningful variations
(lower is better):
- FormulaEvaluationActions.EvaluateAndApplyChanges: -13.26% (better)
- FormulaEvaluationActions.EvaluateCascadingSums: -10.94% (better)
- FormulaEvaluationActions.EvaluateComplexFormulas: -15.50% (better)
- FormulaEvaluationActions.EvaluateFibonacci: -10.41% (better)
- FormulaEvaluationActions.EvaluateLargeSums: +6.02% (worse)
Boot image code size variation on Nexus 5X
(aosp_bullhead-userdebug build):
- total ARM64 framework Oat files size change:
107047632 bytes -> 107154128 bytes (+0.10%)
- total ARM framework Oat files size change:
90932028 bytes -> 91009852 bytes (+0.09%)
Test: ART host and target (ARM, ARM64) tests + Nexus 5X boot.
Bug: 29516905
Bug: 29506760
Bug: 12687968
Change-Id: I85431368d09965687a0301ae2eb3c991f276ce5d
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7a561bb..87853a6 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -464,7 +464,8 @@
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -1578,15 +1579,15 @@
locations->SetOut(Location::SameAsFirstInput());
}
-void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
Register lhs_reg = lhs.AsRegister<Register>();
if (rhs.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- codegen_->Compare32BitValue(lhs_reg, value);
+ Compare32BitValue(lhs_reg, value);
} else if (rhs.IsStackSlot()) {
- __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+ assembler_.cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
} else {
- __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+ assembler_.cmpl(lhs_reg, rhs.AsRegister<Register>());
}
}
@@ -1619,7 +1620,7 @@
DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
LocationSummary* cond_locations = condition->GetLocations();
- GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+ codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
cond = X86Condition(condition->GetCondition());
}
} else {
@@ -1728,7 +1729,7 @@
// Clear output register: setb only sets the low byte.
__ xorl(reg, reg);
- GenerateIntCompare(lhs, rhs);
+ codegen_->GenerateIntCompare(lhs, rhs);
__ setb(X86Condition(cond->GetCondition()), reg);
return;
}
@@ -4210,7 +4211,7 @@
case Primitive::kPrimShort:
case Primitive::kPrimChar:
case Primitive::kPrimInt: {
- GenerateIntCompare(left, right);
+ codegen_->GenerateIntCompare(left, right);
break;
}
case Primitive::kPrimLong: {