Clean up VarHandles.set*() for arrays and byte array views.
This CL is a cleanup of https://r.android.com/1875684. Changes:
- Use setters instead of constructor arguments.
- Rerun benchmarks and add the array improvements to the commit message
(aosp/1875684 failed to mention the changes for arrays).
- Do BSWAP at compile time if the argument is a constant value (see
the first sketch below this list). `HandleFieldSet` can now optionally
perform a byte swap. The function is rewritten so that constant and
non-constant values are handled in separate branches, which makes both
cases simpler (the second sketch below illustrates the non-constant
path).
- Add helper function `CodeGeneratorX86_64::GetInstructionCodegen` to
reduce boilerplate.
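
To illustrate the constant path: when the stored value is a
compile-time constant, the byte reversal is folded into the immediate
and no bswap instruction is emitted. A minimal sketch of that folding
(`SwapBytes32` is a hypothetical stand-in for the BSWAP utility the CL
actually uses):

    #include <cstdint>

    // Reverses the byte order of a 32-bit value entirely at compile
    // time, so the code generator can emit a plain
    // `movl field_addr, Immediate(swapped)` with no runtime bswapl.
    constexpr int32_t SwapBytes32(int32_t v) {
      uint32_t u = static_cast<uint32_t>(v);
      u = ((u & 0x000000FFu) << 24) |
          ((u & 0x0000FF00u) << 8) |
          ((u & 0x00FF0000u) >> 8) |
          ((u & 0xFF000000u) >> 24);
      return static_cast<int32_t>(u);
    }

    static_assert(SwapBytes32(0x12345678) == 0x78563412,
                  "the swap folds to a constant at compile time");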
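For the non-constant path, the value arrives in a register and is
byte-swapped in place, stored, and then swapped back so the caller
still observes the original value. The sketch below mirrors that
swap-store-restore shape in plain C++ rather than emitted x86-64 code
(`StoreByteSwapped32` and the use of `__builtin_bswap32` are
illustrative assumptions, not part of the CL):

    #include <cstdint>
    #include <cstring>

    // Mirrors the emitted sequence for a 32-bit value:
    //   Bswap(value) ; movl [field_addr], value ; Bswap(value)
    void StoreByteSwapped32(uint8_t* field_addr, uint32_t& value_reg) {
      value_reg = __builtin_bswap32(value_reg);  // Bswap(value, type, &temp)
      std::memcpy(field_addr, &value_reg, sizeof(value_reg));  // movl
      value_reg = __builtin_bswap32(value_reg);  // restore byte order
    }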
Benchmark improvements (using the benchmarks provided by
https://android-review.googlesource.com/1420959):
benchmark                               before aosp/1875684    now
------------------------------------------------------------------
VarHandleSetArrayElementInt                            2.79  0.002
VarHandleSetArrayElementString                         3.09  0.003
VarHandleSetByteArrayViewInt                           2.89  0.004
VarHandleSetByteArrayViewBigEndianInt                  2.89  0.004
Bug: 71781600
Test: lunch aosp_cf_x86_64_phone-userdebug \
&& art/test.py --host -r -t 712-varhandle-invocations --64
Test: Repeat with ART_USE_READ_BARRIER=false.
Test: Repeat with ART_HEAP_POISONING=true.
Change-Id: I8cc37321228ffb2833a1158a75ced65f18af968e
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6deb035..e601b40 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5117,6 +5117,48 @@
}
}
+void InstructionCodeGeneratorX86_64::Bswap(Location value,
+ DataType::Type type,
+ CpuRegister* temp) {
+ switch (type) {
+ case DataType::Type::kInt16:
+ // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+ __ bswapl(value.AsRegister<CpuRegister>());
+ __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
+ break;
+ case DataType::Type::kUint16:
+ // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+ __ bswapl(value.AsRegister<CpuRegister>());
+ __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kUint32:
+ __ bswapl(value.AsRegister<CpuRegister>());
+ break;
+ case DataType::Type::kInt64:
+ case DataType::Type::kUint64:
+ __ bswapq(value.AsRegister<CpuRegister>());
+ break;
+ case DataType::Type::kFloat32: {
+ DCHECK_NE(temp, nullptr);
+ __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
+ __ bswapl(*temp);
+ __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ false);
+ break;
+ }
+ case DataType::Type::kFloat64: {
+ DCHECK_NE(temp, nullptr);
+ __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
+ __ bswapq(*temp);
+ __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ true);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
+ UNREACHABLE();
+ }
+}
+
void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
uint32_t value_index,
uint32_t extra_temp_index,
@@ -5125,7 +5167,8 @@
CpuRegister base,
bool is_volatile,
bool is_atomic,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ bool byte_swap) {
LocationSummary* locations = instruction->GetLocations();
Location value = locations->InAt(value_index);
@@ -5135,38 +5178,80 @@
bool maybe_record_implicit_null_check_done = false;
- switch (field_type) {
- case DataType::Type::kBool:
- case DataType::Type::kUint8:
- case DataType::Type::kInt8: {
- if (value.IsConstant()) {
+ if (value.IsConstant()) {
+ switch (field_type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
__ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
- } else {
- __ movb(field_addr, value.AsRegister<CpuRegister>());
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16: {
+ int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
+ if (byte_swap) {
+ v = BSWAP(v);
+ }
+ __ movw(field_addr, Immediate(v));
+ break;
}
- break;
- }
-
- case DataType::Type::kUint16:
- case DataType::Type::kInt16: {
- if (value.IsConstant()) {
- __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
- } else {
- __ movw(field_addr, value.AsRegister<CpuRegister>());
- }
- break;
- }
-
- case DataType::Type::kInt32:
- case DataType::Type::kReference: {
- if (value.IsConstant()) {
+ case DataType::Type::kUint32:
+ case DataType::Type::kInt32:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kReference: {
int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ if (byte_swap) {
+ v = BSWAP(v);
+ }
// `field_type == DataType::Type::kReference` implies `v == 0`.
DCHECK((field_type != DataType::Type::kReference) || (v == 0));
// Note: if heap poisoning is enabled, no need to poison
// (negate) `v` if it is a reference, as it would be null.
__ movl(field_addr, Immediate(v));
- } else {
+ break;
+ }
+ case DataType::Type::kUint64:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat64: {
+ int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+ if (byte_swap) {
+ v = BSWAP(v);
+ }
+ if (is_atomic) {
+ // Move constant into a register, then atomically store the register to memory.
+ CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
+ __ movq(temp, Immediate(v));
+ __ movq(field_addr, temp);
+ } else {
+ Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
+ codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
+ }
+ maybe_record_implicit_null_check_done = true;
+ break;
+ }
+ case DataType::Type::kVoid:
+ LOG(FATAL) << "Unreachable type " << field_type;
+ UNREACHABLE();
+ }
+ } else {
+ if (byte_swap) {
+ // Swap byte order in-place in the input register (we will restore it later).
+ CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
+ Bswap(value, field_type, &temp);
+ }
+
+ switch (field_type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ __ movb(field_addr, value.AsRegister<CpuRegister>());
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ __ movw(field_addr, value.AsRegister<CpuRegister>());
+ break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kInt32:
+ case DataType::Type::kReference:
if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
__ movl(temp, value.AsRegister<CpuRegister>());
@@ -5175,67 +5260,27 @@
} else {
__ movl(field_addr, value.AsRegister<CpuRegister>());
}
- }
- break;
- }
-
- case DataType::Type::kInt64: {
- if (value.IsConstant()) {
- int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
- if (is_atomic) {
- // Move constant into a register, then atomically store the register to memory.
- CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
- __ movq(temp, Immediate(v));
- __ movq(field_addr, temp);
- } else {
- codegen_->MoveInt64ToAddress(field_addr,
- Address::displace(field_addr, sizeof(int32_t)),
- v,
- instruction);
- }
- maybe_record_implicit_null_check_done = true;
- } else {
+ break;
+ case DataType::Type::kUint64:
+ case DataType::Type::kInt64:
__ movq(field_addr, value.AsRegister<CpuRegister>());
- }
- break;
- }
-
- case DataType::Type::kFloat32: {
- if (value.IsConstant()) {
- int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
- __ movl(field_addr, Immediate(v));
- } else {
+ break;
+ case DataType::Type::kFloat32:
__ movss(field_addr, value.AsFpuRegister<XmmRegister>());
- }
- break;
- }
-
- case DataType::Type::kFloat64: {
- if (value.IsConstant()) {
- int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
- if (is_atomic) {
- // Move constant into a register, then atomically store the register to memory.
- CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
- __ movq(temp, Immediate(v));
- __ movq(field_addr, temp);
- } else {
- codegen_->MoveInt64ToAddress(field_addr,
- Address::displace(field_addr, sizeof(int32_t)),
- v,
- instruction);
- }
- maybe_record_implicit_null_check_done = true;
- } else {
+ break;
+ case DataType::Type::kFloat64:
__ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
- }
- break;
+ break;
+ case DataType::Type::kVoid:
+ LOG(FATAL) << "Unreachable type " << field_type;
+ UNREACHABLE();
}
- case DataType::Type::kUint32:
- case DataType::Type::kUint64:
- case DataType::Type::kVoid:
- LOG(FATAL) << "Unreachable type " << field_type;
- UNREACHABLE();
+ if (byte_swap) {
+ // Restore byte order.
+ CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
+ Bswap(value, field_type, &temp);
+ }
}
if (!maybe_record_implicit_null_check_done) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1115c83..a130994 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -249,7 +249,10 @@
CpuRegister base,
bool is_volatile,
bool is_atomic,
- bool value_can_be_null);
+ bool value_can_be_null,
+ bool byte_swap = false);
+
+ void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);
private:
// Generate code for the given suspend check. If not null, `successor`
@@ -421,6 +424,10 @@
return InstructionSet::kX86_64;
}
+ InstructionCodeGeneratorX86_64* GetInstructionCodegen() {
+ return down_cast<InstructionCodeGeneratorX86_64*>(GetInstructionVisitor());
+ }
+
const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;
// Emit a write barrier.
@@ -650,7 +657,6 @@
void GenerateExplicitNullCheck(HNullCheck* instruction) override;
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);
-
void MaybeIncrementHotness(bool is_frame_entry);
static void BlockNonVolatileXmmRegisters(LocationSummary* locations);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index c536492..be3af2d 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -184,53 +184,12 @@
locations->SetOut(Location::SameAsFirstInput());
}
-static void GenReverseBytes(Location out,
- DataType::Type type,
- X86_64Assembler* assembler,
- CpuRegister temp = CpuRegister(kNoRegister)) {
- switch (type) {
- case DataType::Type::kInt16:
- // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
- __ bswapl(out.AsRegister<CpuRegister>());
- __ sarl(out.AsRegister<CpuRegister>(), Immediate(16));
- break;
- case DataType::Type::kUint16:
- // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
- __ bswapl(out.AsRegister<CpuRegister>());
- __ shrl(out.AsRegister<CpuRegister>(), Immediate(16));
- break;
- case DataType::Type::kInt32:
- case DataType::Type::kUint32:
- __ bswapl(out.AsRegister<CpuRegister>());
- break;
- case DataType::Type::kInt64:
- case DataType::Type::kUint64:
- __ bswapq(out.AsRegister<CpuRegister>());
- break;
- case DataType::Type::kFloat32:
- DCHECK_NE(temp.AsRegister(), kNoRegister);
- __ movd(temp, out.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
- __ bswapl(temp);
- __ movd(out.AsFpuRegister<XmmRegister>(), temp, /*is64bit=*/ false);
- break;
- case DataType::Type::kFloat64:
- DCHECK_NE(temp.AsRegister(), kNoRegister);
- __ movd(temp, out.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
- __ bswapq(temp);
- __ movd(out.AsFpuRegister<XmmRegister>(), temp, /*is64bit=*/ true);
- break;
- default:
- LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
- UNREACHABLE();
- }
-}
-
void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
CreateIntToIntLocations(allocator_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
- GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt32, GetAssembler());
+ codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
@@ -238,7 +197,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
- GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt64, GetAssembler());
+ codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
@@ -246,7 +205,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
- GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt16, GetAssembler());
+ codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
}
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -3233,10 +3192,16 @@
class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
public:
- VarHandleSlowPathX86_64(HInvoke* invoke, bool is_atomic, bool is_volatile)
- : IntrinsicSlowPathX86_64(invoke),
- is_volatile_(is_volatile),
- is_atomic_(is_atomic) {
+ explicit VarHandleSlowPathX86_64(HInvoke* invoke)
+ : IntrinsicSlowPathX86_64(invoke) {
+ }
+
+ void SetVolatile(bool is_volatile) {
+ is_volatile_ = is_volatile;
+ }
+
+ void SetAtomic(bool is_atomic) {
+ is_atomic_ = is_atomic;
}
Label* GetByteArrayViewCheckLabel() {
@@ -3555,11 +3520,9 @@
static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
- DataType::Type type,
- bool is_volatile = false,
- bool is_atomic = false) {
+ DataType::Type type) {
VarHandleSlowPathX86_64* slow_path =
- new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke, is_volatile, is_atomic);
+ new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
codegen->AddSlowPath(slow_path);
GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
@@ -3610,8 +3573,7 @@
__ movq(method, Address(varhandle, art_field_offset));
__ movl(CpuRegister(target.offset), Address(method, offset_offset));
if (expected_coordinates_count == 0u) {
- InstructionCodeGeneratorX86_64* instr_codegen =
- down_cast<InstructionCodeGeneratorX86_64*>(codegen->GetInstructionVisitor());
+ InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
instr_codegen->GenerateGcRootFieldLoad(invoke,
Location::RegisterLocation(target.object),
Address(method, ArtField::DeclaringClassOffset()),
@@ -3742,7 +3704,7 @@
codegen->LoadFromMemoryNoReference(type, out, src);
if (byte_swap) {
CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- GenReverseBytes(out, type, assembler, temp);
+ codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
}
}
@@ -3792,16 +3754,7 @@
}
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
- if (GetExpectedVarHandleCoordinatesCount(invoke) > 1) {
- // Ensure that the value is in register: for byte array views we may need to swap byte order
- // inplace (and then swap it back).
- uint32_t value_index = invoke->GetNumberOfArguments() - 1;
- if (DataType::IsFloatingPointType(GetDataTypeFromShorty(invoke, value_index))) {
- locations->SetInAt(value_index, Location::RequiresFpuRegister());
- } else {
- locations->SetInAt(value_index, Location::RequiresRegister());
- }
- }
+
// Extra temporary is used for card in MarkGCCard and to move 64-bit constants to memory.
locations->AddTemp(Location::RequiresRegister());
}
@@ -3815,7 +3768,6 @@
LocationSummary* locations = invoke->GetLocations();
const uint32_t last_temp_index = locations->GetTempCount() - 1;
- CpuRegister temp = locations->GetTemp(last_temp_index).AsRegister<CpuRegister>();
uint32_t value_index = invoke->GetNumberOfArguments() - 1;
DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
@@ -3823,12 +3775,11 @@
VarHandleTarget target = GetVarHandleTarget(invoke);
VarHandleSlowPathX86_64* slow_path = nullptr;
if (!byte_swap) {
- slow_path = GenerateVarHandleChecks(invoke, codegen, value_type, is_volatile, is_atomic);
+ slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
+ slow_path->SetVolatile(is_volatile);
+ slow_path->SetAtomic(is_atomic);
GenerateVarHandleTarget(invoke, target, codegen);
__ Bind(slow_path->GetNativeByteOrderLabel());
- } else {
- // Swap bytes inplace in the input register (later we will restore it).
- GenReverseBytes(locations->InAt(value_index), value_type, assembler, temp);
}
switch (invoke->GetIntrinsic()) {
@@ -3846,25 +3797,21 @@
Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
// Store the value to the field.
- InstructionCodeGeneratorX86_64* instr_codegen =
- down_cast<InstructionCodeGeneratorX86_64*>(codegen->GetInstructionVisitor());
- instr_codegen->HandleFieldSet(invoke,
- value_index,
- last_temp_index,
- value_type,
- dst,
- CpuRegister(target.object),
- is_volatile,
- is_atomic,
- /*value_can_be_null=*/ true);
+ codegen->GetInstructionCodegen()->HandleFieldSet(invoke,
+ value_index,
+ last_temp_index,
+ value_type,
+ dst,
+ CpuRegister(target.object),
+ is_volatile,
+ is_atomic,
+ /*value_can_be_null=*/ true,
+ byte_swap);
// setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
if (!byte_swap) {
__ Bind(slow_path->GetExitLabel());
- } else {
- // Restore byte order in the input register.
- GenReverseBytes(locations->InAt(value_index), value_type, assembler, temp);
}
}