x86_64: Implement VarHandle.set{,Opaque,Release,Volatile} for byte array views.
Benchmark improvements (using the benchmarks provided by
https://android-review.googlesource.com/1420959):
benchmark                                before  after
------------------------------------------------------
VarHandleSetByteArrayViewInt               2.89  0.004
VarHandleSetByteArrayViewBigEndianInt      2.89  0.004
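
For context, the Java-level accesses that now take the intrinsic fast
path look like the sketch below (illustrative only, not part of this
CL; the class name and INT_VIEW are made up). Access modes stronger
than plain set throw IllegalStateException for misaligned indices.

  import java.lang.invoke.MethodHandles;
  import java.lang.invoke.VarHandle;
  import java.nio.ByteOrder;

  public class ByteArrayViewSetExample {
    // A byte[] viewed as big-endian ints: the kind of VarHandle whose
    // set{,Opaque,Release,Volatile} access modes this CL intrinsifies.
    private static final VarHandle INT_VIEW =
        MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.BIG_ENDIAN);

    public static void main(String[] args) {
      byte[] bytes = new byte[16];
      INT_VIEW.set(bytes, 0, 0x12345678);  // plain set; byte-swapped on LE hardware
      INT_VIEW.setRelease(bytes, 4, 1);    // release store
      INT_VIEW.setVolatile(bytes, 8, 2);   // volatile store (index must be aligned)
    }
  }
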
Bug: 71781600
Test: lunch aosp_cf_x86_64_phone-userdebug \
&& art/test.py --host -r -t 712-varhandle-invocations --64
Test: Repeat with ART_USE_READ_BARRIER=false.
Test: Repeat with ART_HEAP_POISONING=true.
Change-Id: Ife3e0dfd99218399f18aa330b42b9828172f6491
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 0abcbc7..4d617f7 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -3222,8 +3222,10 @@
class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
public:
- explicit VarHandleSlowPathX86_64(HInvoke* invoke)
- : IntrinsicSlowPathX86_64(invoke) {
+ VarHandleSlowPathX86_64(HInvoke* invoke, bool is_volatile, bool is_atomic)
+ : IntrinsicSlowPathX86_64(invoke),
+ is_volatile_(is_volatile),
+ is_atomic_(is_atomic) {
}
Label* GetByteArrayViewCheckLabel() {
@@ -3254,6 +3256,10 @@
Label byte_array_view_check_label_;
Label native_byte_order_label_;
+
+ // Arguments forwarded to specific methods.
+ bool is_volatile_;
+ bool is_atomic_;
};
// Generate subtype check without read barriers.
@@ -3500,9 +3506,11 @@
static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
- DataType::Type type) {
+ DataType::Type type,
+ bool is_volatile = false,
+ bool is_atomic = false) {
VarHandleSlowPathX86_64* slow_path =
- new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
+ new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke, is_volatile, is_atomic);
codegen->AddSlowPath(slow_path);
GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
@@ -3589,11 +3597,13 @@
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
DCHECK_LE(expected_coordinates_count, 2u); // Filtered by the `DoNotIntrinsify` flag above.
if (expected_coordinates_count > 1u) {
- // TODO: Add array support for all intrinsics.
- // TODO: Add support for byte array views.
- if (mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic()) !=
- mirror::VarHandle::AccessModeTemplate::kGet) {
- return false;
+ switch (mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic())) {
+ case mirror::VarHandle::AccessModeTemplate::kGet:
+ case mirror::VarHandle::AccessModeTemplate::kSet:
+ break;
+ default:
+ // TODO: Add support for all intrinsics.
+ return false;
}
}
@@ -3640,7 +3650,7 @@
return;
}
- LocationSummary *locations = CreateVarHandleCommonLocations(invoke);
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
if (DataType::IsFloatingPointType(invoke->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
} else {
@@ -3733,7 +3743,16 @@
}
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
-
+ if (GetExpectedVarHandleCoordinatesCount(invoke) > 1u) {
+ // Ensure that the value is in a register: for byte array views we may need to swap
+ // the byte order in place (and swap it back afterwards).
+ uint32_t value_index = invoke->GetNumberOfArguments() - 1;
+ if (DataType::IsFloatingPointType(GetDataTypeFromShorty(invoke, value_index))) {
+ locations->SetInAt(value_index, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(value_index, Location::RequiresRegister());
+ }
+ }
// Extra temporary is used for card in MarkGCCard and to move 64-bit constants to memory.
locations->AddTemp(Location::RequiresRegister());
}
@@ -3741,15 +3760,27 @@
static void GenerateVarHandleSet(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
bool is_volatile,
- bool is_atomic) {
+ bool is_atomic,
+ bool byte_swap = false) {
X86_64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ const uint32_t last_temp_index = locations->GetTempCount() - 1;
+ CpuRegister temp = locations->GetTemp(last_temp_index).AsRegister<CpuRegister>();
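+ // This temp also serves as the scratch register for the byte swap (GenReverseBytes) below.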
+
uint32_t value_index = invoke->GetNumberOfArguments() - 1;
DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
- VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
VarHandleTarget target = GetVarHandleTarget(invoke);
- GenerateVarHandleTarget(invoke, target, codegen);
+ VarHandleSlowPathX86_64* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, value_type, is_volatile, is_atomic);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ } else {
+ // Swap the bytes in place in the input register (the original value is restored below).
+ GenReverseBytes(locations->InAt(value_index), value_type, assembler, temp);
+ }
switch (invoke->GetIntrinsic()) {
case Intrinsics::kVarHandleSetRelease:
@@ -3763,7 +3794,6 @@
break;
}
- const uint32_t last_temp_index = invoke->GetLocations()->GetTempCount() - 1;
Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
// Store the value to the field.
@@ -3781,7 +3811,12 @@
// setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
- __ Bind(slow_path->GetExitLabel());
+ if (!byte_swap) {
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // Restore the original byte order in the input register.
+ GenReverseBytes(locations->InAt(value_index), value_type, assembler, temp);
+ }
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
@@ -4616,11 +4651,27 @@
__ j(kNotZero, GetEntryLabel());
// Byte order check. For native byte order return to the main path.
+ if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
+ IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
+ // A zero bit pattern is unchanged by a byte swap, so there is no need to
+ // differentiate between native byte order and byte-swap here. Just return
+ // to the main path.
+ __ jmp(GetNativeByteOrderLabel());
+ return;
+ }
__ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
__ j(kNotEqual, GetNativeByteOrderLabel());
- DCHECK(access_mode_template == mirror::VarHandle::AccessModeTemplate::kGet);
- GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
+ switch (access_mode_template) {
+ case mirror::VarHandle::AccessModeTemplate::kGet:
+ GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
+ break;
+ case mirror::VarHandle::AccessModeTemplate::kSet:
+ GenerateVarHandleSet(invoke, codegen, is_volatile_, is_atomic_, /*byte_swap=*/ true);
+ break;
+ default:
+ DCHECK(false);
+ UNREACHABLE();
+ }
__ jmp(GetExitLabel());
}