arm: Implement VarHandle intrinsics for byte array views.
Using benchmarks provided by
https://android-review.googlesource.com/1420959
on blueline little cores with the CPU frequency fixed at 1420800 kHz:
                               before  after
GetByteArrayViewInt            28.989  0.028
SetByteArrayViewInt            30.114  0.028
GetByteArrayViewBigEndianInt   28.974  0.030
SetByteArrayViewBigEndianInt   30.130  0.030
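(For context: the fast path emitted for an opposite-endian Int view is
roughly the following C++ model; an illustrative sketch, not the
generated code.)

    // Illustrative model only: an unaligned native-endian load followed by a
    // byte reversal (a single REV instruction on ARM) for the opposite-endian
    // byte array view.
    #include <cstdint>
    #include <cstring>
    int32_t GetBigEndianInt(const uint8_t* data, size_t byte_offset) {
      int32_t value;
      std::memcpy(&value, data + byte_offset, sizeof(value));  // plain LDR
      return __builtin_bswap32(value);                         // REV
    }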
Test: testrunner.py --target --32 --optimizing
Bug: 71781600
Change-Id: I40c1c2fdd829f17872c457994010f78549c895de
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index a9cf262..d7d09af 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -22,6 +22,7 @@
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
+#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "common_arm.h"
@@ -9449,20 +9450,40 @@
}
}
+void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,
+ TypeReference target_type) {
+ // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ DCHECK(GetCompilerOptions().IsBootImage());
+ PcRelativePatchInfo* labels =
+ NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
+ EmitMovwMovtPlaceholder(labels, reg);
+}
+
void CodeGeneratorARMVIXL::LoadIntrinsicDeclaringClass(vixl32::Register reg, HInvoke* invoke) {
DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
if (GetCompilerOptions().IsBootImage()) {
- // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
MethodReference target_method = invoke->GetResolvedMethodReference();
dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
- PcRelativePatchInfo* labels = NewBootImageTypePatch(*target_method.dex_file, type_idx);
- EmitMovwMovtPlaceholder(labels, reg);
+ LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
} else {
uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
LoadBootImageAddress(reg, boot_image_offset);
}
}
+void CodeGeneratorARMVIXL::LoadClassRootForIntrinsic(vixl::aarch32::Register reg,
+ ClassRoot class_root) {
+ if (GetCompilerOptions().IsBootImage()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
+ TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
+ LoadTypeForBootImageIntrinsic(reg, target_type);
+ } else {
+ uint32_t boot_image_offset = GetBootImageOffset(class_root);
+ LoadBootImageAddress(reg, boot_image_offset);
+ }
+}
+
template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
const ArenaDeque<PcRelativePatchInfo>& infos,
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index a310545..b797c30 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
#include "base/enums.h"
+#include "class_root.h"
#include "code_generator.h"
#include "common_arm.h"
#include "dex/string_reference.h"
@@ -638,7 +639,9 @@
Handle<mirror::Class> handle);
void LoadBootImageAddress(vixl::aarch32::Register reg, uint32_t boot_image_reference);
+ void LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg, TypeReference type_reference);
void LoadIntrinsicDeclaringClass(vixl::aarch32::Register reg, HInvoke* invoke);
+ void LoadClassRootForIntrinsic(vixl::aarch32::Register reg, ClassRoot class_root);
void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
bool NeedsThunkCode(const linker::LinkerPatch& patch) const override;
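A hedged usage sketch for the two helpers declared above (the call site is
illustrative; the class-root constant is an assumption about class_root.h,
matching the class compared against in the byte array view check below):

    // Illustrative only; assumes this ClassRoot entry exists in class_root.h.
    codegen->LoadClassRootForIntrinsic(
        temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);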
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 64d49aa..252865f 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -4065,7 +4065,7 @@
// Check that the coordinateType0 is an array type. We do not need a read barrier
// for loading constant reference fields (or chains of them) for comparison with null,
- // or for finally loading a constant primitive field (primitive type) below.
+ // nor for finally loading a constant primitive field (primitive type) below.
__ Ldr(temp2, HeapOperand(temp, component_type_offset.Int32Value()));
codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
__ Cbz(temp2, slow_path->GetEntryLabel());
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index df214cc..3766f35 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -447,7 +447,7 @@
__ Vcmp(F32, temp1, temp2);
__ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
{
- // Use ExactAsemblyScope here because we are using IT.
+ // Use ExactAssemblyScope here because we are using IT.
ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
2 * kMaxInstructionSizeInBytes,
CodeBufferCheckScope::kMaximumSize);
@@ -698,9 +698,9 @@
}
static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
- HInvoke* invoke,
- vixl32::Label* end,
- vixl32::Label* different_compression) {
+ HInvoke* invoke,
+ vixl32::Label* end,
+ vixl32::Label* different_compression) {
LocationSummary* locations = invoke->GetLocations();
const vixl32::Register str = InputRegisterAt(invoke, 0);
@@ -1993,13 +1993,53 @@
__ Rbit(out_reg_hi, in_reg_lo);
}
+static void GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler* assembler, Location pair) {
+ DCHECK(pair.IsRegisterPair());
+ __ Rev(LowRegisterFrom(pair), LowRegisterFrom(pair));
+ __ Rev(HighRegisterFrom(pair), HighRegisterFrom(pair));
+}
+
+static void GenerateReverseBytes(ArmVIXLAssembler* assembler,
+ DataType::Type type,
+ Location in,
+ Location out) {
+ switch (type) {
+ case DataType::Type::kUint16:
+ __ Rev16(RegisterFrom(out), RegisterFrom(in));
+ break;
+ case DataType::Type::kInt16:
+ __ Revsh(RegisterFrom(out), RegisterFrom(in));
+ break;
+ case DataType::Type::kInt32:
+ __ Rev(RegisterFrom(out), RegisterFrom(in));
+ break;
+ case DataType::Type::kInt64:
+ DCHECK(!LowRegisterFrom(out).Is(LowRegisterFrom(in)));
+ __ Rev(LowRegisterFrom(out), HighRegisterFrom(in));
+ __ Rev(HighRegisterFrom(out), LowRegisterFrom(in));
+ break;
+ case DataType::Type::kFloat32:
+ __ Rev(RegisterFrom(in), RegisterFrom(in)); // Note: Clobbers `in`.
+ __ Vmov(SRegisterFrom(out), RegisterFrom(in));
+ break;
+ case DataType::Type::kFloat64:
+ GenerateReverseBytesInPlaceForEachWord(assembler, in); // Note: Clobbers `in`.
+ __ Vmov(DRegisterFrom(out), HighRegisterFrom(in), LowRegisterFrom(in)); // Swap high/low.
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
+ UNREACHABLE();
+ }
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
CreateIntToIntLocations(allocator_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
ArmVIXLAssembler* assembler = GetAssembler();
- __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+ LocationSummary* locations = invoke->GetLocations();
+ GenerateReverseBytes(assembler, DataType::Type::kInt32, locations->InAt(0), locations->Out());
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
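The kInt64 case of GenerateReverseBytes above swaps the two words and
reverses the bytes within each; a minimal C++ model of that identity:

    #include <cstdint>
    // bswap64 over a register pair: swap the two words, then bswap32 each.
    uint64_t Bswap64FromPair(uint32_t lo, uint32_t hi) {
      uint32_t out_lo = __builtin_bswap32(hi);  // __ Rev(out_lo, in_hi);
      uint32_t out_hi = __builtin_bswap32(lo);  // __ Rev(out_hi, in_lo);
      return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
    }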
@@ -2009,14 +2049,7 @@
void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
ArmVIXLAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
-
- vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
- vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
- vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
- vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
-
- __ Rev(out_reg_lo, in_reg_hi);
- __ Rev(out_reg_hi, in_reg_lo);
+ GenerateReverseBytes(assembler, DataType::Type::kInt64, locations->InAt(0), locations->Out());
}
void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
@@ -2025,7 +2058,8 @@
void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
ArmVIXLAssembler* assembler = GetAssembler();
- __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+ LocationSummary* locations = invoke->GetLocations();
+ GenerateReverseBytes(assembler, DataType::Type::kInt16, locations->InAt(0), locations->Out());
}
static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
@@ -2818,7 +2852,6 @@
if (release_barrier) {
codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
}
- MemOperand address(base, offset);
UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
if (kPoisonHeapReferences && type == DataType::Type::kReference) {
vixl32::Register temp = temps.Acquire();
@@ -2826,6 +2859,12 @@
assembler->PoisonHeapReference(temp);
value = LocationFrom(temp);
}
+ MemOperand address = offset.IsValid() ? MemOperand(base, offset) : MemOperand(base);
+ if (offset.IsValid() && (DataType::Is64BitType(type) || type == DataType::Type::kFloat32)) {
+ const vixl32::Register temp_reg = temps.Acquire();
+ __ Add(temp_reg, base, offset);
+ address = MemOperand(temp_reg);
+ }
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kInt8:
@@ -2841,45 +2880,37 @@
break;
case DataType::Type::kInt64:
if (Use64BitExclusiveLoadStore(atomic, codegen)) {
- const vixl32::Register temp_reg = temps.Acquire();
- __ Add(temp_reg, base, offset);
vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
vixl32::Label loop;
__ Bind(&loop);
- __ Ldrexd(lo_tmp, hi_tmp, MemOperand(temp_reg)); // Ignore the retrieved value.
- __ Strexd(lo_tmp, LowRegisterFrom(value), HighRegisterFrom(value), MemOperand(temp_reg));
+ __ Ldrexd(lo_tmp, hi_tmp, address); // Ignore the retrieved value.
+ __ Strexd(lo_tmp, LowRegisterFrom(value), HighRegisterFrom(value), address);
__ Cmp(lo_tmp, 0);
__ B(ne, &loop);
} else {
__ Strd(LowRegisterFrom(value), HighRegisterFrom(value), address);
}
break;
- case DataType::Type::kFloat32: {
- const vixl32::Register temp_reg = temps.Acquire();
- __ Add(temp_reg, base, offset);
- __ Vldr(SRegisterFrom(value), MemOperand(temp_reg));
+ case DataType::Type::kFloat32:
+ __ Vstr(SRegisterFrom(value), address);
break;
- }
- case DataType::Type::kFloat64: {
- const vixl32::Register temp_reg = temps.Acquire();
- __ Add(temp_reg, base, offset);
+ case DataType::Type::kFloat64:
if (Use64BitExclusiveLoadStore(atomic, codegen)) {
vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
vixl32::Label loop;
__ Bind(&loop);
- __ Ldrexd(lo_tmp, hi_tmp, MemOperand(temp_reg)); // Ignore the retrieved value.
+ __ Ldrexd(lo_tmp, hi_tmp, address); // Ignore the retrieved value.
__ Vmov(lo_tmp, hi_tmp, DRegisterFrom(value));
- __ Strexd(strexd_tmp, lo_tmp, hi_tmp, MemOperand(temp_reg));
+ __ Strexd(strexd_tmp, lo_tmp, hi_tmp, address);
__ Cmp(strexd_tmp, 0);
__ B(ne, &loop);
} else {
- __ Vstr(DRegisterFrom(value), MemOperand(temp_reg));
+ __ Vstr(DRegisterFrom(value), address);
}
break;
- }
default:
LOG(FATAL) << "Unexpected type " << type;
UNREACHABLE();
@@ -3061,25 +3092,23 @@
static void EmitLoadExclusive(CodeGeneratorARMVIXL* codegen,
DataType::Type type,
vixl32::Register ptr,
- vixl32::Register old_value,
- vixl32::Register old_value_high) {
- DCHECK_EQ(type == DataType::Type::kInt64, old_value_high.IsValid());
+ Location old_value) {
ArmVIXLAssembler* assembler = codegen->GetAssembler();
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kInt8:
- __ Ldrexb(old_value, MemOperand(ptr));
+ __ Ldrexb(RegisterFrom(old_value), MemOperand(ptr));
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- __ Ldrexh(old_value, MemOperand(ptr));
+ __ Ldrexh(RegisterFrom(old_value), MemOperand(ptr));
break;
case DataType::Type::kInt32:
case DataType::Type::kReference:
- __ Ldrex(old_value, MemOperand(ptr));
+ __ Ldrex(RegisterFrom(old_value), MemOperand(ptr));
break;
case DataType::Type::kInt64:
- __ Ldrexd(old_value, old_value_high, MemOperand(ptr));
+ __ Ldrexd(LowRegisterFrom(old_value), HighRegisterFrom(old_value), MemOperand(ptr));
break;
default:
LOG(FATAL) << "Unexpected type: " << type;
@@ -3087,13 +3116,13 @@
}
switch (type) {
case DataType::Type::kInt8:
- __ Sxtb(old_value, old_value);
+ __ Sxtb(RegisterFrom(old_value), RegisterFrom(old_value));
break;
case DataType::Type::kInt16:
- __ Sxth(old_value, old_value);
+ __ Sxth(RegisterFrom(old_value), RegisterFrom(old_value));
break;
case DataType::Type::kReference:
- assembler->MaybeUnpoisonHeapReference(old_value);
+ assembler->MaybeUnpoisonHeapReference(RegisterFrom(old_value));
break;
default:
break;
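With the Location-based signature, callers no longer thread a separate high
register through; hedged sketches of the two call shapes (`ptr`, `old_value`,
`lo` and `hi` are illustrative, LocationFrom is the helper already used
throughout this file):

    // Illustrative call shapes only.
    EmitLoadExclusive(codegen, DataType::Type::kInt32, ptr, LocationFrom(old_value));  // LDREX
    EmitLoadExclusive(codegen, DataType::Type::kInt64, ptr, LocationFrom(lo, hi));     // LDREXD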
@@ -3104,35 +3133,34 @@
DataType::Type type,
vixl32::Register ptr,
vixl32::Register store_result,
- vixl32::Register new_value,
- vixl32::Register new_value_high) {
- DCHECK_EQ(type == DataType::Type::kInt64, new_value_high.IsValid());
+ Location new_value) {
ArmVIXLAssembler* assembler = codegen->GetAssembler();
if (type == DataType::Type::kReference) {
- assembler->MaybePoisonHeapReference(new_value);
+ assembler->MaybePoisonHeapReference(RegisterFrom(new_value));
}
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kInt8:
- __ Strexb(store_result, new_value, MemOperand(ptr));
+ __ Strexb(store_result, RegisterFrom(new_value), MemOperand(ptr));
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- __ Strexh(store_result, new_value, MemOperand(ptr));
+ __ Strexh(store_result, RegisterFrom(new_value), MemOperand(ptr));
break;
case DataType::Type::kInt32:
case DataType::Type::kReference:
- __ Strex(store_result, new_value, MemOperand(ptr));
+ __ Strex(store_result, RegisterFrom(new_value), MemOperand(ptr));
break;
case DataType::Type::kInt64:
- __ Strexd(store_result, new_value, new_value_high, MemOperand(ptr));
+ __ Strexd(
+ store_result, LowRegisterFrom(new_value), HighRegisterFrom(new_value), MemOperand(ptr));
break;
default:
LOG(FATAL) << "Unexpected type: " << type;
UNREACHABLE();
}
if (type == DataType::Type::kReference) {
- assembler->MaybeUnpoisonHeapReference(new_value);
+ assembler->MaybeUnpoisonHeapReference(RegisterFrom(new_value));
}
}
@@ -3142,37 +3170,34 @@
vixl32::Label* cmp_failure,
bool cmp_failure_is_far_target,
vixl32::Register ptr,
- vixl32::Register new_value,
- vixl32::Register new_value_high,
- vixl32::Register old_value,
- vixl32::Register old_value_high,
+ Location expected,
+ Location new_value,
+ Location old_value,
vixl32::Register store_result,
- vixl32::Register success,
- vixl32::Register expected,
- vixl32::Register expected2) {
- // For Int64, the `expected2` is the high word of the expected value.
- // Otherwise, it is valid only for reference slow path and represents the unmarked old value
- // from the main path attempt to emit CAS when the marked old value matched `expected`.
+ vixl32::Register success) {
+ // For kReference, the `expected` shall be a register pair when called from a read barrier
+ // slow path, specifying both the original `expected` and the unmarked old value from
+ // the main path attempt to emit CAS when it matched `expected` after marking.
+ // Otherwise the type of `expected` shall match the type of `new_value` and `old_value`.
if (type == DataType::Type::kInt64) {
- DCHECK(expected2.IsValid());
+ DCHECK(expected.IsRegisterPair());
+ DCHECK(new_value.IsRegisterPair());
+ DCHECK(old_value.IsRegisterPair());
} else {
- DCHECK(type == DataType::Type::kReference || !expected2.IsValid());
+ DCHECK(expected.IsRegister() ||
+ (type == DataType::Type::kReference && expected.IsRegisterPair()));
+ DCHECK(new_value.IsRegister());
+ DCHECK(old_value.IsRegister());
}
- DCHECK_EQ(new_value_high.IsValid(), type == DataType::Type::kInt64);
- DCHECK_EQ(old_value_high.IsValid(), type == DataType::Type::kInt64);
-
ArmVIXLAssembler* assembler = codegen->GetAssembler();
// do {
// old_value = [ptr]; // Load exclusive.
- // if (old_value != expected && old_value != expected2) goto cmp_failure;
+ // if (old_value != expected) goto cmp_failure;
// store_result = failed([ptr] <- new_value); // Store exclusive.
// } while (strong && store_result);
//
- // (The `old_value != expected2` part is emitted only when `expected2` is a valid register
- // for references. For Int64, the `expected2` is used as the high word of `expected`.)
- //
// If `success` is a valid register, there are additional instructions in the above code
// to report success with value 1 and failure with value 0 in that register.
@@ -3180,34 +3205,39 @@
if (strong) {
__ Bind(&loop_head);
}
- EmitLoadExclusive(codegen, type, ptr, old_value, old_value_high);
+ EmitLoadExclusive(codegen, type, ptr, old_value);
// We do not need to initialize the failure code for comparison failure if the
// branch goes to the read barrier slow path that clobbers `success` anyway.
bool init_failure_for_cmp =
success.IsValid() &&
- !(kEmitCompilerReadBarrier && type == DataType::Type::kReference && !expected2.IsValid());
+ !(kEmitCompilerReadBarrier && type == DataType::Type::kReference && expected.IsRegister());
// Instruction scheduling: Loading a constant between LDREX* and using the loaded value
// is essentially free, so prepare the failure value here if we can.
- if (init_failure_for_cmp && !success.Is(old_value)) {
+ bool init_failure_for_cmp_early =
+ init_failure_for_cmp && !old_value.Contains(LocationFrom(success));
+ if (init_failure_for_cmp_early) {
__ Mov(success, 0); // Indicate failure if the comparison fails.
}
- __ Cmp(old_value, expected);
if (type == DataType::Type::kInt64) {
+ __ Cmp(LowRegisterFrom(old_value), LowRegisterFrom(expected));
ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
__ it(eq);
- __ cmp(eq, old_value_high, expected2);
- } else if (expected2.IsValid()) {
+ __ cmp(eq, HighRegisterFrom(old_value), HighRegisterFrom(expected));
+ } else if (expected.IsRegisterPair()) {
DCHECK_EQ(type, DataType::Type::kReference);
- // If the newly loaded value did not match `expected`, compare with `expected2`.
+ // Check if the loaded value matches either of the two registers in `expected`.
+ __ Cmp(RegisterFrom(old_value), LowRegisterFrom(expected));
ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
__ it(ne);
- __ cmp(ne, old_value, expected2);
+ __ cmp(ne, RegisterFrom(old_value), HighRegisterFrom(expected));
+ } else {
+ __ Cmp(RegisterFrom(old_value), RegisterFrom(expected));
}
- if (init_failure_for_cmp && success.Is(old_value)) {
+ if (init_failure_for_cmp && !init_failure_for_cmp_early) {
__ Mov(LeaveFlags, success, 0); // Indicate failure if the comparison fails.
}
__ B(ne, cmp_failure, /*is_far_target=*/ cmp_failure_is_far_target);
- EmitStoreExclusive(codegen, type, ptr, store_result, new_value, new_value_high);
+ EmitStoreExclusive(codegen, type, ptr, store_result, new_value);
if (strong) {
// Instruction scheduling: Loading a constant between STREX* and using its result
// is essentially free, so prepare the success value here if needed.
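For references with read barriers, the register pair in `expected` encodes two
acceptable values; a minimal C++ model of the compare emitted above (the IT
block folds the second compare under ne):

    #include <cstdint>
    // Model of: CMP old_value, expected_lo; IT ne; CMPne old_value, expected_hi
    bool CasCompareReference(uint32_t old_value,
                             uint32_t expected,
                             uint32_t expected_unmarked) {
      return old_value == expected || old_value == expected_unmarked;
    }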
@@ -3317,14 +3347,11 @@
/*cmp_failure=*/ success_.IsValid() ? GetExitLabel() : &mark_old_value,
/*cmp_failure_is_far_target=*/ success_.IsValid(),
tmp_ptr,
- new_value_,
- /*new_value_high=*/ vixl32::Register(),
- /*old_value=*/ old_value_temp_,
- /*old_value_high=*/ vixl32::Register(),
+ /*expected=*/ LocationFrom(expected_, old_value_),
+ /*new_value=*/ LocationFrom(new_value_),
+ /*old_value=*/ LocationFrom(old_value_temp_),
store_result_,
- success_,
- expected_,
- /*expected2=*/ old_value_);
+ success_);
if (!success_.IsValid()) {
// To reach this point, the `old_value_temp_` must be either a from-space or a to-space
// reference of the `expected_` object. Update the `old_value_` to the to-space reference.
@@ -3447,14 +3474,11 @@
cmp_failure,
/*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
tmp_ptr,
- new_value,
- /*new_value_high=*/ vixl32::Register(), // TODO: Int64
- /*old_value=*/ tmp,
- /*old_value_high=*/ vixl32::Register(), // TODO: Int64
+ /*expected=*/ LocationFrom(expected), // TODO: Int64
+ /*new_value=*/ LocationFrom(new_value), // TODO: Int64
+ /*old_value=*/ LocationFrom(tmp), // TODO: Int64
/*store_result=*/ tmp,
- /*success=*/ out,
- expected,
- /*expected2=*/ vixl32::Register());
+ /*success=*/ out);
__ Bind(exit_loop);
codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
@@ -3485,6 +3509,7 @@
enum class GetAndUpdateOp {
kSet,
kAdd,
+ kAddWithByteSwap,
kAnd,
kOr,
kXor
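kAddWithByteSwap exists because a view with non-native byte order must add in
value order, not memory order; a minimal C++ model for the 32-bit case (the
real code keeps this between the LDREX and STREX of the update loop):

    #include <cstdint>
    uint32_t NewRawValueForGetAndAdd(uint32_t loaded_raw, uint32_t arg) {
      uint32_t old_value = __builtin_bswap32(loaded_raw);  // to native order
      uint32_t new_value = old_value + arg;                // add in value order
      return __builtin_bswap32(new_value);                 // back to memory order
    }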
@@ -3501,107 +3526,198 @@
Location maybe_vreg_temp) {
ArmVIXLAssembler* assembler = codegen->GetAssembler();
- vixl32::Register old_value_reg;
- vixl32::Register old_value_high;
- vixl32::Register new_value;
- vixl32::Register new_value_high;
+ Location loaded_value;
+ Location new_value;
switch (get_and_update_op) {
case GetAndUpdateOp::kSet:
- if (load_store_type == DataType::Type::kInt64) {
- old_value_reg = LowRegisterFrom(old_value);
- old_value_high = HighRegisterFrom(old_value);
- new_value = LowRegisterFrom(arg);
- new_value_high = HighRegisterFrom(arg);
- } else {
- old_value_reg = RegisterFrom(old_value);
- new_value = RegisterFrom(arg);
- }
+ loaded_value = old_value;
+ new_value = arg;
break;
- case GetAndUpdateOp::kAdd:
- if (old_value.IsFpuRegister()) {
- old_value_reg = RegisterFrom(maybe_temp);
- new_value = old_value_reg; // Use the same temporary for the new value.
+ case GetAndUpdateOp::kAddWithByteSwap:
+ if (old_value.IsRegisterPair()) {
+ // To avoid register overlap when reversing bytes, load into temps.
+ DCHECK(maybe_temp.IsRegisterPair());
+ loaded_value = maybe_temp;
+ new_value = loaded_value; // Use the same temporaries for the new value.
break;
}
+ FALLTHROUGH_INTENDED;
+ case GetAndUpdateOp::kAdd:
if (old_value.IsFpuRegisterPair()) {
- old_value_reg = LowRegisterFrom(maybe_temp);
- old_value_high = HighRegisterFrom(maybe_temp);
- new_value = old_value_reg; // Use the same temporaries for the new value.
- new_value_high = old_value_high;
+ DCHECK(maybe_temp.IsRegisterPair());
+ loaded_value = maybe_temp;
+ new_value = loaded_value; // Use the same temporaries for the new value.
+ break;
+ }
+ if (old_value.IsFpuRegister()) {
+ DCHECK(maybe_temp.IsRegister());
+ loaded_value = maybe_temp;
+ new_value = loaded_value; // Use the same temporary for the new value.
break;
}
FALLTHROUGH_INTENDED;
case GetAndUpdateOp::kAnd:
case GetAndUpdateOp::kOr:
case GetAndUpdateOp::kXor:
- if (load_store_type == DataType::Type::kInt64) {
- old_value_reg = LowRegisterFrom(old_value);
- old_value_high = HighRegisterFrom(old_value);
- new_value = LowRegisterFrom(maybe_temp);
- new_value_high = HighRegisterFrom(maybe_temp);
- } else {
- old_value_reg = RegisterFrom(old_value);
- new_value = RegisterFrom(maybe_temp);
- }
+ loaded_value = old_value;
+ new_value = maybe_temp;
break;
}
vixl32::Label loop_label;
__ Bind(&loop_label);
- EmitLoadExclusive(codegen, load_store_type, ptr, old_value_reg, old_value_high);
+ EmitLoadExclusive(codegen, load_store_type, ptr, loaded_value);
switch (get_and_update_op) {
case GetAndUpdateOp::kSet:
break;
+ case GetAndUpdateOp::kAddWithByteSwap:
+ if (arg.IsFpuRegisterPair()) {
+ GenerateReverseBytes(assembler, DataType::Type::kFloat64, loaded_value, old_value);
+ vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
+ __ Vadd(sum, DRegisterFrom(old_value), DRegisterFrom(arg));
+ __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), sum); // Swap low/high.
+ } else if (arg.IsFpuRegister()) {
+ GenerateReverseBytes(assembler, DataType::Type::kFloat32, loaded_value, old_value);
+ vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp); // The temporary is a pair.
+ __ Vadd(sum, SRegisterFrom(old_value), SRegisterFrom(arg));
+ __ Vmov(RegisterFrom(new_value), sum);
+ } else if (load_store_type == DataType::Type::kInt64) {
+ GenerateReverseBytes(assembler, DataType::Type::kInt64, loaded_value, old_value);
+ // Swap low/high registers for the addition results.
+ __ Adds(HighRegisterFrom(new_value), LowRegisterFrom(old_value), LowRegisterFrom(arg));
+ __ Adc(LowRegisterFrom(new_value), HighRegisterFrom(old_value), HighRegisterFrom(arg));
+ } else {
+ GenerateReverseBytes(assembler, DataType::Type::kInt32, loaded_value, old_value);
+ __ Add(RegisterFrom(new_value), RegisterFrom(old_value), RegisterFrom(arg));
+ }
+ if (load_store_type == DataType::Type::kInt64) {
+ // The `new_value` already has the high and low word swapped. Reverse bytes in each.
+ GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
+ } else {
+ GenerateReverseBytes(assembler, load_store_type, new_value, new_value);
+ }
+ break;
case GetAndUpdateOp::kAdd:
if (arg.IsFpuRegisterPair()) {
vixl32::DRegister old_value_vreg = DRegisterFrom(old_value);
vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
- __ Vmov(old_value_vreg, old_value_reg, old_value_high);
+ __ Vmov(old_value_vreg, LowRegisterFrom(loaded_value), HighRegisterFrom(loaded_value));
__ Vadd(sum, old_value_vreg, DRegisterFrom(arg));
- __ Vmov(new_value, new_value_high, sum);
+ __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), sum);
} else if (arg.IsFpuRegister()) {
vixl32::SRegister old_value_vreg = SRegisterFrom(old_value);
vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp); // The temporary is a pair.
- __ Vmov(old_value_vreg, old_value_reg);
+ __ Vmov(old_value_vreg, RegisterFrom(loaded_value));
__ Vadd(sum, old_value_vreg, SRegisterFrom(arg));
- __ Vmov(new_value, sum);
+ __ Vmov(RegisterFrom(new_value), sum);
} else if (load_store_type == DataType::Type::kInt64) {
- __ Adds(new_value, old_value_reg, LowRegisterFrom(arg));
- __ Adc(new_value_high, old_value_high, HighRegisterFrom(arg));
+ __ Adds(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
+ __ Adc(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
} else {
- __ Add(new_value, old_value_reg, RegisterFrom(arg));
+ __ Add(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
}
break;
case GetAndUpdateOp::kAnd:
if (load_store_type == DataType::Type::kInt64) {
- __ And(new_value, old_value_reg, LowRegisterFrom(arg));
- __ And(new_value_high, old_value_high, HighRegisterFrom(arg));
+ __ And(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
+ __ And(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
} else {
- __ And(new_value, old_value_reg, RegisterFrom(arg));
+ __ And(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
}
break;
case GetAndUpdateOp::kOr:
if (load_store_type == DataType::Type::kInt64) {
- __ Orr(new_value, old_value_reg, LowRegisterFrom(arg));
- __ Orr(new_value_high, old_value_high, HighRegisterFrom(arg));
+ __ Orr(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
+ __ Orr(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
} else {
- __ Orr(new_value, old_value_reg, RegisterFrom(arg));
+ __ Orr(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
}
break;
case GetAndUpdateOp::kXor:
if (load_store_type == DataType::Type::kInt64) {
- __ Eor(new_value, old_value_reg, LowRegisterFrom(arg));
- __ Eor(new_value_high, old_value_high, HighRegisterFrom(arg));
+ __ Eor(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
+ __ Eor(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
} else {
- __ Eor(new_value, old_value_reg, RegisterFrom(arg));
+ __ Eor(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
}
break;
}
- EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value, new_value_high);
+ EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value);
__ Cmp(store_result, 0);
__ B(ne, &loop_label);
}
+class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL {
+ public:
+ VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order)
+ : IntrinsicSlowPathARMVIXL(invoke),
+ order_(order),
+ atomic_(false),
+ return_success_(false),
+ strong_(false),
+ get_and_update_op_(GetAndUpdateOp::kAdd) {
+ }
+
+ vixl32::Label* GetByteArrayViewCheckLabel() {
+ return &byte_array_view_check_label_;
+ }
+
+ vixl32::Label* GetNativeByteOrderLabel() {
+ return &native_byte_order_label_;
+ }
+
+ void SetAtomic(bool atomic) {
+ DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGet ||
+ GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kSet);
+ atomic_ = atomic;
+ }
+
+ void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
+ if (return_success) {
+ DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
+ } else {
+ DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
+ }
+ return_success_ = return_success;
+ strong_ = strong;
+ }
+
+ void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
+ DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
+ get_and_update_op_ = get_and_update_op;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) override {
+ if (GetByteArrayViewCheckLabel()->IsReferenced()) {
+ EmitByteArrayViewCode(codegen_in);
+ }
+ IntrinsicSlowPathARMVIXL::EmitNativeCode(codegen_in);
+ }
+
+ private:
+ HInvoke* GetInvoke() const {
+ return GetInstruction()->AsInvoke();
+ }
+
+ mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
+ return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
+ }
+
+ void EmitByteArrayViewCode(CodeGenerator* codegen_in);
+
+ vixl32::Label byte_array_view_check_label_;
+ vixl32::Label native_byte_order_label_;
+ // Shared parameter for all VarHandle intrinsics.
+ std::memory_order order_;
+ // Extra argument for GenerateVarHandleGet() and GenerateVarHandleSet().
+ bool atomic_;
+ // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
+ bool return_success_;
+ bool strong_;
+ // Extra argument for GenerateVarHandleGetAndUpdate().
+ GetAndUpdateOp get_and_update_op_;
+};
+
// Generate subtype check without read barriers.
static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARMVIXL* codegen,
SlowPathCodeARMVIXL* slow_path,
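The slow path stores `order_` and the per-template extras so that its byte
array view code can re-enter the generators with byte swapping enabled; in
outline (an assumed shape, the actual EmitByteArrayViewCode is defined
further down in this change):

    // Assumed dispatch inside EmitByteArrayViewCode, per access mode template:
    //   kGet: GenerateVarHandleGet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
    //   kSet: GenerateVarHandleSet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
    //   kCompareAndSet / kCompareAndExchange:
    //     GenerateVarHandleCompareAndSetOrExchange(
    //         invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
    //   kGetAndUpdate:
    //     GenerateVarHandleGetAndUpdate(
    //         invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);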
@@ -3761,7 +3877,7 @@
static void GenerateVarHandleArrayChecks(HInvoke* invoke,
CodeGeneratorARMVIXL* codegen,
- SlowPathCodeARMVIXL* slow_path) {
+ VarHandleSlowPathARMVIXL* slow_path) {
ArmVIXLAssembler* assembler = codegen->GetAssembler();
vixl32::Register varhandle = InputRegisterAt(invoke, 0);
vixl32::Register object = InputRegisterAt(invoke, 1);
@@ -3781,8 +3897,7 @@
__ Cmp(object, 0);
__ B(eq, slow_path->GetEntryLabel());
- // Use the first temporary register, whether it's for the declaring class or the offset.
- // It is not used yet at this point.
+ // Use the offset temporary register. It is not used yet at this point.
vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
@@ -3798,19 +3913,6 @@
__ Cmp(temp2, 0);
__ B(eq, slow_path->GetEntryLabel());
- // Check that the coordinateType0 is an array type. We do not need a read barrier
- // for loading constant reference fields (or chains of them) for comparison with null,
- // or for finally loading a constant primitive field (primitive type) below.
- __ Ldr(temp2, MemOperand(temp, component_type_offset.Int32Value()));
- codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
- __ Cmp(temp2, 0);
- __ B(eq, slow_path->GetEntryLabel());
-
- // Check that the array component type matches the primitive type.
- __ Ldrh(temp2, MemOperand(temp2, primitive_type_offset.Int32Value()));
- __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
- __ B(ne, slow_path->GetEntryLabel());
-
// Check object class against componentType0.
//
// This is an exact check and we defer other cases to the runtime. This includes
@@ -3826,6 +3928,32 @@
__ Cmp(temp, temp2);
__ B(ne, slow_path->GetEntryLabel());
+ // Check that the coordinateType0 is an array type. We do not need a read barrier
+ // for loading constant reference fields (or chains of them) for comparison with null,
+ // nor for finally loading a constant primitive field (primitive type) below.
+ __ Ldr(temp2, MemOperand(temp, component_type_offset.Int32Value()));
+ codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ __ Cmp(temp2, 0);
+ __ B(eq, slow_path->GetEntryLabel());
+
+ // Check that the array component type matches the primitive type.
+ // With the exception of `kPrimNot`, `kPrimByte` and `kPrimBoolean`,
+ // we shall check for a byte array view in the slow path.
+ // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
+ // so we cannot emit that if we're JITting without boot image.
+ bool boot_image_available =
+ codegen->GetCompilerOptions().IsBootImage() ||
+ !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
+ DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
+ bool can_be_view =
+ ((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
+ boot_image_available;
+ vixl32::Label* slow_path_label =
+ can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
+ __ Ldrh(temp2, MemOperand(temp2, primitive_type_offset.Int32Value()));
+ __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
+ __ B(ne, slow_path_label);
+
// Check for array index out of bounds.
__ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
__ Cmp(index, temp);
@@ -3834,7 +3962,7 @@
static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
CodeGeneratorARMVIXL* codegen,
- SlowPathCodeARMVIXL* slow_path) {
+ VarHandleSlowPathARMVIXL* slow_path) {
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
if (expected_coordinates_count == 0u) {
GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
@@ -3846,24 +3974,45 @@
}
}
+static VarHandleSlowPathARMVIXL* GenerateVarHandleChecks(HInvoke* invoke,
+ CodeGeneratorARMVIXL* codegen,
+ std::memory_order order,
+ DataType::Type type) {
+ VarHandleSlowPathARMVIXL* slow_path =
+ new (codegen->GetScopedAllocator()) VarHandleSlowPathARMVIXL(invoke, order);
+ codegen->AddSlowPath(slow_path);
+
+ GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
+ GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
+
+ return slow_path;
+}
+
struct VarHandleTarget {
vixl32::Register object; // The object holding the value to operate on.
vixl32::Register offset; // The offset of the value to operate on.
};
-static VarHandleTarget GenerateVarHandleTarget(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
- ArmVIXLAssembler* assembler = codegen->GetAssembler();
- vixl32::Register varhandle = InputRegisterAt(invoke, 0);
+static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
LocationSummary* locations = invoke->GetLocations();
VarHandleTarget target;
// The temporary allocated for loading the offset.
- target.offset = RegisterFrom(locations->GetTemp((expected_coordinates_count == 0u) ? 1u : 0u));
+ target.offset = RegisterFrom(locations->GetTemp(0u));
// The reference to the object that holds the value to operate on.
target.object = (expected_coordinates_count == 0u)
- ? RegisterFrom(locations->GetTemp(0u))
+ ? RegisterFrom(locations->GetTemp(1u))
: InputRegisterAt(invoke, 1);
+ return target;
+}
+
+static void GenerateVarHandleTarget(HInvoke* invoke,
+ const VarHandleTarget& target,
+ CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ vixl32::Register varhandle = InputRegisterAt(invoke, 0);
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
if (expected_coordinates_count <= 1u) {
// For static fields, we need to fill the `target.object` with the declaring class,
@@ -3900,8 +4049,6 @@
}
__ Add(target.offset, shifted_index, data_offset.Int32Value());
}
-
- return target;
}
static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
@@ -4002,10 +4149,6 @@
for (size_t i = 0; i != expected_coordinates_count; ++i) {
locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
}
- if (expected_coordinates_count == 0u) {
- // Add a temporary to hold the declaring class.
- locations->AddTemp(Location::RequiresRegister());
- }
if (return_type != DataType::Type::kVoid) {
if (DataType::IsFloatingPointType(return_type)) {
locations->SetOut(Location::RequiresFpuRegister());
@@ -4034,6 +4177,10 @@
} else {
locations->AddTemp(Location::RequiresRegister());
}
+ if (expected_coordinates_count == 0u) {
+ // Add a temporary to hold the declaring class.
+ locations->AddTemp(Location::RequiresRegister());
+ }
return locations;
}
@@ -4070,7 +4217,8 @@
static void GenerateVarHandleGet(HInvoke* invoke,
CodeGeneratorARMVIXL* codegen,
std::memory_order order,
- bool atomic) {
+ bool atomic,
+ bool byte_swap = false) {
DataType::Type type = invoke->GetType();
DCHECK_NE(type, DataType::Type::kVoid);
@@ -4078,14 +4226,14 @@
ArmVIXLAssembler* assembler = codegen->GetAssembler();
Location out = locations->Out();
- SlowPathCodeARMVIXL* slow_path =
- new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
- codegen->AddSlowPath(slow_path);
-
- GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
- GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
-
- VarHandleTarget target = GenerateVarHandleTarget(invoke, codegen);
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathARMVIXL* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
+ slow_path->SetAtomic(atomic);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
Location maybe_temp = Location::NoLocation();
Location maybe_temp2 = Location::NoLocation();
@@ -4094,28 +4242,63 @@
// Reuse the offset temporary.
maybe_temp = LocationFrom(target.offset);
} else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
- // Reuse the declaring class (if present) and offset temporary.
+ // Reuse the offset temporary and declaring class (if present).
// The address shall be constructed in the scratch register before they are clobbered.
- maybe_temp = locations->GetTemp(0);
+ maybe_temp = LocationFrom(target.offset);
+ DCHECK(maybe_temp.Equals(locations->GetTemp(0)));
if (type == DataType::Type::kFloat64) {
maybe_temp2 = locations->GetTemp(1);
maybe_temp3 = locations->GetTemp(2);
}
}
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ Location loaded_value = out;
+ DataType::Type load_type = type;
+ if (byte_swap) {
+ if (type == DataType::Type::kFloat64) {
+ if (Use64BitExclusiveLoadStore(atomic, codegen)) {
+ // Change load type to Int64 and promote `maybe_temp2` and `maybe_temp3` to `loaded_value`.
+ loaded_value = LocationFrom(RegisterFrom(maybe_temp2), RegisterFrom(maybe_temp3));
+ maybe_temp2 = Location::NoLocation();
+ maybe_temp3 = Location::NoLocation();
+ } else {
+ // Use the offset temporary and the scratch register.
+ loaded_value = LocationFrom(target.offset, temps.Acquire());
+ }
+ load_type = DataType::Type::kInt64;
+ } else if (type == DataType::Type::kFloat32) {
+ // Reuse the offset temporary.
+ loaded_value = LocationFrom(target.offset);
+ load_type = DataType::Type::kInt32;
+ } else if (type == DataType::Type::kInt64) {
+ // Swap the high and low registers and reverse the bytes in each after the load.
+ loaded_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
+ }
+ }
+
GenerateIntrinsicGet(invoke,
codegen,
- type,
+ load_type,
order,
atomic,
target.object,
target.offset,
- out,
+ loaded_value,
maybe_temp,
maybe_temp2,
maybe_temp3);
+ if (byte_swap) {
+ if (type == DataType::Type::kInt64) {
+ GenerateReverseBytesInPlaceForEachWord(assembler, loaded_value);
+ } else {
+ GenerateReverseBytes(assembler, type, loaded_value, out);
+ }
+ }
- __ Bind(slow_path->GetExitLabel());
+ if (!byte_swap) {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
@@ -4159,21 +4342,37 @@
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
- DataType::Type value_type = invoke->InputAt(invoke->GetNumberOfArguments() - 1u)->GetType();
- if (DataType::Is64BitType(value_type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
- // We need 2 or 3 temporaries for GenerateIntrinsicSet() but we can reuse the
- // declaring class (if present) and offset temporary.
- DCHECK_EQ(locations->GetTempCount(),
- (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
- size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 3u : 2u;
- locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
+ uint32_t number_of_arguments = invoke->GetNumberOfArguments();
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
+ if (DataType::Is64BitType(value_type)) {
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+ DCHECK_EQ(locations->GetTempCount(), (expected_coordinates_count == 0) ? 2u : 1u);
+ HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
+ bool has_reverse_bytes_slow_path =
+ (expected_coordinates_count == 2u) &&
+ !(arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern());
+ if (Use64BitExclusiveLoadStore(atomic, codegen)) {
+ // We need 4 temporaries in the byte array view slow path. Otherwise, we need
+ // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
+ // We can reuse the offset temporary and declaring class (if present).
+ size_t temps_needed = has_reverse_bytes_slow_path
+ ? 4u
+ : ((value_type == DataType::Type::kFloat64) ? 3u : 2u);
+ locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
+ } else if (has_reverse_bytes_slow_path) {
+ // We need 2 temps for the value with reversed bytes in the byte array view slow path.
+ // We can reuse the offset temporary.
+ DCHECK_EQ(locations->GetTempCount(), 1u);
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
static void GenerateVarHandleSet(HInvoke* invoke,
CodeGeneratorARMVIXL* codegen,
std::memory_order order,
- bool atomic) {
+ bool atomic,
+ bool byte_swap = false) {
uint32_t value_index = invoke->GetNumberOfArguments() - 1;
DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
@@ -4181,20 +4380,20 @@
LocationSummary* locations = invoke->GetLocations();
Location value = locations->InAt(value_index);
- SlowPathCodeARMVIXL* slow_path =
- new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
- codegen->AddSlowPath(slow_path);
-
- GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
- GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, value_type);
-
- VarHandleTarget target = GenerateVarHandleTarget(invoke, codegen);
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathARMVIXL* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
+ slow_path->SetAtomic(atomic);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
Location maybe_temp = Location::NoLocation();
Location maybe_temp2 = Location::NoLocation();
Location maybe_temp3 = Location::NoLocation();
if (DataType::Is64BitType(value_type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
- // Reuse the declaring class (if present) and offset temporary.
+ // Reuse the offset temporary and declaring class (if present).
// The address shall be constructed in the scratch register before they are clobbered.
maybe_temp = locations->GetTemp(0);
maybe_temp2 = locations->GetTemp(1);
@@ -4203,6 +4402,44 @@
}
}
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ if (byte_swap) {
+ if (DataType::Is64BitType(value_type) || value_type == DataType::Type::kFloat32) {
+ // Calculate the address in scratch register, so that we can use the offset temporary.
+ vixl32::Register base = temps.Acquire();
+ __ Add(base, target.object, target.offset);
+ target.object = base;
+ target.offset = vixl32::Register();
+ }
+ Location original_value = value;
+ if (DataType::Is64BitType(value_type)) {
+ size_t temp_start = 0u;
+ if (Use64BitExclusiveLoadStore(atomic, codegen)) {
+ // Clear `maybe_temp3` which was initialized above for Float64.
+ DCHECK(value_type != DataType::Type::kFloat64 || maybe_temp3.Equals(locations->GetTemp(2)));
+ maybe_temp3 = Location::NoLocation();
+ temp_start = 2u;
+ }
+ value = LocationFrom(RegisterFrom(locations->GetTemp(temp_start)),
+ RegisterFrom(locations->GetTemp(temp_start + 1u)));
+ if (value_type == DataType::Type::kFloat64) {
+ __ Vmov(HighRegisterFrom(value), LowRegisterFrom(value), DRegisterFrom(original_value));
+ GenerateReverseBytesInPlaceForEachWord(assembler, value);
+ value_type = DataType::Type::kInt64;
+ } else {
+ GenerateReverseBytes(assembler, value_type, original_value, value);
+ }
+ } else if (value_type == DataType::Type::kFloat32) {
+ value = locations->GetTemp(0); // Use the offset temporary which was freed above.
+ __ Vmov(RegisterFrom(value), SRegisterFrom(original_value));
+ GenerateReverseBytes(assembler, DataType::Type::kInt32, value, value);
+ value_type = DataType::Type::kInt32;
+ } else {
+ value = LocationFrom(temps.Acquire());
+ GenerateReverseBytes(assembler, value_type, original_value, value);
+ }
+ }
+
GenerateIntrinsicSet(codegen,
value_type,
order,
@@ -4217,13 +4454,14 @@
if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
// Reuse the offset temporary for MarkGCCard.
vixl32::Register temp = target.offset;
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
vixl32::Register card = temps.Acquire();
vixl32::Register value_reg = RegisterFrom(value);
codegen->MarkGCCard(temp, card, target.object, value_reg, /*value_can_be_null=*/ true);
}
- __ Bind(slow_path->GetExitLabel());
+ if (!byte_swap) {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
@@ -4306,6 +4544,15 @@
? (return_success ? 5u : 7u)
: (return_success ? 3u : 4u);
locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
+ } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
+ // Add temps for the byte-reversed `expected` and `new_value` in the byte array view slow path.
+ DCHECK_EQ(locations->GetTempCount(), 1u);
+ if (value_type == DataType::Type::kInt64) {
+ // We would ideally add 4 temps for Int64 but that would simply run out of registers,
+ // so we instead need to reverse bytes in actual arguments and undo it at the end.
+ } else {
+ locations->AddRegisterTemps(2u);
+ }
}
if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
// Add a temporary for store result, also used for the `old_value_temp` in slow path.
@@ -4317,7 +4564,8 @@
CodeGeneratorARMVIXL* codegen,
std::memory_order order,
bool return_success,
- bool strong) {
+ bool strong,
+ bool byte_swap = false) {
DCHECK(return_success || strong);
uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
@@ -4331,14 +4579,14 @@
Location new_value = locations->InAt(new_value_index);
Location out = locations->Out();
- SlowPathCodeARMVIXL* slow_path =
- new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
- codegen->AddSlowPath(slow_path);
-
- GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
- GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, value_type);
-
- VarHandleTarget target = GenerateVarHandleTarget(invoke, codegen);
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathARMVIXL* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
+ slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
bool seq_cst_barrier = (order == std::memory_order_seq_cst);
bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
@@ -4360,52 +4608,75 @@
// Reuse the declaring class (if present) and offset temporary for non-reference types,
// the address has already been constructed in the scratch register. We are more careful
// for references due to read and write barrier, see below.
- vixl32::Register new_value_reg;
- vixl32::Register new_value_high;
- vixl32::Register expected_reg;
- vixl32::Register expected_high;
- vixl32::Register old_value;
- vixl32::Register old_value_high;
+ Location old_value;
vixl32::Register store_result;
vixl32::Register success = return_success ? RegisterFrom(out) : vixl32::Register();
DataType::Type cas_type = value_type;
if (value_type == DataType::Type::kFloat64) {
- expected_reg = RegisterFrom(locations->GetTemp(0));
- expected_high = RegisterFrom(locations->GetTemp(1));
- new_value_reg = RegisterFrom(locations->GetTemp(2));
- new_value_high = RegisterFrom(locations->GetTemp(3));
+ vixl32::DRegister expected_vreg = DRegisterFrom(expected);
+ vixl32::DRegister new_value_vreg = DRegisterFrom(new_value);
+ expected =
+ LocationFrom(RegisterFrom(locations->GetTemp(0)), RegisterFrom(locations->GetTemp(1)));
+ new_value =
+ LocationFrom(RegisterFrom(locations->GetTemp(2)), RegisterFrom(locations->GetTemp(3)));
store_result = RegisterFrom(locations->GetTemp(4));
- old_value = return_success ? success : RegisterFrom(locations->GetTemp(5));
- old_value_high = return_success ? store_result : RegisterFrom(locations->GetTemp(6));
- __ Vmov(new_value_reg, new_value_high, DRegisterFrom(new_value));
- __ Vmov(expected_reg, expected_high, DRegisterFrom(expected));
+ old_value = return_success
+ ? LocationFrom(success, store_result)
+ : LocationFrom(RegisterFrom(locations->GetTemp(5)), RegisterFrom(locations->GetTemp(6)));
+ if (byte_swap) {
+ __ Vmov(HighRegisterFrom(expected), LowRegisterFrom(expected), expected_vreg);
+ __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), new_value_vreg);
+ GenerateReverseBytesInPlaceForEachWord(assembler, expected);
+ GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
+ } else {
+ __ Vmov(LowRegisterFrom(expected), HighRegisterFrom(expected), expected_vreg);
+ __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), new_value_vreg);
+ }
cas_type = DataType::Type::kInt64;
} else if (value_type == DataType::Type::kFloat32) {
- expected_reg = RegisterFrom(locations->GetTemp(0));
- new_value_reg = RegisterFrom(locations->GetTemp(1));
+ vixl32::SRegister expected_vreg = SRegisterFrom(expected);
+ vixl32::SRegister new_value_vreg = SRegisterFrom(new_value);
+ expected = locations->GetTemp(0);
+ new_value = locations->GetTemp(1);
store_result = RegisterFrom(locations->GetTemp(2));
- old_value = return_success ? store_result : RegisterFrom(locations->GetTemp(3));
- __ Vmov(new_value_reg, SRegisterFrom(new_value));
- __ Vmov(expected_reg, SRegisterFrom(expected));
+ old_value = return_success ? LocationFrom(store_result) : locations->GetTemp(3);
+ __ Vmov(RegisterFrom(expected), expected_vreg);
+ __ Vmov(RegisterFrom(new_value), new_value_vreg);
+ if (byte_swap) {
+ GenerateReverseBytes(assembler, DataType::Type::kInt32, expected, expected);
+ GenerateReverseBytes(assembler, DataType::Type::kInt32, new_value, new_value);
+ }
cas_type = DataType::Type::kInt32;
} else if (value_type == DataType::Type::kInt64) {
- expected_reg = LowRegisterFrom(expected);
- expected_high = HighRegisterFrom(expected);
- new_value_reg = LowRegisterFrom(new_value);
- new_value_high = HighRegisterFrom(new_value);
store_result = RegisterFrom(locations->GetTemp(0));
- old_value = return_success ? success : LowRegisterFrom(out);
- old_value_high = return_success ? store_result : HighRegisterFrom(out);
+ old_value = return_success
+ ? LocationFrom(success, store_result)
+ // If swapping bytes, swap the high/low regs and reverse the bytes in each after the load.
+ : byte_swap ? LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)) : out;
+ if (byte_swap) {
+ // Due to lack of registers, reverse bytes in `expected` and `new_value` and undo that later.
+ GenerateReverseBytesInPlaceForEachWord(assembler, expected);
+ expected = LocationFrom(HighRegisterFrom(expected), LowRegisterFrom(expected));
+ GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
+ new_value = LocationFrom(HighRegisterFrom(new_value), LowRegisterFrom(new_value));
+ }
} else {
- expected_reg = RegisterFrom(expected);
- new_value_reg = RegisterFrom(new_value);
// Use the last temp. For references with read barriers, this is an extra temporary
// allocated to avoid overwriting the temporaries for declaring class (if present)
// and offset as they are needed in the slow path. Otherwise, this is the offset
// temporary which also works for references without read barriers that need the
// object register preserved for the write barrier.
store_result = RegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u));
- old_value = return_success ? store_result : RegisterFrom(out);
+ old_value = return_success ? LocationFrom(store_result) : out;
+ if (byte_swap) {
+ DCHECK_EQ(locations->GetTempCount(), 3u);
+ Location original_expected = expected;
+ Location original_new_value = new_value;
+ expected = locations->GetTemp(0);
+ new_value = locations->GetTemp(1);
+ GenerateReverseBytes(assembler, value_type, original_expected, expected);
+ GenerateReverseBytes(assembler, value_type, original_new_value, new_value);
+ }
}
vixl32::Label exit_loop_label;
@@ -4422,9 +4693,9 @@
strong,
target.object,
target.offset,
- expected_reg,
- new_value_reg,
- old_value,
+ RegisterFrom(expected),
+ RegisterFrom(new_value),
+ RegisterFrom(old_value),
old_value_temp,
store_result,
success,
@@ -4440,14 +4711,11 @@
cmp_failure,
/*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
tmp_ptr,
- new_value_reg,
- new_value_high,
+ expected,
+ new_value,
old_value,
- old_value_high,
store_result,
- success,
- expected_reg,
- /*expected2=*/ expected_high);
+ success);
__ Bind(exit_loop);
if (acquire_barrier) {
@@ -4455,10 +4723,22 @@
seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
}
- if (!return_success && value_type == DataType::Type::kFloat64) {
- __ Vmov(DRegisterFrom(out), old_value, old_value_high);
- } else if (!return_success && value_type == DataType::Type::kFloat32) {
- __ Vmov(SRegisterFrom(out), old_value);
+ if (!return_success) {
+ if (byte_swap) {
+ if (value_type == DataType::Type::kInt64) {
+ GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
+ // Undo byte swapping in `expected` and `new_value`. We do not know
+ // whether the values in these registers shall be needed later.
+ GenerateReverseBytesInPlaceForEachWord(assembler, expected);
+ GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
+ } else {
+ GenerateReverseBytes(assembler, value_type, old_value, out);
+ }
+ } else if (value_type == DataType::Type::kFloat64) {
+ __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
+ } else if (value_type == DataType::Type::kFloat32) {
+ __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
+ }
}
if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
@@ -4467,10 +4747,12 @@
vixl32::Register card = tmp_ptr;
// Mark card for object assuming new value is stored.
bool new_value_can_be_null = true; // TODO: Worth finding out this information?
- codegen->MarkGCCard(temp, card, target.object, new_value_reg, new_value_can_be_null);
+ codegen->MarkGCCard(temp, card, target.object, RegisterFrom(new_value), new_value_can_be_null);
}
- __ Bind(slow_path->GetExitLabel());
+ if (!byte_swap) {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
@@ -4577,10 +4859,24 @@
// We need to preserve the declaring class (if present) and offset for read barrier
// slow paths, so we must use a separate temporary for the exclusive store result.
locations->AddTemp(Location::RequiresRegister());
+ } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
+ // Add temps for the byte-reversed `arg` in the byte array view slow path.
+ DCHECK_EQ(locations->GetTempCount(), 1u);
+ locations->AddRegisterTemps((value_type == DataType::Type::kInt64) ? 2u : 1u);
}
} else {
// We need temporaries for the new value and exclusive store result.
size_t temps_needed = DataType::Is64BitType(value_type) ? 3u : 2u;
+ if (get_and_update_op != GetAndUpdateOp::kAdd &&
+ GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
+ // Add temps for the byte-reversed `arg` in the byte array view slow path.
+ if (value_type == DataType::Type::kInt64) {
+ // Ideally we would add two temps for Int64, but that would run out of
+ // registers, so we instead reverse the bytes in the actual argument and undo it at the end.
+ } else {
+ temps_needed += 1u;
+ }
+ }
locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
if (DataType::IsFloatingPointType(value_type)) {
// Note: This shall allocate a D register. There is no way to request an S register.
@@ -4592,7 +4888,8 @@
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
CodeGeneratorARMVIXL* codegen,
GetAndUpdateOp get_and_update_op,
- std::memory_order order) {
+ std::memory_order order,
+ bool byte_swap = false) {
uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
@@ -4601,14 +4898,14 @@
Location arg = locations->InAt(arg_index);
Location out = locations->Out();
- SlowPathCodeARMVIXL* slow_path =
- new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
- codegen->AddSlowPath(slow_path);
-
- GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
- GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, value_type);
-
- VarHandleTarget target = GenerateVarHandleTarget(invoke, codegen);
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathARMVIXL* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
+ slow_path->SetGetAndUpdateOp(get_and_update_op);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
bool seq_cst_barrier = (order == std::memory_order_seq_cst);
bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
@@ -4625,12 +4922,8 @@
vixl32::Register tmp_ptr = temps.Acquire();
__ Add(tmp_ptr, target.object, target.offset);
- // Use the last temporary core register for the exclusive store result. This shall be
- // the offset temporary in some cases. (Non-core temp is needed for FP GetAndAdd.)
- size_t temp_count = locations->GetTempCount();
- Location last_temp = locations->GetTemp(temp_count - 1u);
- vixl32::Register store_result =
- RegisterFrom(last_temp.IsRegister() ? last_temp : locations->GetTemp(temp_count - 2u));
+ // Use the offset temporary for the exclusive store result.
+ vixl32::Register store_result = target.offset;
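+ // (`tmp_ptr` already holds `object + offset`, so the offset register is free to clobber.)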
// The load/store type is never floating point.
DataType::Type load_store_type = DataType::IsFloatingPointType(value_type)
@@ -4645,19 +4938,26 @@
// For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
// rather than moving between core and FP registers in the loop.
if (value_type == DataType::Type::kFloat64) {
- DCHECK_EQ(locations->GetTempCount(), 5u); // Four here plus the `store_result`.
- old_value = LocationFrom(RegisterFrom(locations->GetTemp(0)),
- RegisterFrom(locations->GetTemp(1)));
- vixl32::Register arg_temp = RegisterFrom(locations->GetTemp(2));
- vixl32::Register arg_temp_high = RegisterFrom(locations->GetTemp(3));
- __ Vmov(arg_temp, arg_temp_high, DRegisterFrom(arg));
- arg = LocationFrom(arg_temp, arg_temp_high);
+ vixl32::DRegister arg_vreg = DRegisterFrom(arg);
+ DCHECK_EQ(locations->GetTempCount(), 5u); // `store_result` and the four here.
+ old_value =
+ LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)));
+ arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4)));
+ if (byte_swap) {
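+ // Fill the core registers in reversed word order, then reverse the bytes
+ // within each word to complete the 64-bit byte reversal.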
+ __ Vmov(HighRegisterFrom(arg), LowRegisterFrom(arg), arg_vreg);
+ GenerateReverseBytesInPlaceForEachWord(assembler, arg);
+ } else {
+ __ Vmov(LowRegisterFrom(arg), HighRegisterFrom(arg), arg_vreg);
+ }
} else if (value_type == DataType::Type::kFloat32) {
- DCHECK_EQ(locations->GetTempCount(), 3u); // Two here plus the `store_result`.
- old_value = locations->GetTemp(0);
- vixl32::Register arg_temp = RegisterFrom(locations->GetTemp(1));
- __ Vmov(arg_temp, SRegisterFrom(arg));
- arg = LocationFrom(arg_temp);
+ vixl32::SRegister arg_vreg = SRegisterFrom(arg);
+ DCHECK_EQ(locations->GetTempCount(), 3u); // `store_result` and the two here.
+ old_value = locations->GetTemp(1);
+ arg = locations->GetTemp(2);
+ __ Vmov(RegisterFrom(arg), arg_vreg);
+ if (byte_swap) {
+ GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
+ }
} else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
if (kUseBakerReadBarrier) {
// Load the old value initially to a temporary register.
@@ -4669,20 +4969,42 @@
DCHECK(!store_result.Is(target.object));
DCHECK(!store_result.Is(target.offset));
}
+ } else if (byte_swap) {
+ Location original_arg = arg;
+ arg = locations->GetTemp(1);
+ if (value_type == DataType::Type::kInt64) {
+ arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2)));
+ // Swap the high/low regs and reverse the bytes in each after the load.
+ old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
+ }
+ GenerateReverseBytes(assembler, value_type, original_arg, arg);
}
} else {
- if (DataType::Is64BitType(value_type)) {
- maybe_temp = LocationFrom(RegisterFrom(locations->GetTemp(0)),
- RegisterFrom(locations->GetTemp(1)));
- DCHECK(!LowRegisterFrom(maybe_temp).Is(store_result));
- DCHECK(!HighRegisterFrom(maybe_temp).Is(store_result));
- } else {
- maybe_temp = locations->GetTemp(0);
- DCHECK(!RegisterFrom(maybe_temp).Is(store_result));
- }
+ maybe_temp = DataType::Is64BitType(value_type)
+ ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
+ : locations->GetTemp(1);
+ DCHECK(!maybe_temp.Contains(LocationFrom(store_result)));
if (DataType::IsFloatingPointType(value_type)) {
- DCHECK(last_temp.IsFpuRegisterPair());
- maybe_vreg_temp = last_temp;
+ maybe_vreg_temp = locations->GetTemp(locations->GetTempCount() - 1u);
+ DCHECK(maybe_vreg_temp.IsFpuRegisterPair());
+ }
+ if (byte_swap) {
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ // We need to do the byte swapping in the CAS loop for GetAndAdd.
+ get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
+ } else if (value_type == DataType::Type::kInt64) {
+ // Swap the high/low regs and reverse the bytes in each after the load.
+ old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
+ // Due to lack of registers, reverse bytes in `arg` and undo that later.
+ GenerateReverseBytesInPlaceForEachWord(assembler, arg);
+ arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg));
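+ // Swapping the register pair order on top of the per-word reversal above
+ // yields the full 64-bit byte-reversed value.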
+ } else {
+ DCHECK(!DataType::IsFloatingPointType(value_type));
+ Location original_arg = arg;
+ arg = locations->GetTemp(2);
+ DCHECK(!arg.Contains(LocationFrom(store_result)));
+ GenerateReverseBytes(assembler, value_type, original_arg, arg);
+ }
}
}
@@ -4701,7 +5023,19 @@
seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
}
- if (get_and_update_op == GetAndUpdateOp::kSet && DataType::IsFloatingPointType(value_type)) {
+ if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
+ if (value_type == DataType::Type::kInt64) {
+ GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
+ if (get_and_update_op != GetAndUpdateOp::kSet) {
+ // Undo the byte swapping in `arg`. We do not know whether the values
+ // in these registers will be needed later.
+ GenerateReverseBytesInPlaceForEachWord(assembler, arg);
+ }
+ } else {
+ GenerateReverseBytes(assembler, value_type, old_value, out);
+ }
+ } else if (get_and_update_op == GetAndUpdateOp::kSet &&
+ DataType::IsFloatingPointType(value_type)) {
if (value_type == DataType::Type::kFloat64) {
__ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
} else {
@@ -4731,7 +5065,9 @@
codegen->MarkGCCard(temp, card, target.object, RegisterFrom(arg), new_value_can_be_null);
}
- __ Bind(slow_path->GetExitLabel());
+ if (!byte_swap) {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
@@ -4854,6 +5190,108 @@
GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
}
+void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
+ DCHECK(GetByteArrayViewCheckLabel()->IsReferenced());
+ CodeGeneratorARMVIXL* codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_in);
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ HInvoke* invoke = GetInvoke();
+ mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
+ DataType::Type value_type =
+ GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
+ DCHECK_NE(value_type, DataType::Type::kReference);
+ size_t size = DataType::Size(value_type);
+ DCHECK_GT(size, 1u);
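+ // Byte array views exist only for types wider than a byte, so byte
+ // swapping is meaningful here.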
+ vixl32::Operand size_operand(dchecked_integral_cast<int32_t>(size));
+ vixl32::Register varhandle = InputRegisterAt(invoke, 0);
+ vixl32::Register object = InputRegisterAt(invoke, 1);
+ vixl32::Register index = InputRegisterAt(invoke, 2);
+
+ MemberOffset class_offset = mirror::Object::ClassOffset();
+ MemberOffset array_length_offset = mirror::Array::LengthOffset();
+ MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
+ MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
+
+ __ Bind(GetByteArrayViewCheckLabel());
+
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ {
+ // Use the offset temporary register. It is not used yet at this point.
+ vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
+
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ vixl32::Register temp2 = temps.Acquire();
+
+ // The main path checked that coordinateType0 is an array class that matches
+ // the class of the actual coordinate argument, but its component type does not
+ // match the value type. Check if `varhandle` references a ByteArrayViewVarHandle instance.
+ __ Ldr(temp, MemOperand(varhandle, class_offset.Int32Value()));
+ codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
+ __ Cmp(temp, temp2);
+ __ B(ne, GetEntryLabel());
+
+ // Check for array index out of bounds.
+ __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
+ if (!temp.IsLow()) {
+ // Avoid using the 32-bit `cmp temp, #imm` in the IT block by loading `size` into `temp2`.
+ __ Mov(temp2, size_operand);
+ }
+ __ Subs(temp, temp, index);
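+ // The SUBS above sets `lo` if `index > length`. The CMP below executes only
+ // when `hs` (`index <= length`) and sets `lo` if `length - index < size`, so
+ // the single B(lo) below rejects both out-of-bounds cases.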
+ {
+ // Use ExactAssemblyScope here because we are using IT.
+ ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
+ 2 * k16BitT32InstructionSizeInBytes);
+ __ it(hs);
+ if (temp.IsLow()) {
+ __ cmp(hs, temp, size_operand);
+ } else {
+ __ cmp(hs, temp, temp2);
+ }
+ }
+ __ B(lo, GetEntryLabel());
+
+ // Construct the target.
+ __ Add(target.offset, index, data_offset.Int32Value()); // Note: `temp` aliases `target.offset` and cannot be used below.
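+ // The `target.object` coordinate is the byte array itself (input 1), so only
+ // the offset within the array needs to be computed here.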
+
+ // Alignment check. For unaligned access, go to the runtime.
+ DCHECK(IsPowerOfTwo(size));
+ __ Tst(target.offset, dchecked_integral_cast<int32_t>(size - 1u));
+ __ B(ne, GetEntryLabel());
+
+ // Byte order check. For native byte order, return to the main path.
+ if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
+ HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
+ if (arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()) {
+ // There is no reason to differentiate between native byte order and byte-swap
+ // for setting a zero bit pattern. Just return to the main path.
+ __ B(GetNativeByteOrderLabel());
+ return;
+ }
+ }
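+ // A non-zero native byte order field means the view already uses the
+ // native byte order and no byte swapping is needed.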
+ __ Ldr(temp2, MemOperand(varhandle, native_byte_order_offset.Int32Value()));
+ __ Cmp(temp2, 0);
+ __ B(ne, GetNativeByteOrderLabel());
+ }
+
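+ // Dispatch to the access generator with `byte_swap == true`; in this mode the
+ // generators skip the checks and do not bind the native byte order label.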
+ switch (access_mode_template) {
+ case mirror::VarHandle::AccessModeTemplate::kGet:
+ GenerateVarHandleGet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
+ break;
+ case mirror::VarHandle::AccessModeTemplate::kSet:
+ GenerateVarHandleSet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
+ break;
+ case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
+ case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
+ break;
+ case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
+ GenerateVarHandleGetAndUpdate(
+ invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
+ break;
+ }
+ __ B(GetExitLabel());
+}
+
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)