summaryrefslogtreecommitdiff
path: root/compiler/optimizing
diff options
context:
space:
mode:
author Santiago Aboy Solanes <solanes@google.com> 2024-09-16 16:54:52 +0100
committer Santiago Aboy Solanes <solanes@google.com> 2024-09-24 16:28:22 +0000
commit9ff2b617341bfe574e2d8706553f0cd65a1a2fc8 (patch)
tree5a3ed4d9c534b424c4b39a42d58375d9a0209b22 /compiler/optimizing
parenta172151731d843be55e2b357bf58d26fe1aee75c (diff)
Add VarHandle implementations for void getAndUpdate methods
We were missing to intrinsify getAndUpdate methods whose value was unused. By adding this support, they get a considerable runtime improvement. Bug: 362916226 Test: art/test/testrunner/testrunner.py --host --64 --optimizing -b Test: art/test/testrunner/testrunner.py --host --32 --optimizing -b Test: art/test/testrunner/testrunner.py --target --64 --optimizing Test: art/test/testrunner/testrunner.py --target --32 --optimizing Change-Id: I63d5a897ac9ea3c133167ac186afd8595f75bb13
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--compiler/optimizing/instruction_builder.cc5
-rw-r--r--compiler/optimizing/intrinsics_arm64.cc142
-rw-r--r--compiler/optimizing/intrinsics_arm_vixl.cc173
-rw-r--r--compiler/optimizing/intrinsics_riscv64.cc129
-rw-r--r--compiler/optimizing/intrinsics_utils.h2
-rw-r--r--compiler/optimizing/intrinsics_x86.cc162
-rw-r--r--compiler/optimizing/intrinsics_x86_64.cc193
7 files changed, 555 insertions, 251 deletions
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index a679f88a58..09d7c23fed 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -26,8 +26,8 @@
#include "data_type-inl.h"
#include "dex/bytecode_utils.h"
#include "dex/dex_instruction-inl.h"
-#include "driver/dex_compilation_unit.h"
#include "driver/compiler_options.h"
+#include "driver/dex_compilation_unit.h"
#include "entrypoints/entrypoint_utils-inl.h"
#include "imtable-inl.h"
#include "intrinsics.h"
@@ -36,6 +36,7 @@
#include "jit/profiling_info.h"
#include "mirror/dex_cache.h"
#include "oat/oat_file.h"
+#include "optimizing/data_type.h"
#include "optimizing_compiler_stats.h"
#include "reflective_handle_scope-inl.h"
#include "scoped_thread_state_change-inl.h"
@@ -1357,7 +1358,7 @@ static void DecideVarHandleIntrinsic(HInvoke* invoke) {
optimizations.SetDoNotIntrinsify();
return;
}
- if (value_type != return_type) {
+ if (value_type != return_type && return_type != DataType::Type::kVoid) {
optimizations.SetDoNotIntrinsify();
return;
}
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 71ef84e1aa..4af63fa906 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -34,6 +34,7 @@
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "mirror/var_handle.h"
+#include "optimizing/data_type.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"
@@ -1762,17 +1763,33 @@ static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
locations->SetInAt(3, Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ // Request another temporary register for methods that don't return a value.
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ if (is_void) {
+ locations->AddTemp(Location::RequiresRegister());
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
}
static void GenUnsafeGetAndUpdate(HInvoke* invoke,
DataType::Type type,
CodeGeneratorARM64* codegen,
GetAndUpdateOp get_and_update_op) {
+ // Currently only used for these GetAndUpdateOp. Might be fine for other ops but double check
+ // before using.
+ DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet);
+
MacroAssembler* masm = codegen->GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
- Register out = RegisterFrom(locations->Out(), type); // Result.
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ // We use a temporary for void methods, as we don't return the value.
+ Location out_or_temp_loc =
+ is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
+ Register out_or_temp = RegisterFrom(out_or_temp_loc, type); // Result.
Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
Register arg = RegisterFrom(locations->InAt(3), type); // New value or addend.
@@ -1793,20 +1810,19 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke,
std::memory_order_seq_cst,
tmp_ptr,
arg,
- /*old_value=*/ out);
+ /*old_value=*/ out_or_temp);
- if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ if (!is_void && type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
if (kUseBakerReadBarrier) {
- codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), out.W());
+ codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out_or_temp.W(), out_or_temp.W());
} else {
- codegen->GenerateReadBarrierSlow(
- invoke,
- Location::RegisterLocation(out.GetCode()),
- Location::RegisterLocation(out.GetCode()),
- Location::RegisterLocation(base.GetCode()),
- /*offset=*/ 0u,
- /*index=*/ Location::RegisterLocation(offset.GetCode()));
+ codegen->GenerateReadBarrierSlow(invoke,
+ Location::RegisterLocation(out_or_temp.GetCode()),
+ Location::RegisterLocation(out_or_temp.GetCode()),
+ Location::RegisterLocation(base.GetCode()),
+ /*offset=*/ 0u,
+ /*index=*/ Location::RegisterLocation(offset.GetCode()));
}
}
}
@@ -5420,18 +5436,20 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
return;
}
- if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
+ // Get the type from the shorty as the invokes may not return a value.
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
+ uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
+ size_t old_temp_count = locations->GetTempCount();
+ if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field, thus seeing the new value
// that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
return;
}
- LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
-
- size_t old_temp_count = locations->GetTempCount();
DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
- if (DataType::IsFloatingPointType(invoke->GetType())) {
+ if (DataType::IsFloatingPointType(value_type)) {
if (get_and_update_op == GetAndUpdateOp::kAdd) {
// For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
@@ -5451,12 +5469,23 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
(get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) &&
GetExpectedVarHandleCoordinatesCount(invoke) == 2u &&
!IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
- DataType::Type value_type =
- GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) {
locations->AddTemp(Location::RequiresRegister());
}
}
+
+ // Request another temporary register for methods that don't return a value.
+ // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`.
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
+ if (is_void) {
+ if (DataType::IsFloatingPointType(value_type)) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ }
}
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
@@ -5464,6 +5493,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
GetAndUpdateOp get_and_update_op,
std::memory_order order,
bool byte_swap = false) {
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
bool is_fp = DataType::IsFloatingPointType(value_type);
@@ -5473,7 +5503,13 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
CPURegister arg = (is_fp && get_and_update_op == GetAndUpdateOp::kAdd)
? InputCPURegisterAt(invoke, arg_index)
: InputCPURegisterOrZeroRegAt(invoke, arg_index);
- CPURegister out = helpers::OutputCPURegister(invoke);
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
+ // We use a temporary for void methods, as we don't return the value.
+ CPURegister out_or_temp =
+ is_void ? CPURegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u), value_type) :
+ helpers::OutputCPURegister(invoke);
VarHandleTarget target = GetVarHandleTarget(invoke);
VarHandleSlowPathARM64* slow_path = nullptr;
@@ -5516,7 +5552,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
}
// Prepare register for old value.
- CPURegister old_value = out;
+ CPURegister old_value = out_or_temp;
if (get_and_update_op == GetAndUpdateOp::kSet) {
// For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
// rather than moving between core and FP registers in the loop.
@@ -5554,36 +5590,40 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
GenerateGetAndUpdate(codegen, get_and_update_op, load_store_type, order, tmp_ptr, arg, old_value);
- if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
- // The only adjustment needed is sign-extension for `kInt16`.
- // Everything else has been done by the `GenerateGetAndUpdate()`.
- DCHECK(byte_swap);
- if (value_type == DataType::Type::kInt16) {
- DCHECK_EQ(load_store_type, DataType::Type::kUint16);
- __ Sxth(out.W(), old_value.W());
- }
- } else if (byte_swap) {
- // Also handles moving to FP registers.
- GenerateReverseBytes(masm, value_type, old_value, out);
- } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat64) {
- __ Fmov(out.D(), old_value.X());
- } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat32) {
- __ Fmov(out.S(), old_value.W());
- } else if (value_type == DataType::Type::kInt8) {
- __ Sxtb(out.W(), old_value.W());
- } else if (value_type == DataType::Type::kInt16) {
- __ Sxth(out.W(), old_value.W());
- } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
- if (kUseBakerReadBarrier) {
- codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), old_value.W());
- } else {
- codegen->GenerateReadBarrierSlow(
- invoke,
- Location::RegisterLocation(out.GetCode()),
- Location::RegisterLocation(old_value.GetCode()),
- Location::RegisterLocation(target.object.GetCode()),
- /*offset=*/ 0u,
- /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
+ if (!is_void) {
+ if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
+ // The only adjustment needed is sign-extension for `kInt16`.
+ // Everything else has been done by the `GenerateGetAndUpdate()`.
+ DCHECK(byte_swap);
+ if (value_type == DataType::Type::kInt16) {
+ DCHECK_EQ(load_store_type, DataType::Type::kUint16);
+ __ Sxth(out_or_temp.W(), old_value.W());
+ }
+ } else if (byte_swap) {
+ // Also handles moving to FP registers.
+ GenerateReverseBytes(masm, value_type, old_value, out_or_temp);
+ } else if (get_and_update_op == GetAndUpdateOp::kSet &&
+ value_type == DataType::Type::kFloat64) {
+ __ Fmov(out_or_temp.D(), old_value.X());
+ } else if (get_and_update_op == GetAndUpdateOp::kSet &&
+ value_type == DataType::Type::kFloat32) {
+ __ Fmov(out_or_temp.S(), old_value.W());
+ } else if (value_type == DataType::Type::kInt8) {
+ __ Sxtb(out_or_temp.W(), old_value.W());
+ } else if (value_type == DataType::Type::kInt16) {
+ __ Sxth(out_or_temp.W(), old_value.W());
+ } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ if (kUseBakerReadBarrier) {
+ codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out_or_temp.W(), old_value.W());
+ } else {
+ codegen->GenerateReadBarrierSlow(
+ invoke,
+ Location::RegisterLocation(out_or_temp.GetCode()),
+ Location::RegisterLocation(old_value.GetCode()),
+ Location::RegisterLocation(target.object.GetCode()),
+ /*offset=*/0u,
+ /*index=*/Location::RegisterLocation(target.offset.GetCode()));
+ }
}
}
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 1b1711b9de..7f89f56d0c 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -16,6 +16,7 @@
#include "intrinsics_arm_vixl.h"
+#include "aarch32/constants-aarch32.h"
#include "arch/arm/callee_save_frame_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
@@ -30,12 +31,11 @@
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
+#include "optimizing/data_type.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "well_known_classes.h"
-#include "aarch32/constants-aarch32.h"
-
namespace art HIDDEN {
namespace arm {
@@ -3871,9 +3871,15 @@ static void CreateUnsafeGetAndUpdateLocations(HInvoke* invoke,
locations->SetInAt(2, Location::RequiresRegister());
locations->SetInAt(3, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-
+ // Request another temporary register for methods that don't return a value.
size_t num_temps = 1u; // We always need `tmp_ptr`.
+ const bool is_void = invoke->GetType() == DataType::Type::kVoid;
+ if (is_void) {
+ num_temps++;
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
+
if (get_and_update_op == GetAndUpdateOp::kAdd) {
// Add `maybe_temp` used for the new value in `GenerateGetAndUpdate()`.
num_temps += (type == DataType::Type::kInt64) ? 2u : 1u;
@@ -3894,10 +3900,18 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke,
CodeGeneratorARMVIXL* codegen,
DataType::Type type,
GetAndUpdateOp get_and_update_op) {
+ // Currently only used for these GetAndUpdateOp. Might be fine for other ops but double check
+ // before using.
+ DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet);
+
ArmVIXLAssembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
- Location out = locations->Out(); // Result.
+ const bool is_void = invoke->GetType() == DataType::Type::kVoid;
+
+ // We use a temporary for void methods, as we don't return the value.
+ Location out_or_temp =
+ is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
Location arg = locations->InAt(3); // New value or addend.
@@ -3930,24 +3944,24 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke,
type,
tmp_ptr,
arg,
- /*old_value=*/ out,
+ /*old_value=*/ out_or_temp,
/*store_result=*/ temp,
maybe_temp,
/*maybe_vreg_temp=*/ Location::NoLocation());
codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
- if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ if (!is_void && type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
if (kUseBakerReadBarrier) {
- codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out), RegisterFrom(out));
+ codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out_or_temp),
+ RegisterFrom(out_or_temp));
} else {
- codegen->GenerateReadBarrierSlow(
- invoke,
- out,
- out,
- Location::RegisterLocation(base.GetCode()),
- /*offset=*/ 0u,
- /*index=*/ Location::RegisterLocation(offset.GetCode()));
+ codegen->GenerateReadBarrierSlow(invoke,
+ out_or_temp,
+ out_or_temp,
+ Location::RegisterLocation(base.GetCode()),
+ /*offset=*/ 0u,
+ /*index=*/ Location::RegisterLocation(offset.GetCode()));
}
}
}
@@ -4656,6 +4670,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke,
LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
if (DataType::Is64BitType(value_type)) {
@@ -5163,7 +5178,10 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
return;
}
- if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
+ // Get the type from the shorty as the invokes may not return a value.
+ uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
+ if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field, thus seeing the new value
// that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
@@ -5177,7 +5195,6 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
DCHECK_EQ(locations->GetTempCount(),
(GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
- DataType::Type value_type = invoke->GetType();
if (get_and_update_op == GetAndUpdateOp::kSet) {
if (DataType::IsFloatingPointType(value_type)) {
// Add temps needed to do the GenerateGetAndUpdate() with core registers.
@@ -5211,6 +5228,23 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
locations->AddTemp(Location::RequiresFpuRegister());
}
}
+
+ // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`.
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
+ if (is_void) {
+ if (DataType::IsFloatingPointType(value_type)) {
+ // Note: This shall allocate a D register. There is no way to request an S register.
+ locations->AddTemp(Location::RequiresFpuRegister());
+ } else if (DataType::Is64BitType(value_type)) {
+ // We need two for non-fpu 64 bit types.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ }
}
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
@@ -5218,13 +5252,38 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
GetAndUpdateOp get_and_update_op,
std::memory_order order,
bool byte_swap = false) {
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
ArmVIXLAssembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
Location arg = locations->InAt(arg_index);
- Location out = locations->Out();
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
+
+ size_t temps_that_mimic_out;
+ Location result;
+ const size_t temp_count = locations->GetTempCount();
+ if (is_void) {
+ if (value_type == DataType::Type::kFloat32) {
+ // Note: Since we allocated a D register, use the low part.
+ DCHECK(locations->GetTemp(temp_count - 1u).IsFpuRegisterPair());
+ temps_that_mimic_out = 1u;
+ result = locations->GetTemp(temp_count - 1u).ToLow();
+ } else if (!DataType::IsFloatingPointType(value_type) && DataType::Is64BitType(value_type)) {
+ temps_that_mimic_out = 2u;
+ result = LocationFrom(RegisterFrom(locations->GetTemp(temp_count - 2u)),
+ RegisterFrom(locations->GetTemp(temp_count - 1u)));
+ } else {
+ temps_that_mimic_out = 1u;
+ result = locations->GetTemp(temp_count - 1u);
+ }
+ } else {
+ temps_that_mimic_out = 0u;
+ result = locations->Out();
+ }
VarHandleTarget target = GetVarHandleTarget(invoke);
VarHandleSlowPathARMVIXL* slow_path = nullptr;
@@ -5261,7 +5320,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
: value_type;
// Prepare register for old value and temporaries if any.
- Location old_value = out;
+ Location old_value = result;
Location maybe_temp = Location::NoLocation();
Location maybe_vreg_temp = Location::NoLocation();
if (get_and_update_op == GetAndUpdateOp::kSet) {
@@ -5269,7 +5328,9 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// rather than moving between core and FP registers in the loop.
if (value_type == DataType::Type::kFloat64) {
vixl32::DRegister arg_vreg = DRegisterFrom(arg);
- DCHECK_EQ(locations->GetTempCount(), 5u); // `store_result` and the four here.
+ // `store_result` and the four here, plus maybe an extra one for the temp that mimics the
+ // "out" register.
+ DCHECK_EQ(temp_count, 5u + temps_that_mimic_out);
old_value =
LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)));
arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4)));
@@ -5281,7 +5342,9 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
}
} else if (value_type == DataType::Type::kFloat32) {
vixl32::SRegister arg_vreg = SRegisterFrom(arg);
- DCHECK_EQ(locations->GetTempCount(), 3u); // `store_result` and the two here.
+ // `store_result` and the two here, plus maybe an extra one for the temp that mimics the
+ // "out" register.
+ DCHECK_EQ(temp_count, 3u + temps_that_mimic_out);
old_value = locations->GetTemp(1);
arg = locations->GetTemp(2);
__ Vmov(RegisterFrom(arg), arg_vreg);
@@ -5293,7 +5356,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// Load the old value initially to a temporary register.
// We shall move it to `out` later with a read barrier.
old_value = LocationFrom(store_result);
- store_result = RegisterFrom(out); // Use the `out` for the exclusive store result.
+ store_result = RegisterFrom(result); // Use `result` for the exclusive store result.
} else {
// The store_result is a separate temporary.
DCHECK(!store_result.Is(target.object));
@@ -5305,7 +5368,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
if (value_type == DataType::Type::kInt64) {
arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2)));
// Swap the high/low regs and reverse the bytes in each after the load.
- old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
+ old_value = LocationFrom(HighRegisterFrom(result), LowRegisterFrom(result));
}
GenerateReverseBytes(assembler, value_type, original_arg, arg);
}
@@ -5315,7 +5378,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
: locations->GetTemp(1);
DCHECK(!maybe_temp.Contains(LocationFrom(store_result)));
if (DataType::IsFloatingPointType(value_type)) {
- maybe_vreg_temp = locations->GetTemp(locations->GetTempCount() - 1u);
+ maybe_vreg_temp = locations->GetTemp(temp_count - 1u - temps_that_mimic_out);
DCHECK(maybe_vreg_temp.IsFpuRegisterPair());
}
if (byte_swap) {
@@ -5324,7 +5387,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
} else if (value_type == DataType::Type::kInt64) {
// Swap the high/low regs and reverse the bytes in each after the load.
- old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
+ old_value = LocationFrom(HighRegisterFrom(result), LowRegisterFrom(result));
// Due to lack of registers, reverse bytes in `arg` and undo that later.
GenerateReverseBytesInPlaceForEachWord(assembler, arg);
arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg));
@@ -5353,36 +5416,38 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
}
- if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
- if (value_type == DataType::Type::kInt64) {
- GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
- if (get_and_update_op != GetAndUpdateOp::kSet) {
- // Undo byte swapping in `arg`. We do not have the information
- // whether the value in these registers shall be needed later.
- GenerateReverseBytesInPlaceForEachWord(assembler, arg);
+ if (!is_void) {
+ if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
+ if (value_type == DataType::Type::kInt64) {
+ GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
+ if (get_and_update_op != GetAndUpdateOp::kSet) {
+ // Undo byte swapping in `arg`. We do not have the information
+ // whether the value in these registers shall be needed later.
+ GenerateReverseBytesInPlaceForEachWord(assembler, arg);
+ }
+ } else {
+ GenerateReverseBytes(assembler, value_type, old_value, result);
+ }
+ } else if (get_and_update_op == GetAndUpdateOp::kSet &&
+ DataType::IsFloatingPointType(value_type)) {
+ if (value_type == DataType::Type::kFloat64) {
+ __ Vmov(DRegisterFrom(result), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
+ } else {
+ __ Vmov(SRegisterFrom(result), RegisterFrom(old_value));
+ }
+ } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ if (kUseBakerReadBarrier) {
+ codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(result),
+ RegisterFrom(old_value));
+ } else {
+ codegen->GenerateReadBarrierSlow(
+ invoke,
+ Location::RegisterLocation(RegisterFrom(result).GetCode()),
+ Location::RegisterLocation(RegisterFrom(old_value).GetCode()),
+ Location::RegisterLocation(target.object.GetCode()),
+ /*offset=*/ 0u,
+ /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
}
- } else {
- GenerateReverseBytes(assembler, value_type, old_value, out);
- }
- } else if (get_and_update_op == GetAndUpdateOp::kSet &&
- DataType::IsFloatingPointType(value_type)) {
- if (value_type == DataType::Type::kFloat64) {
- __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
- } else {
- __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
- }
- } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
- if (kUseBakerReadBarrier) {
- codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out),
- RegisterFrom(old_value));
- } else {
- codegen->GenerateReadBarrierSlow(
- invoke,
- Location::RegisterLocation(RegisterFrom(out).GetCode()),
- Location::RegisterLocation(RegisterFrom(old_value).GetCode()),
- Location::RegisterLocation(target.object.GetCode()),
- /*offset=*/ 0u,
- /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
}
}
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 2bc8cad71a..f078beb1a4 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -2955,20 +2955,35 @@ static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
locations->SetInAt(2, Location::RequiresRegister());
locations->SetInAt(3, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ // Request another temporary register for methods that don't return a value.
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ if (is_void) {
+ locations->AddTemp(Location::RequiresRegister());
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
}
static void GenUnsafeGetAndUpdate(HInvoke* invoke,
DataType::Type type,
CodeGeneratorRISCV64* codegen,
GetAndUpdateOp get_and_update_op) {
+ // Currently only used for these GetAndUpdateOp. Might be fine for other ops but double check
+ // before using.
+ DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet);
+
Riscv64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
- Location out_loc = locations->Out();
- XRegister out = out_loc.AsRegister<XRegister>(); // Result.
- XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer.
- XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset.
- XRegister arg = locations->InAt(3).AsRegister<XRegister>(); // New value or addend.
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ // We use a temporary for void methods, as we don't return the value.
+ Location out_or_temp_loc =
+ is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
+ XRegister out_or_temp = out_or_temp_loc.AsRegister<XRegister>(); // Result.
+ XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer.
+ XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset.
+ XRegister arg = locations->InAt(3).AsRegister<XRegister>(); // New value or addend.
// This needs to be before the temp registers, as MarkGCCard also uses scratch registers.
if (type == DataType::Type::kReference) {
@@ -2987,28 +3002,28 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke,
std::memory_order_seq_cst,
tmp_ptr,
arg,
- /*old_value=*/ out,
+ /*old_value=*/ out_or_temp,
/*mask=*/ kNoXRegister,
/*temp=*/ kNoXRegister);
- if (type == DataType::Type::kReference) {
- __ ZextW(out, out);
+ if (!is_void && type == DataType::Type::kReference) {
+ __ ZextW(out_or_temp, out_or_temp);
if (codegen->EmitReadBarrier()) {
DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
if (kUseBakerReadBarrier) {
// Use RA as temp. It is clobbered in the slow path anyway.
static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
- SlowPathCodeRISCV64* rb_slow_path =
- codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out_loc, kBakerReadBarrierTemp);
- codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out_loc, kBakerReadBarrierTemp);
+ SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath(
+ invoke, out_or_temp_loc, kBakerReadBarrierTemp);
+ codegen->EmitBakerReadBarierMarkingCheck(
+ rb_slow_path, out_or_temp_loc, kBakerReadBarrierTemp);
} else {
- codegen->GenerateReadBarrierSlow(
- invoke,
- out_loc,
- out_loc,
- Location::RegisterLocation(base),
- /*offset=*/ 0u,
- /*index=*/ Location::RegisterLocation(offset));
+ codegen->GenerateReadBarrierSlow(invoke,
+ out_or_temp_loc,
+ out_or_temp_loc,
+ Location::RegisterLocation(base),
+ /*offset=*/ 0u,
+ /*index=*/ Location::RegisterLocation(offset));
}
}
}
@@ -4450,7 +4465,11 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
return;
}
- if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
+ // Get the type from the shorty as the invokes may not return a value.
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
+ uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
+ if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field, thus seeing the new value
// that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
@@ -4458,15 +4477,10 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
}
// TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
- if (kPoisonHeapReferences && invoke->GetType() == DataType::Type::kReference) {
+ if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
return;
}
- LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
- uint32_t arg_index = invoke->GetNumberOfArguments() - 1u;
- DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
- DataType::Type value_type = invoke->GetType();
- DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index));
Location arg = locations->InAt(arg_index);
bool is_fp = DataType::IsFloatingPointType(value_type);
@@ -4517,6 +4531,19 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
if (temps_needed > old_temp_count + scratch_registers_available) {
locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
}
+
+ // Request another temporary register for methods that don't return a value.
+ // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`.
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
+ if (is_void) {
+ if (DataType::IsFloatingPointType(value_type)) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ }
}
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
@@ -4524,16 +4551,21 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
GetAndUpdateOp get_and_update_op,
std::memory_order order,
bool byte_swap = false) {
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
- DataType::Type value_type = invoke->GetType();
- DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index));
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
Riscv64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
Location arg = locations->InAt(arg_index);
DCHECK_IMPLIES(arg.IsConstant(), arg.GetConstant()->IsZeroBitPattern());
- Location out = locations->Out();
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
+ // We use a temporary for void methods, as we don't return the value.
+ Location out_or_temp =
+ is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
VarHandleTarget target = GetVarHandleTarget(invoke);
VarHandleSlowPathRISCV64* slow_path = nullptr;
@@ -4589,6 +4621,8 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
available_scratch_registers -= 1u;
return srs.AllocateXRegister();
} else {
+ DCHECK_IMPLIES(is_void, next_temp != locations->GetTempCount() - 1u)
+ << "The last temp is special for the void case, as it represents the out register.";
XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
next_temp += 1u;
return temp;
@@ -4678,24 +4712,24 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
codegen->GetInstructionVisitor()->Load(
Location::RegisterLocation(old_value), tmp_ptr, /*offset=*/ 0, op_type);
if (byte_swap) {
- GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type);
+ GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type);
} else {
DCHECK(is_fp);
- codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
+ codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type);
}
if (is_fp) {
codegen->GetInstructionVisitor()->FAdd(
- ftmp, out.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type);
+ ftmp, out_or_temp.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type);
codegen->MoveLocation(
Location::RegisterLocation(new_value), Location::FpuRegisterLocation(ftmp), op_type);
} else if (arg.IsConstant()) {
DCHECK(arg.GetConstant()->IsZeroBitPattern());
- __ Mv(new_value, out.AsRegister<XRegister>());
+ __ Mv(new_value, out_or_temp.AsRegister<XRegister>());
} else if (value_type == DataType::Type::kInt64) {
- __ Add(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
+ __ Add(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
} else {
DCHECK_EQ(op_type, DataType::Type::kInt32);
- __ Addw(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
+ __ Addw(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
}
if (byte_swap) {
DataType::Type swap_type = op_type;
@@ -4706,7 +4740,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// To update the 16 bits, we can XOR the new value with the `out`, byte swap as Uint16
// (extracting only the bits we want to update), shift and XOR with the old value.
swap_type = DataType::Type::kUint16;
- __ Xor(new_value, new_value, out.AsRegister<XRegister>());
+ __ Xor(new_value, new_value, out_or_temp.AsRegister<XRegister>());
}
GenerateReverseBytes(codegen, Location::RegisterLocation(new_value), new_value, swap_type);
if (is_small) {
@@ -4727,15 +4761,15 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
store_result,
/*expected=*/ old_value);
} else {
- XRegister old_value = is_fp ? get_temp() : out.AsRegister<XRegister>();
+ XRegister old_value = is_fp ? get_temp() : out_or_temp.AsRegister<XRegister>();
GenerateGetAndUpdate(
codegen, get_and_update_op, op_type, order, tmp_ptr, arg_reg, old_value, mask, temp);
if (byte_swap) {
- DCHECK_IMPLIES(is_small, out.AsRegister<XRegister>() == old_value)
- << " " << value_type << " " << out.AsRegister<XRegister>() << "!=" << old_value;
- GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type);
+ DCHECK_IMPLIES(is_small, out_or_temp.AsRegister<XRegister>() == old_value)
+ << " " << value_type << " " << out_or_temp.AsRegister<XRegister>() << "!=" << old_value;
+ GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type);
} else if (is_fp) {
- codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
+ codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type);
} else if (is_small) {
__ Srlw(old_value, old_value, shift);
DCHECK_NE(value_type, DataType::Type::kUint8);
@@ -4754,14 +4788,14 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
if (codegen->EmitBakerReadBarrier()) {
// Use RA as temp. It is clobbered in the slow path anyway.
static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
- SlowPathCodeRISCV64* rb_slow_path =
- codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out, kBakerReadBarrierTemp);
- codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out, kBakerReadBarrierTemp);
+ SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath(
+ invoke, out_or_temp, kBakerReadBarrierTemp);
+ codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out_or_temp, kBakerReadBarrierTemp);
} else if (codegen->EmitNonBakerReadBarrier()) {
Location base_loc = Location::RegisterLocation(target.object);
Location index = Location::RegisterLocation(target.offset);
SlowPathCodeRISCV64* rb_slow_path = codegen->AddReadBarrierSlowPath(
- invoke, out, out, base_loc, /*offset=*/ 0u, index);
+ invoke, out_or_temp, out_or_temp, base_loc, /*offset=*/ 0u, index);
__ J(rb_slow_path->GetEntryLabel());
__ Bind(rb_slow_path->GetExitLabel());
}
@@ -4775,8 +4809,11 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// Check that we have allocated the right number of temps. We may need more registers
// for byte swapped CAS in the slow path, so skip this check for the main path in that case.
+ // In the void case, we requested an extra register to mimic the `out` register.
+ const size_t extra_temp_registers = is_void ? 1u : 0u;
bool has_byte_swap = (arg_index == 3u) && (!is_reference && data_size != 1u);
- if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
+ if ((!has_byte_swap || byte_swap) &&
+ next_temp != locations->GetTempCount() - extra_temp_registers) {
// We allocate a temporary register for the class object for a static field `VarHandle` but
// we do not update the `next_temp` if it's otherwise unused after the address calculation.
CHECK_EQ(arg_index, 1u);
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index 13d9bc4b68..6c08cea3f8 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -111,7 +111,7 @@ static inline size_t GetExpectedVarHandleCoordinatesCount(HInvoke *invoke) {
}
static inline DataType::Type GetDataTypeFromShorty(HInvoke* invoke, uint32_t index) {
- DCHECK(invoke->IsInvokePolymorphic());
+ DCHECK(invoke->IsInvokePolymorphic()) << *invoke;
const DexFile* dex_file = invoke->GetMethodReference().dex_file;
const char* shorty = dex_file->GetShorty(invoke->AsInvokePolymorphic()->GetProtoIndex());
DCHECK_LT(index, strlen(shorty));
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 2b83ba8349..dc88de474c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -34,6 +34,7 @@
#include "mirror/reference.h"
#include "mirror/string.h"
#include "mirror/var_handle.h"
+#include "optimizing/data_type.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86/assembler_x86.h"
@@ -2489,6 +2490,7 @@ void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ const bool is_void = invoke->GetType() == DataType::Type::kVoid;
if (type == DataType::Type::kInt64) {
// Explicitly allocate all registers.
locations->SetInAt(1, Location::RegisterLocation(EBP));
@@ -2502,14 +2504,23 @@ void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
locations->SetInAt(2, Location::RegisterPairLocation(ESI, EDI));
locations->SetInAt(3, Location::RegisterPairLocation(EBX, ECX));
}
- locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap);
+ if (is_void) {
+ locations->AddTemp(Location::RegisterLocation(EAX));
+ locations->AddTemp(Location::RegisterLocation(EDX));
+ } else {
+ locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap);
+ }
} else {
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
// Use the same register for both the output and the new value or addend
// to take advantage of XCHG or XADD. Arbitrarily pick EAX.
locations->SetInAt(3, Location::RegisterLocation(EAX));
- locations->SetOut(Location::RegisterLocation(EAX));
+ // Only set the `out` register if it's needed. In the void case we can still use EAX in the
+ // same manner as it is used an as `in` register.
+ if (!is_void) {
+ locations->SetOut(Location::RegisterLocation(EAX));
+ }
}
}
@@ -2573,14 +2584,20 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke,
X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
LocationSummary* locations = invoke->GetLocations();
- Location out = locations->Out(); // Result.
+ const bool is_void = invoke->GetType() == DataType::Type::kVoid;
+ // We use requested specific registers to use as temps for void methods, as we don't return the
+ // value.
+ Location out_or_temp =
+ is_void ? (type == DataType::Type::kInt64 ? Location::RegisterPairLocation(EAX, EDX) :
+ Location::RegisterLocation(EAX)) :
+ locations->Out();
Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
Location offset = locations->InAt(2); // Long offset.
Location arg = locations->InAt(3); // New value or addend.
if (type == DataType::Type::kInt32) {
- DCHECK(out.Equals(arg));
- Register out_reg = out.AsRegister<Register>();
+ DCHECK(out_or_temp.Equals(arg));
+ Register out_reg = out_or_temp.AsRegister<Register>();
Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
if (get_and_update_op == GetAndUpdateOp::kAdd) {
__ LockXaddl(field_address, out_reg);
@@ -2621,7 +2638,7 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke,
} else {
DCHECK_EQ(type, DataType::Type::kReference);
DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
- Register out_reg = out.AsRegister<Register>();
+ Register out_reg = out_or_temp.AsRegister<Register>();
Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
Register temp1 = locations->GetTemp(0).AsRegister<Register>();
Register temp2 = locations->GetTemp(1).AsRegister<Register>();
@@ -2650,8 +2667,10 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke,
__ movl(temp1, out_reg);
__ PoisonHeapReference(temp1);
__ xchgl(temp1, field_address);
- __ UnpoisonHeapReference(temp1);
- __ movl(out_reg, temp1);
+ if (!is_void) {
+ __ UnpoisonHeapReference(temp1);
+ __ movl(out_reg, temp1);
+ }
} else {
__ xchgl(out_reg, field_address);
}
@@ -4237,9 +4256,13 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86*
return;
}
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
uint32_t value_index = number_of_arguments - 1;
DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
if (DataType::Is64BitType(value_type)) {
// We avoid the case of an Int64/Float64 value because we would need to place it in a register
@@ -4266,10 +4289,17 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86*
if (value_type == DataType::Type::kFloat32) {
locations->AddTemp(Location::RegisterLocation(EAX));
locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index)));
- locations->SetOut(Location::RequiresFpuRegister());
+ // Only set the `out` register if it's needed. In the void case, we will not use `out`.
+ if (!is_void) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ }
} else {
locations->SetInAt(value_index, Location::RegisterLocation(EAX));
- locations->SetOut(Location::RegisterLocation(EAX));
+ // Only set the `out` register if it's needed. In the void case we can still use EAX in the
+ // same manner as it is used an as `in` register.
+ if (!is_void) {
+ locations->SetOut(Location::RegisterLocation(EAX));
+ }
}
}
@@ -4283,6 +4313,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
// The value we want to set is the last argument
uint32_t value_index = invoke->GetNumberOfArguments() - 1;
Location value = locations->InAt(value_index);
+ // Get the type from the shorty as the invokes may not return a value.
DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
Register temp = locations->GetTemp(1).AsRegister<Register>();
Register temp2 = locations->GetTemp(2).AsRegister<Register>();
@@ -4307,24 +4338,36 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
// fields the object is in a separate register, it is safe to use the first temporary register.
temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>();
// No need for a lock prefix. `xchg` has an implicit lock when it is used with an address.
+
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
switch (value_type) {
case DataType::Type::kBool:
__ xchgb(value.AsRegister<ByteRegister>(), field_addr);
- __ movzxb(locations->Out().AsRegister<Register>(),
- locations->Out().AsRegister<ByteRegister>());
+ if (!is_void) {
+ __ movzxb(locations->Out().AsRegister<Register>(),
+ locations->Out().AsRegister<ByteRegister>());
+ }
break;
case DataType::Type::kInt8:
__ xchgb(value.AsRegister<ByteRegister>(), field_addr);
- __ movsxb(locations->Out().AsRegister<Register>(),
- locations->Out().AsRegister<ByteRegister>());
+ if (!is_void) {
+ __ movsxb(locations->Out().AsRegister<Register>(),
+ locations->Out().AsRegister<ByteRegister>());
+ }
break;
case DataType::Type::kUint16:
__ xchgw(value.AsRegister<Register>(), field_addr);
- __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ if (!is_void) {
+ __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ }
break;
case DataType::Type::kInt16:
__ xchgw(value.AsRegister<Register>(), field_addr);
- __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ if (!is_void) {
+ __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ }
break;
case DataType::Type::kInt32:
__ xchgl(value.AsRegister<Register>(), field_addr);
@@ -4332,7 +4375,9 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
case DataType::Type::kFloat32:
codegen->Move32(Location::RegisterLocation(EAX), value);
__ xchgl(EAX, field_addr);
- __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
+ if (!is_void) {
+ __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
+ }
break;
case DataType::Type::kReference: {
if (codegen->EmitBakerReadBarrier()) {
@@ -4353,10 +4398,13 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
__ movl(temp, value.AsRegister<Register>());
__ PoisonHeapReference(temp);
__ xchgl(temp, field_addr);
- __ UnpoisonHeapReference(temp);
- __ movl(locations->Out().AsRegister<Register>(), temp);
+ if (!is_void) {
+ __ UnpoisonHeapReference(temp);
+ __ movl(locations->Out().AsRegister<Register>(), temp);
+ }
} else {
- __ xchgl(locations->Out().AsRegister<Register>(), field_addr);
+ DCHECK_IMPLIES(!is_void, locations->Out().Equals(Location::RegisterLocation(EAX)));
+ __ xchgl(Location::RegisterLocation(EAX).AsRegister<Register>(), field_addr);
}
break;
}
@@ -4590,6 +4638,7 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86*
return;
}
+ // Get the type from the shorty as the invokes may not return a value.
// The last argument should be the value we intend to set.
uint32_t value_index = invoke->GetNumberOfArguments() - 1;
DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
@@ -4615,15 +4664,26 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86*
locations->AddTemp(Location::RequiresRegister());
}
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
+
if (DataType::IsFloatingPointType(value_type)) {
locations->AddTemp(Location::RequiresFpuRegister());
locations->AddTemp(Location::RegisterLocation(EAX));
locations->SetInAt(value_index, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
+ // Only set the `out` register if it's needed. In the void case, we do not use `out`.
+ if (!is_void) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ }
} else {
// xadd updates the register argument with the old value. ByteRegister required for xaddb.
locations->SetInAt(value_index, Location::RegisterLocation(EAX));
- locations->SetOut(Location::RegisterLocation(EAX));
+ // Only set the `out` register if it's needed. In the void case we can still use EAX in the
+ // same manner as it is used an as `in` register.
+ if (!is_void) {
+ locations->SetOut(Location::RegisterLocation(EAX));
+ }
}
}
@@ -4636,8 +4696,11 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codege
LocationSummary* locations = invoke->GetLocations();
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
uint32_t value_index = number_of_arguments - 1;
+ // Get the type from the shorty as the invokes may not return a value.
DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
- DCHECK_EQ(type, invoke->GetType());
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == type);
Location value_loc = locations->InAt(value_index);
Register temp = locations->GetTemp(0).AsRegister<Register>();
SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
@@ -4659,16 +4722,22 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codege
switch (type) {
case DataType::Type::kInt8:
__ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
- __ movsxb(locations->Out().AsRegister<Register>(),
- locations->Out().AsRegister<ByteRegister>());
+ if (!is_void) {
+ __ movsxb(locations->Out().AsRegister<Register>(),
+ locations->Out().AsRegister<ByteRegister>());
+ }
break;
case DataType::Type::kInt16:
__ LockXaddw(field_addr, value_loc.AsRegister<Register>());
- __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ if (!is_void) {
+ __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ }
break;
case DataType::Type::kUint16:
__ LockXaddw(field_addr, value_loc.AsRegister<Register>());
- __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ if (!is_void) {
+ __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ }
break;
case DataType::Type::kInt32:
__ LockXaddl(field_addr, value_loc.AsRegister<Register>());
@@ -4693,8 +4762,10 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codege
temp);
__ j(kNotZero, &try_again);
- // The old value is present in EAX.
- codegen->Move32(locations->Out(), eax);
+ if (!is_void) {
+ // The old value is present in EAX.
+ codegen->Move32(locations->Out(), eax);
+ }
break;
}
default:
@@ -4740,9 +4811,11 @@ static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGenerat
return;
}
+ // Get the type from the shorty as the invokes may not return a value.
// The last argument should be the value we intend to set.
uint32_t value_index = invoke->GetNumberOfArguments() - 1;
- if (DataType::Is64BitType(GetDataTypeFromShorty(invoke, value_index))) {
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
+ if (DataType::Is64BitType(value_type)) {
// We avoid the case of an Int64 value because we would need to place it in a register pair.
// If the slow path is taken, the ParallelMove might fail to move the pair according to the
// X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
@@ -4767,7 +4840,18 @@ static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGenerat
}
locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index)));
- locations->SetOut(Location::RegisterLocation(EAX));
+
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
+ if (is_void) {
+ // Used as a temporary, even when we are not outputting it so reserve it. This has to be
+ // requested before the other temporary since there's variable number of temp registers and the
+ // other temp register is expected to be the last one.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+ } else {
+ locations->SetOut(Location::RegisterLocation(EAX));
+ }
}
static void GenerateBitwiseOp(HInvoke* invoke,
@@ -4805,9 +4889,12 @@ static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86*
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t value_index = invoke->GetNumberOfArguments() - 1;
DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
- DCHECK_EQ(type, invoke->GetType());
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == type);
Register temp = locations->GetTemp(0).AsRegister<Register>();
SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
codegen->AddSlowPath(slow_path);
@@ -4826,8 +4913,9 @@ static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86*
DCHECK_NE(temp, reference);
Address field_addr(reference, offset, TIMES_1, 0);
- Register out = locations->Out().AsRegister<Register>();
- DCHECK_EQ(out, EAX);
+ Location eax_loc = Location::RegisterLocation(EAX);
+ Register eax = eax_loc.AsRegister<Register>();
+ DCHECK_IMPLIES(!is_void, locations->Out().Equals(eax_loc));
if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease ||
invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease ||
@@ -4838,12 +4926,12 @@ static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86*
NearLabel try_again;
__ Bind(&try_again);
// Place the expected value in EAX for cmpxchg
- codegen->LoadFromMemoryNoBarrier(type, locations->Out(), field_addr);
+ codegen->LoadFromMemoryNoBarrier(type, eax_loc, field_addr);
codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
- GenerateBitwiseOp(invoke, codegen, temp, out);
+ GenerateBitwiseOp(invoke, codegen, temp, eax);
GenPrimitiveLockedCmpxchg(type,
codegen,
- /* expected_value= */ locations->Out(),
+ /* expected_value= */ eax_loc,
/* new_value= */ locations->GetTemp(0),
reference,
offset);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 09074f39df..67ef16e4a6 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -27,8 +27,8 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "heap_poisoning.h"
-#include "intrinsics.h"
#include "intrinsic_objects.h"
+#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
@@ -36,6 +36,7 @@
#include "mirror/reference.h"
#include "mirror/string.h"
#include "optimizing/code_generator.h"
+#include "optimizing/data_type.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
@@ -2633,7 +2634,11 @@ static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
// Use the same register for both the output and the new value or addend
// to take advantage of XCHG or XADD. Arbitrarily pick RAX.
locations->SetInAt(3, Location::RegisterLocation(RAX));
- locations->SetOut(Location::RegisterLocation(RAX));
+ // Only set the `out` register if it's needed. In the void case we can still use RAX in the
+ // same manner as it is used an as `in` register.
+ if (invoke->GetType() != DataType::Type::kVoid) {
+ locations->SetOut(Location::RegisterLocation(RAX));
+ }
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
@@ -2697,25 +2702,29 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke,
X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
LocationSummary* locations = invoke->GetLocations();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // Result.
- CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); // Object pointer.
- CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); // Long offset.
- DCHECK_EQ(out, locations->InAt(3).AsRegister<CpuRegister>()); // New value or addend.
+ const bool is_void = invoke->GetType() == DataType::Type::kVoid;
+ Location rax_loc = Location::RegisterLocation(RAX);
+ // We requested RAX to use as a temporary for void methods, as we don't return the value.
+ DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc));
+ CpuRegister out_or_temp = rax_loc.AsRegister<CpuRegister>(); // Result.
+ CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); // Object pointer.
+ CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); // Long offset.
+ DCHECK_EQ(out_or_temp, locations->InAt(3).AsRegister<CpuRegister>()); // New value or addend.
Address field_address(base, offset, TIMES_1, 0);
if (type == DataType::Type::kInt32) {
if (get_and_update_op == GetAndUpdateOp::kAdd) {
- __ LockXaddl(field_address, out);
+ __ LockXaddl(field_address, out_or_temp);
} else {
DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
- __ xchgl(out, field_address);
+ __ xchgl(out_or_temp, field_address);
}
} else if (type == DataType::Type::kInt64) {
if (get_and_update_op == GetAndUpdateOp::kAdd) {
- __ LockXaddq(field_address, out);
+ __ LockXaddq(field_address, out_or_temp);
} else {
DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
- __ xchgq(out, field_address);
+ __ xchgq(out_or_temp, field_address);
}
} else {
DCHECK_EQ(type, DataType::Type::kReference);
@@ -2740,18 +2749,20 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke,
// Mark card for object as a new value shall be stored.
bool new_value_can_be_null = true; // TODO: Worth finding out this information?
- codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out, new_value_can_be_null);
+ codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_or_temp, new_value_can_be_null);
if (kPoisonHeapReferences) {
// Use a temp to avoid poisoning base of the field address, which might happen if `out`
// is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
- __ movl(temp1, out);
+ __ movl(temp1, out_or_temp);
__ PoisonHeapReference(temp1);
__ xchgl(temp1, field_address);
- __ UnpoisonHeapReference(temp1);
- __ movl(out, temp1);
+ if (!is_void) {
+ __ UnpoisonHeapReference(temp1);
+ __ movl(out_or_temp, temp1);
+ }
} else {
- __ xchgl(out, field_address);
+ __ xchgl(out_or_temp, field_address);
}
}
}
@@ -4500,23 +4511,33 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_
return;
}
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
uint32_t new_value_index = number_of_arguments - 1;
- DataType::Type type = invoke->GetType();
- DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
- if (DataType::IsFloatingPointType(type)) {
- locations->SetOut(Location::RequiresFpuRegister());
+ if (DataType::IsFloatingPointType(value_type)) {
+ // Only set the `out` register if it's needed. In the void case we don't use `out`.
+ if (!is_void) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ }
// A temporary is needed to load the new floating-point value into a register for XCHG.
locations->AddTemp(Location::RequiresRegister());
} else {
- // Use the same register for both the new value and output to take advantage of XCHG.
- // It doesn't have to be RAX, but we need to choose some to make sure it's the same.
- locations->SetOut(Location::RegisterLocation(RAX));
+ // Only set the `out` register if it's needed. In the void case we can still use RAX in the
+ // same manner as it is used an as `in` register.
+ if (!is_void) {
+ // Use the same register for both the new value and output to take advantage of XCHG.
+ // It doesn't have to be RAX, but we need to choose some to make sure it's the same.
+ locations->SetOut(Location::RegisterLocation(RAX));
+ }
locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
- if (type == DataType::Type::kReference) {
+ if (value_type == DataType::Type::kReference) {
// Need two temporaries for MarkGCCard.
locations->AddRegisterTemps(2);
if (codegen->EmitReadBarrier()) {
@@ -4539,6 +4560,9 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
LocationSummary* locations = invoke->GetLocations();
Location out = locations->Out();
uint32_t temp_count = locations->GetTempCount();
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == type);
if (DataType::IsFloatingPointType(type)) {
// `getAndSet` for floating-point types: move the new FP value into a register, atomically
@@ -4558,7 +4582,9 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
if (byte_swap) {
codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
}
- __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit);
+ if (!is_void) {
+ __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit);
+ }
} else if (type == DataType::Type::kReference) {
// `getAndSet` for references: load reference and atomically exchange it with the field.
// Output register is the same as the one holding new value, so no need to move the result.
@@ -4581,15 +4607,17 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
}
codegen->MarkGCCard(temp1, temp2, ref);
- DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
+ DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
if (kPoisonHeapReferences) {
// Use a temp to avoid poisoning base of the field address, which might happen if `valreg` is
// the same as `target.object` (for code like `vh.getAndSet(obj, obj)`).
__ movl(temp1, valreg);
__ PoisonHeapReference(temp1);
__ xchgl(temp1, field_addr);
- __ UnpoisonHeapReference(temp1);
- __ movl(valreg, temp1);
+ if (!is_void) {
+ __ UnpoisonHeapReference(temp1);
+ __ movl(valreg, temp1);
+ }
} else {
__ xchgl(valreg, field_addr);
}
@@ -4600,24 +4628,32 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
codegen->GetInstructionCodegen()->Bswap(value, type);
}
CpuRegister valreg = value.AsRegister<CpuRegister>();
- DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
+ DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
__ xchgb(valreg, field_addr);
- __ movzxb(valreg, valreg);
+ if (!is_void) {
+ __ movzxb(valreg, valreg);
+ }
break;
case DataType::Type::kInt8:
__ xchgb(valreg, field_addr);
- __ movsxb(valreg, valreg);
+ if (!is_void) {
+ __ movsxb(valreg, valreg);
+ }
break;
case DataType::Type::kUint16:
__ xchgw(valreg, field_addr);
- __ movzxw(valreg, valreg);
+ if (!is_void) {
+ __ movzxw(valreg, valreg);
+ }
break;
case DataType::Type::kInt16:
__ xchgw(valreg, field_addr);
- __ movsxw(valreg, valreg);
+ if (!is_void) {
+ __ movsxw(valreg, valreg);
+ }
break;
case DataType::Type::kInt32:
case DataType::Type::kUint32:
@@ -4642,25 +4678,36 @@ static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGenerat
return;
}
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
uint32_t new_value_index = number_of_arguments - 1;
- DataType::Type type = invoke->GetType();
- DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
- DCHECK_NE(DataType::Type::kReference, type);
- DCHECK(!DataType::IsFloatingPointType(type));
+ DCHECK_NE(DataType::Type::kReference, value_type);
+ DCHECK(!DataType::IsFloatingPointType(value_type));
+ if (!is_void) {
+ // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
+ locations->SetOut(Location::RegisterLocation(RAX));
+ } else {
+ // Used as a temporary, even when we are not outputting it so reserve it. This has to be
+ // requested before the other temporary since there's variable number of temp registers and the
+ // other temp register is expected to be the last one.
+ locations->AddTemp(Location::RegisterLocation(RAX));
+ }
// A temporary to compute the bitwise operation on the old and the new values.
locations->AddTemp(Location::RequiresRegister());
// We need value to be either in a register, or a 32-bit constant (as there are no arithmetic
// instructions that accept 64-bit immediate on x86_64).
- locations->SetInAt(new_value_index, DataType::Is64BitType(type)
- ? Location::RequiresRegister()
- : Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
- // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
- locations->SetOut(Location::RegisterLocation(RAX));
+ locations->SetInAt(new_value_index,
+ DataType::Is64BitType(value_type) ?
+ Location::RequiresRegister() :
+ Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
}
static void GenerateVarHandleGetAndOp(HInvoke* invoke,
@@ -4672,8 +4719,9 @@ static void GenerateVarHandleGetAndOp(HInvoke* invoke,
bool byte_swap) {
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
- Location temp_loc = locations->GetTemp(locations->GetTempCount() - 1);
- Location rax_loc = locations->Out();
+ Location temp_loc = locations->GetTemp(locations->GetTempCount() - 1u);
+ Location rax_loc = Location::RegisterLocation(RAX);
+ DCHECK_IMPLIES(invoke->GetType() != DataType::Type::kVoid, locations->Out().Equals(rax_loc));
CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
CpuRegister rax = rax_loc.AsRegister<CpuRegister>();
DCHECK_EQ(rax.AsRegister(), RAX);
@@ -4781,15 +4829,21 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_
return;
}
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
uint32_t new_value_index = number_of_arguments - 1;
- DataType::Type type = invoke->GetType();
- DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == value_type);
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
- if (DataType::IsFloatingPointType(type)) {
- locations->SetOut(Location::RequiresFpuRegister());
+ if (DataType::IsFloatingPointType(value_type)) {
+ // Only set the `out` register if it's needed. In the void case we don't use `out`
+ if (!is_void) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ }
// Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD.
locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
// CMPXCHG clobbers RAX.
@@ -4799,11 +4853,15 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_
// A temporary to hold the new value for CMPXCHG.
locations->AddTemp(Location::RequiresRegister());
} else {
- DCHECK_NE(type, DataType::Type::kReference);
- // Use the same register for both the new value and output to take advantage of XADD.
- // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
- // back to GenerateVarHandleGetAndOp that expects out in RAX.
- locations->SetOut(Location::RegisterLocation(RAX));
+ DCHECK_NE(value_type, DataType::Type::kReference);
+ // Only set the `out` register if it's needed. In the void case we can still use RAX in the
+ // same manner as it is used an as `in` register.
+ if (!is_void) {
+ // Use the same register for both the new value and output to take advantage of XADD.
+ // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
+ // back to GenerateVarHandleGetAndOp that expects out in RAX.
+ locations->SetOut(Location::RegisterLocation(RAX));
+ }
locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
// For byte array views with non-native endianness we need extra BSWAP operations, so we
@@ -4827,6 +4885,10 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
Location out = locations->Out();
uint32_t temp_count = locations->GetTempCount();
+ DataType::Type return_type = invoke->GetType();
+ const bool is_void = return_type == DataType::Type::kVoid;
+ DCHECK_IMPLIES(!is_void, return_type == type);
+
if (DataType::IsFloatingPointType(type)) {
if (byte_swap) {
// This code should never be executed: it is the case of a byte array view (since it requires
@@ -4889,7 +4951,9 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
if (byte_swap) {
codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
}
- __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
+ if (!is_void) {
+ __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
+ }
} else {
if (byte_swap) {
// We cannot use XADD since we need to byte-swap the old value when reading it from memory,
@@ -4906,24 +4970,32 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
// the old value to the field. Output register is the same as the one holding new value. Do
// sign extend / zero extend as needed.
CpuRegister valreg = value.AsRegister<CpuRegister>();
- DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
+ DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
__ LockXaddb(field_addr, valreg);
- __ movzxb(valreg, valreg);
+ if (!is_void) {
+ __ movzxb(valreg, valreg);
+ }
break;
case DataType::Type::kInt8:
__ LockXaddb(field_addr, valreg);
- __ movsxb(valreg, valreg);
+ if (!is_void) {
+ __ movsxb(valreg, valreg);
+ }
break;
case DataType::Type::kUint16:
__ LockXaddw(field_addr, valreg);
- __ movzxw(valreg, valreg);
+ if (!is_void) {
+ __ movzxw(valreg, valreg);
+ }
break;
case DataType::Type::kInt16:
__ LockXaddw(field_addr, valreg);
- __ movsxw(valreg, valreg);
+ if (!is_void) {
+ __ movsxw(valreg, valreg);
+ }
break;
case DataType::Type::kInt32:
case DataType::Type::kUint32:
@@ -4952,9 +5024,10 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
+ // Get the type from the shorty as the invokes may not return a value.
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
Location value = locations->InAt(number_of_arguments - 1);
- DataType::Type type = invoke->GetType();
+ DataType::Type type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
VarHandleSlowPathX86_64* slow_path = nullptr;
VarHandleTarget target = GetVarHandleTarget(invoke);
@@ -5212,7 +5285,7 @@ void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen
CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
- CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1).AsRegister<CpuRegister>();
+ CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1u).AsRegister<CpuRegister>();
MemberOffset class_offset = mirror::Object::ClassOffset();
MemberOffset array_length_offset = mirror::Array::LengthOffset();