diff options
author | 2024-09-25 11:32:36 +0000 | |
---|---|---|
committer | 2024-09-25 13:41:34 +0100 | |
commit | bb2fb09b7d4618e4e4319835e1a8e3c1eb1506ae (patch) | |
tree | 91e81a7b395847baa0c578568343c09837d41d40 /compiler/optimizing | |
parent | 8567fcdf972ade85785b689a15e3593a7d3bff9d (diff) |
Revert^2 "Add VarHandle implementations for void getAndUpdate methods"
This reverts commit 8567fcdf972ade85785b689a15e3593a7d3bff9d.
Reason for revert: RISC-V fix
Test: LUCI run for RISC-V
Change-Id: I6392bfa045e7737b5c45d78da4f7ecb8fbd3b89a
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/instruction_builder.cc | 5 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 142 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 173 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_riscv64.cc | 131 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_utils.h | 2 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 162 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 193 |
7 files changed, 556 insertions, 252 deletions
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index a679f88a58..09d7c23fed 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -26,8 +26,8 @@ #include "data_type-inl.h" #include "dex/bytecode_utils.h" #include "dex/dex_instruction-inl.h" -#include "driver/dex_compilation_unit.h" #include "driver/compiler_options.h" +#include "driver/dex_compilation_unit.h" #include "entrypoints/entrypoint_utils-inl.h" #include "imtable-inl.h" #include "intrinsics.h" @@ -36,6 +36,7 @@ #include "jit/profiling_info.h" #include "mirror/dex_cache.h" #include "oat/oat_file.h" +#include "optimizing/data_type.h" #include "optimizing_compiler_stats.h" #include "reflective_handle_scope-inl.h" #include "scoped_thread_state_change-inl.h" @@ -1357,7 +1358,7 @@ static void DecideVarHandleIntrinsic(HInvoke* invoke) { optimizations.SetDoNotIntrinsify(); return; } - if (value_type != return_type) { + if (value_type != return_type && return_type != DataType::Type::kVoid) { optimizations.SetDoNotIntrinsify(); return; } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 71ef84e1aa..4af63fa906 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -34,6 +34,7 @@ #include "mirror/reference.h" #include "mirror/string-inl.h" #include "mirror/var_handle.h" +#include "optimizing/data_type.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" #include "utils/arm64/assembler_arm64.h" @@ -1762,17 +1763,33 @@ static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator, locations->SetInAt(3, Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // Request another temporary register for methods that don't return a value. + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + if (is_void) { + locations->AddTemp(Location::RequiresRegister()); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } } static void GenUnsafeGetAndUpdate(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen, GetAndUpdateOp get_and_update_op) { + // Currently only used for these GetAndUpdateOp. Might be fine for other ops but double check + // before using. + DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet); + MacroAssembler* masm = codegen->GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); - Register out = RegisterFrom(locations->Out(), type); // Result. + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + // We use a temporary for void methods, as we don't return the value. + Location out_or_temp_loc = + is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out(); + Register out_or_temp = RegisterFrom(out_or_temp_loc, type); // Result. Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. Register arg = RegisterFrom(locations->InAt(3), type); // New value or addend. @@ -1793,20 +1810,19 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke, std::memory_order_seq_cst, tmp_ptr, arg, - /*old_value=*/ out); + /*old_value=*/ out_or_temp); - if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + if (!is_void && type == DataType::Type::kReference && codegen->EmitReadBarrier()) { DCHECK(get_and_update_op == GetAndUpdateOp::kSet); if (kUseBakerReadBarrier) { - codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), out.W()); + codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out_or_temp.W(), out_or_temp.W()); } else { - codegen->GenerateReadBarrierSlow( - invoke, - Location::RegisterLocation(out.GetCode()), - Location::RegisterLocation(out.GetCode()), - Location::RegisterLocation(base.GetCode()), - /*offset=*/ 0u, - /*index=*/ Location::RegisterLocation(offset.GetCode())); + codegen->GenerateReadBarrierSlow(invoke, + Location::RegisterLocation(out_or_temp.GetCode()), + Location::RegisterLocation(out_or_temp.GetCode()), + Location::RegisterLocation(base.GetCode()), + /*offset=*/ 0u, + /*index=*/ Location::RegisterLocation(offset.GetCode())); } } } @@ -5420,18 +5436,20 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, return; } - if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { + // Get the type from the shorty as the invokes may not return a value. + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); + uint32_t arg_index = invoke->GetNumberOfArguments() - 1; + DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index); + size_t old_temp_count = locations->GetTempCount(); + if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field, thus seeing the new value // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084 return; } - LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); - - size_t old_temp_count = locations->GetTempCount(); DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u); - if (DataType::IsFloatingPointType(invoke->GetType())) { + if (DataType::IsFloatingPointType(value_type)) { if (get_and_update_op == GetAndUpdateOp::kAdd) { // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0). locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister()); @@ -5451,12 +5469,23 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) && GetExpectedVarHandleCoordinatesCount(invoke) == 2u && !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { - DataType::Type value_type = - GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u); if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) { locations->AddTemp(Location::RequiresRegister()); } } + + // Request another temporary register for methods that don't return a value. + // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`. + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); + if (is_void) { + if (DataType::IsFloatingPointType(value_type)) { + locations->AddTemp(Location::RequiresFpuRegister()); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + } } static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, @@ -5464,6 +5493,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, GetAndUpdateOp get_and_update_op, std::memory_order order, bool byte_swap = false) { + // Get the type from the shorty as the invokes may not return a value. uint32_t arg_index = invoke->GetNumberOfArguments() - 1; DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index); bool is_fp = DataType::IsFloatingPointType(value_type); @@ -5473,7 +5503,13 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, CPURegister arg = (is_fp && get_and_update_op == GetAndUpdateOp::kAdd) ? InputCPURegisterAt(invoke, arg_index) : InputCPURegisterOrZeroRegAt(invoke, arg_index); - CPURegister out = helpers::OutputCPURegister(invoke); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); + // We use a temporary for void methods, as we don't return the value. + CPURegister out_or_temp = + is_void ? CPURegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u), value_type) : + helpers::OutputCPURegister(invoke); VarHandleTarget target = GetVarHandleTarget(invoke); VarHandleSlowPathARM64* slow_path = nullptr; @@ -5516,7 +5552,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, } // Prepare register for old value. - CPURegister old_value = out; + CPURegister old_value = out_or_temp; if (get_and_update_op == GetAndUpdateOp::kSet) { // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers, // rather than moving between core and FP registers in the loop. @@ -5554,36 +5590,40 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, GenerateGetAndUpdate(codegen, get_and_update_op, load_store_type, order, tmp_ptr, arg, old_value); - if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) { - // The only adjustment needed is sign-extension for `kInt16`. - // Everything else has been done by the `GenerateGetAndUpdate()`. - DCHECK(byte_swap); - if (value_type == DataType::Type::kInt16) { - DCHECK_EQ(load_store_type, DataType::Type::kUint16); - __ Sxth(out.W(), old_value.W()); - } - } else if (byte_swap) { - // Also handles moving to FP registers. - GenerateReverseBytes(masm, value_type, old_value, out); - } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat64) { - __ Fmov(out.D(), old_value.X()); - } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat32) { - __ Fmov(out.S(), old_value.W()); - } else if (value_type == DataType::Type::kInt8) { - __ Sxtb(out.W(), old_value.W()); - } else if (value_type == DataType::Type::kInt16) { - __ Sxth(out.W(), old_value.W()); - } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { - if (kUseBakerReadBarrier) { - codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), old_value.W()); - } else { - codegen->GenerateReadBarrierSlow( - invoke, - Location::RegisterLocation(out.GetCode()), - Location::RegisterLocation(old_value.GetCode()), - Location::RegisterLocation(target.object.GetCode()), - /*offset=*/ 0u, - /*index=*/ Location::RegisterLocation(target.offset.GetCode())); + if (!is_void) { + if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) { + // The only adjustment needed is sign-extension for `kInt16`. + // Everything else has been done by the `GenerateGetAndUpdate()`. + DCHECK(byte_swap); + if (value_type == DataType::Type::kInt16) { + DCHECK_EQ(load_store_type, DataType::Type::kUint16); + __ Sxth(out_or_temp.W(), old_value.W()); + } + } else if (byte_swap) { + // Also handles moving to FP registers. + GenerateReverseBytes(masm, value_type, old_value, out_or_temp); + } else if (get_and_update_op == GetAndUpdateOp::kSet && + value_type == DataType::Type::kFloat64) { + __ Fmov(out_or_temp.D(), old_value.X()); + } else if (get_and_update_op == GetAndUpdateOp::kSet && + value_type == DataType::Type::kFloat32) { + __ Fmov(out_or_temp.S(), old_value.W()); + } else if (value_type == DataType::Type::kInt8) { + __ Sxtb(out_or_temp.W(), old_value.W()); + } else if (value_type == DataType::Type::kInt16) { + __ Sxth(out_or_temp.W(), old_value.W()); + } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + if (kUseBakerReadBarrier) { + codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out_or_temp.W(), old_value.W()); + } else { + codegen->GenerateReadBarrierSlow( + invoke, + Location::RegisterLocation(out_or_temp.GetCode()), + Location::RegisterLocation(old_value.GetCode()), + Location::RegisterLocation(target.object.GetCode()), + /*offset=*/0u, + /*index=*/Location::RegisterLocation(target.offset.GetCode())); + } } } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 1b1711b9de..7f89f56d0c 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -16,6 +16,7 @@ #include "intrinsics_arm_vixl.h" +#include "aarch32/constants-aarch32.h" #include "arch/arm/callee_save_frame_arm.h" #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" @@ -30,12 +31,11 @@ #include "mirror/object_array-inl.h" #include "mirror/reference.h" #include "mirror/string-inl.h" +#include "optimizing/data_type.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" #include "well_known_classes.h" -#include "aarch32/constants-aarch32.h" - namespace art HIDDEN { namespace arm { @@ -3871,9 +3871,15 @@ static void CreateUnsafeGetAndUpdateLocations(HInvoke* invoke, locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - + // Request another temporary register for methods that don't return a value. size_t num_temps = 1u; // We always need `tmp_ptr`. + const bool is_void = invoke->GetType() == DataType::Type::kVoid; + if (is_void) { + num_temps++; + } else { + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } + if (get_and_update_op == GetAndUpdateOp::kAdd) { // Add `maybe_temp` used for the new value in `GenerateGetAndUpdate()`. num_temps += (type == DataType::Type::kInt64) ? 2u : 1u; @@ -3894,10 +3900,18 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke, CodeGeneratorARMVIXL* codegen, DataType::Type type, GetAndUpdateOp get_and_update_op) { + // Currently only used for these GetAndUpdateOp. Might be fine for other ops but double check + // before using. + DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet); + ArmVIXLAssembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); - Location out = locations->Out(); // Result. + const bool is_void = invoke->GetType() == DataType::Type::kVoid; + + // We use a temporary for void methods, as we don't return the value. + Location out_or_temp = + is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out(); vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B). Location arg = locations->InAt(3); // New value or addend. @@ -3930,24 +3944,24 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke, type, tmp_ptr, arg, - /*old_value=*/ out, + /*old_value=*/ out_or_temp, /*store_result=*/ temp, maybe_temp, /*maybe_vreg_temp=*/ Location::NoLocation()); codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); - if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + if (!is_void && type == DataType::Type::kReference && codegen->EmitReadBarrier()) { DCHECK(get_and_update_op == GetAndUpdateOp::kSet); if (kUseBakerReadBarrier) { - codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out), RegisterFrom(out)); + codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out_or_temp), + RegisterFrom(out_or_temp)); } else { - codegen->GenerateReadBarrierSlow( - invoke, - out, - out, - Location::RegisterLocation(base.GetCode()), - /*offset=*/ 0u, - /*index=*/ Location::RegisterLocation(offset.GetCode())); + codegen->GenerateReadBarrierSlow(invoke, + out_or_temp, + out_or_temp, + Location::RegisterLocation(base.GetCode()), + /*offset=*/ 0u, + /*index=*/ Location::RegisterLocation(offset.GetCode())); } } } @@ -4656,6 +4670,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke, LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); + // Get the type from the shorty as the invokes may not return a value. uint32_t number_of_arguments = invoke->GetNumberOfArguments(); DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u); if (DataType::Is64BitType(value_type)) { @@ -5163,7 +5178,10 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, return; } - if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { + // Get the type from the shorty as the invokes may not return a value. + uint32_t arg_index = invoke->GetNumberOfArguments() - 1; + DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index); + if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field, thus seeing the new value // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084 @@ -5177,7 +5195,6 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, DCHECK_EQ(locations->GetTempCount(), (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u); - DataType::Type value_type = invoke->GetType(); if (get_and_update_op == GetAndUpdateOp::kSet) { if (DataType::IsFloatingPointType(value_type)) { // Add temps needed to do the GenerateGetAndUpdate() with core registers. @@ -5211,6 +5228,23 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, locations->AddTemp(Location::RequiresFpuRegister()); } } + + // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`. + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); + if (is_void) { + if (DataType::IsFloatingPointType(value_type)) { + // Note: This shall allocate a D register. There is no way to request an S register. + locations->AddTemp(Location::RequiresFpuRegister()); + } else if (DataType::Is64BitType(value_type)) { + // We need two for non-fpu 64 bit types. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + } } static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, @@ -5218,13 +5252,38 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, GetAndUpdateOp get_and_update_op, std::memory_order order, bool byte_swap = false) { + // Get the type from the shorty as the invokes may not return a value. uint32_t arg_index = invoke->GetNumberOfArguments() - 1; DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index); ArmVIXLAssembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); Location arg = locations->InAt(arg_index); - Location out = locations->Out(); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); + + size_t temps_that_mimic_out; + Location result; + const size_t temp_count = locations->GetTempCount(); + if (is_void) { + if (value_type == DataType::Type::kFloat32) { + // Note: Since we allocated a D register, use the low part. + DCHECK(locations->GetTemp(temp_count - 1u).IsFpuRegisterPair()); + temps_that_mimic_out = 1u; + result = locations->GetTemp(temp_count - 1u).ToLow(); + } else if (!DataType::IsFloatingPointType(value_type) && DataType::Is64BitType(value_type)) { + temps_that_mimic_out = 2u; + result = LocationFrom(RegisterFrom(locations->GetTemp(temp_count - 2u)), + RegisterFrom(locations->GetTemp(temp_count - 1u))); + } else { + temps_that_mimic_out = 1u; + result = locations->GetTemp(temp_count - 1u); + } + } else { + temps_that_mimic_out = 0u; + result = locations->Out(); + } VarHandleTarget target = GetVarHandleTarget(invoke); VarHandleSlowPathARMVIXL* slow_path = nullptr; @@ -5261,7 +5320,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, : value_type; // Prepare register for old value and temporaries if any. - Location old_value = out; + Location old_value = result; Location maybe_temp = Location::NoLocation(); Location maybe_vreg_temp = Location::NoLocation(); if (get_and_update_op == GetAndUpdateOp::kSet) { @@ -5269,7 +5328,9 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // rather than moving between core and FP registers in the loop. if (value_type == DataType::Type::kFloat64) { vixl32::DRegister arg_vreg = DRegisterFrom(arg); - DCHECK_EQ(locations->GetTempCount(), 5u); // `store_result` and the four here. + // `store_result` and the four here, plus maybe an extra one for the temp that mimics the + // "out" register. + DCHECK_EQ(temp_count, 5u + temps_that_mimic_out); old_value = LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2))); arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4))); @@ -5281,7 +5342,9 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, } } else if (value_type == DataType::Type::kFloat32) { vixl32::SRegister arg_vreg = SRegisterFrom(arg); - DCHECK_EQ(locations->GetTempCount(), 3u); // `store_result` and the two here. + // `store_result` and the two here, plus maybe an extra one for the temp that mimics the + // "out" register. + DCHECK_EQ(temp_count, 3u + temps_that_mimic_out); old_value = locations->GetTemp(1); arg = locations->GetTemp(2); __ Vmov(RegisterFrom(arg), arg_vreg); @@ -5293,7 +5356,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // Load the old value initially to a temporary register. // We shall move it to `out` later with a read barrier. old_value = LocationFrom(store_result); - store_result = RegisterFrom(out); // Use the `out` for the exclusive store result. + store_result = RegisterFrom(result); // Use `result` for the exclusive store result. } else { // The store_result is a separate temporary. DCHECK(!store_result.Is(target.object)); @@ -5305,7 +5368,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, if (value_type == DataType::Type::kInt64) { arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2))); // Swap the high/low regs and reverse the bytes in each after the load. - old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)); + old_value = LocationFrom(HighRegisterFrom(result), LowRegisterFrom(result)); } GenerateReverseBytes(assembler, value_type, original_arg, arg); } @@ -5315,7 +5378,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, : locations->GetTemp(1); DCHECK(!maybe_temp.Contains(LocationFrom(store_result))); if (DataType::IsFloatingPointType(value_type)) { - maybe_vreg_temp = locations->GetTemp(locations->GetTempCount() - 1u); + maybe_vreg_temp = locations->GetTemp(temp_count - 1u - temps_that_mimic_out); DCHECK(maybe_vreg_temp.IsFpuRegisterPair()); } if (byte_swap) { @@ -5324,7 +5387,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, get_and_update_op = GetAndUpdateOp::kAddWithByteSwap; } else if (value_type == DataType::Type::kInt64) { // Swap the high/low regs and reverse the bytes in each after the load. - old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)); + old_value = LocationFrom(HighRegisterFrom(result), LowRegisterFrom(result)); // Due to lack of registers, reverse bytes in `arg` and undo that later. GenerateReverseBytesInPlaceForEachWord(assembler, arg); arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg)); @@ -5353,36 +5416,38 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny); } - if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) { - if (value_type == DataType::Type::kInt64) { - GenerateReverseBytesInPlaceForEachWord(assembler, old_value); - if (get_and_update_op != GetAndUpdateOp::kSet) { - // Undo byte swapping in `arg`. We do not have the information - // whether the value in these registers shall be needed later. - GenerateReverseBytesInPlaceForEachWord(assembler, arg); + if (!is_void) { + if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) { + if (value_type == DataType::Type::kInt64) { + GenerateReverseBytesInPlaceForEachWord(assembler, old_value); + if (get_and_update_op != GetAndUpdateOp::kSet) { + // Undo byte swapping in `arg`. We do not have the information + // whether the value in these registers shall be needed later. + GenerateReverseBytesInPlaceForEachWord(assembler, arg); + } + } else { + GenerateReverseBytes(assembler, value_type, old_value, result); + } + } else if (get_and_update_op == GetAndUpdateOp::kSet && + DataType::IsFloatingPointType(value_type)) { + if (value_type == DataType::Type::kFloat64) { + __ Vmov(DRegisterFrom(result), LowRegisterFrom(old_value), HighRegisterFrom(old_value)); + } else { + __ Vmov(SRegisterFrom(result), RegisterFrom(old_value)); + } + } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + if (kUseBakerReadBarrier) { + codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(result), + RegisterFrom(old_value)); + } else { + codegen->GenerateReadBarrierSlow( + invoke, + Location::RegisterLocation(RegisterFrom(result).GetCode()), + Location::RegisterLocation(RegisterFrom(old_value).GetCode()), + Location::RegisterLocation(target.object.GetCode()), + /*offset=*/ 0u, + /*index=*/ Location::RegisterLocation(target.offset.GetCode())); } - } else { - GenerateReverseBytes(assembler, value_type, old_value, out); - } - } else if (get_and_update_op == GetAndUpdateOp::kSet && - DataType::IsFloatingPointType(value_type)) { - if (value_type == DataType::Type::kFloat64) { - __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value)); - } else { - __ Vmov(SRegisterFrom(out), RegisterFrom(old_value)); - } - } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { - if (kUseBakerReadBarrier) { - codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out), - RegisterFrom(old_value)); - } else { - codegen->GenerateReadBarrierSlow( - invoke, - Location::RegisterLocation(RegisterFrom(out).GetCode()), - Location::RegisterLocation(RegisterFrom(old_value).GetCode()), - Location::RegisterLocation(target.object.GetCode()), - /*offset=*/ 0u, - /*index=*/ Location::RegisterLocation(target.offset.GetCode())); } } diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc index 2bc8cad71a..4e5dbe4d23 100644 --- a/compiler/optimizing/intrinsics_riscv64.cc +++ b/compiler/optimizing/intrinsics_riscv64.cc @@ -2955,20 +2955,35 @@ static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator, locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // Request another temporary register for methods that don't return a value. + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + if (is_void) { + locations->AddTemp(Location::RequiresRegister()); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } } static void GenUnsafeGetAndUpdate(HInvoke* invoke, DataType::Type type, CodeGeneratorRISCV64* codegen, GetAndUpdateOp get_and_update_op) { + // Currently only used for these GetAndUpdateOp. Might be fine for other ops but double check + // before using. + DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet); + Riscv64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); - Location out_loc = locations->Out(); - XRegister out = out_loc.AsRegister<XRegister>(); // Result. - XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer. - XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset. - XRegister arg = locations->InAt(3).AsRegister<XRegister>(); // New value or addend. + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + // We use a temporary for void methods, as we don't return the value. + Location out_or_temp_loc = + is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out(); + XRegister out_or_temp = out_or_temp_loc.AsRegister<XRegister>(); // Result. + XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer. + XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset. + XRegister arg = locations->InAt(3).AsRegister<XRegister>(); // New value or addend. // This needs to be before the temp registers, as MarkGCCard also uses scratch registers. if (type == DataType::Type::kReference) { @@ -2987,28 +3002,28 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke, std::memory_order_seq_cst, tmp_ptr, arg, - /*old_value=*/ out, + /*old_value=*/ out_or_temp, /*mask=*/ kNoXRegister, /*temp=*/ kNoXRegister); - if (type == DataType::Type::kReference) { - __ ZextW(out, out); + if (!is_void && type == DataType::Type::kReference) { + __ ZextW(out_or_temp, out_or_temp); if (codegen->EmitReadBarrier()) { DCHECK(get_and_update_op == GetAndUpdateOp::kSet); if (kUseBakerReadBarrier) { // Use RA as temp. It is clobbered in the slow path anyway. static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA); - SlowPathCodeRISCV64* rb_slow_path = - codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out_loc, kBakerReadBarrierTemp); - codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out_loc, kBakerReadBarrierTemp); + SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath( + invoke, out_or_temp_loc, kBakerReadBarrierTemp); + codegen->EmitBakerReadBarierMarkingCheck( + rb_slow_path, out_or_temp_loc, kBakerReadBarrierTemp); } else { - codegen->GenerateReadBarrierSlow( - invoke, - out_loc, - out_loc, - Location::RegisterLocation(base), - /*offset=*/ 0u, - /*index=*/ Location::RegisterLocation(offset)); + codegen->GenerateReadBarrierSlow(invoke, + out_or_temp_loc, + out_or_temp_loc, + Location::RegisterLocation(base), + /*offset=*/ 0u, + /*index=*/ Location::RegisterLocation(offset)); } } } @@ -4450,7 +4465,11 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, return; } - if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { + // Get the type from the shorty as the invokes may not return a value. + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); + uint32_t arg_index = invoke->GetNumberOfArguments() - 1; + DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index); + if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field, thus seeing the new value // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084 @@ -4458,15 +4477,10 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, } // TODO(riscv64): Fix this intrinsic for heap poisoning configuration. - if (kPoisonHeapReferences && invoke->GetType() == DataType::Type::kReference) { + if (kPoisonHeapReferences && value_type == DataType::Type::kReference) { return; } - LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); - uint32_t arg_index = invoke->GetNumberOfArguments() - 1u; - DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke)); - DataType::Type value_type = invoke->GetType(); - DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index)); Location arg = locations->InAt(arg_index); bool is_fp = DataType::IsFloatingPointType(value_type); @@ -4517,6 +4531,19 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, if (temps_needed > old_temp_count + scratch_registers_available) { locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available)); } + + // Request another temporary register for methods that don't return a value. + // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`. + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); + if (is_void) { + if (DataType::IsFloatingPointType(value_type)) { + locations->AddTemp(Location::RequiresFpuRegister()); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + } } static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, @@ -4524,16 +4551,21 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, GetAndUpdateOp get_and_update_op, std::memory_order order, bool byte_swap = false) { + // Get the type from the shorty as the invokes may not return a value. uint32_t arg_index = invoke->GetNumberOfArguments() - 1; DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke)); - DataType::Type value_type = invoke->GetType(); - DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index)); + DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index); Riscv64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); Location arg = locations->InAt(arg_index); DCHECK_IMPLIES(arg.IsConstant(), arg.GetConstant()->IsZeroBitPattern()); - Location out = locations->Out(); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); + // We use a temporary for void methods, as we don't return the value. + Location out_or_temp = + is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out(); VarHandleTarget target = GetVarHandleTarget(invoke); VarHandleSlowPathRISCV64* slow_path = nullptr; @@ -4589,6 +4621,8 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, available_scratch_registers -= 1u; return srs.AllocateXRegister(); } else { + DCHECK_IMPLIES(is_void, next_temp != locations->GetTempCount() - 1u) + << "The last temp is special for the void case, as it represents the out register."; XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>(); next_temp += 1u; return temp; @@ -4678,24 +4712,24 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, codegen->GetInstructionVisitor()->Load( Location::RegisterLocation(old_value), tmp_ptr, /*offset=*/ 0, op_type); if (byte_swap) { - GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type); + GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type); } else { DCHECK(is_fp); - codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type); + codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type); } if (is_fp) { codegen->GetInstructionVisitor()->FAdd( - ftmp, out.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type); + ftmp, out_or_temp.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type); codegen->MoveLocation( Location::RegisterLocation(new_value), Location::FpuRegisterLocation(ftmp), op_type); } else if (arg.IsConstant()) { DCHECK(arg.GetConstant()->IsZeroBitPattern()); - __ Mv(new_value, out.AsRegister<XRegister>()); + __ Mv(new_value, out_or_temp.AsRegister<XRegister>()); } else if (value_type == DataType::Type::kInt64) { - __ Add(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>()); + __ Add(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>()); } else { DCHECK_EQ(op_type, DataType::Type::kInt32); - __ Addw(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>()); + __ Addw(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>()); } if (byte_swap) { DataType::Type swap_type = op_type; @@ -4706,7 +4740,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // To update the 16 bits, we can XOR the new value with the `out`, byte swap as Uint16 // (extracting only the bits we want to update), shift and XOR with the old value. swap_type = DataType::Type::kUint16; - __ Xor(new_value, new_value, out.AsRegister<XRegister>()); + __ Xor(new_value, new_value, out_or_temp.AsRegister<XRegister>()); } GenerateReverseBytes(codegen, Location::RegisterLocation(new_value), new_value, swap_type); if (is_small) { @@ -4727,15 +4761,15 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, store_result, /*expected=*/ old_value); } else { - XRegister old_value = is_fp ? get_temp() : out.AsRegister<XRegister>(); + XRegister old_value = is_fp ? get_temp() : out_or_temp.AsRegister<XRegister>(); GenerateGetAndUpdate( codegen, get_and_update_op, op_type, order, tmp_ptr, arg_reg, old_value, mask, temp); if (byte_swap) { - DCHECK_IMPLIES(is_small, out.AsRegister<XRegister>() == old_value) - << " " << value_type << " " << out.AsRegister<XRegister>() << "!=" << old_value; - GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type); + DCHECK_IMPLIES(is_small, out_or_temp.AsRegister<XRegister>() == old_value) + << " " << value_type << " " << out_or_temp.AsRegister<XRegister>() << "!=" << old_value; + GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type); } else if (is_fp) { - codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type); + codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type); } else if (is_small) { __ Srlw(old_value, old_value, shift); DCHECK_NE(value_type, DataType::Type::kUint8); @@ -4754,14 +4788,14 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, if (codegen->EmitBakerReadBarrier()) { // Use RA as temp. It is clobbered in the slow path anyway. static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA); - SlowPathCodeRISCV64* rb_slow_path = - codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out, kBakerReadBarrierTemp); - codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out, kBakerReadBarrierTemp); + SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath( + invoke, out_or_temp, kBakerReadBarrierTemp); + codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out_or_temp, kBakerReadBarrierTemp); } else if (codegen->EmitNonBakerReadBarrier()) { Location base_loc = Location::RegisterLocation(target.object); Location index = Location::RegisterLocation(target.offset); SlowPathCodeRISCV64* rb_slow_path = codegen->AddReadBarrierSlowPath( - invoke, out, out, base_loc, /*offset=*/ 0u, index); + invoke, out_or_temp, out_or_temp, base_loc, /*offset=*/ 0u, index); __ J(rb_slow_path->GetEntryLabel()); __ Bind(rb_slow_path->GetExitLabel()); } @@ -4775,13 +4809,16 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // Check that we have allocated the right number of temps. We may need more registers // for byte swapped CAS in the slow path, so skip this check for the main path in that case. + // In the void case, we requested an extra register to mimic the `out` register. + const size_t extra_temp_registers = is_void ? 1u : 0u; bool has_byte_swap = (arg_index == 3u) && (!is_reference && data_size != 1u); - if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) { + if ((!has_byte_swap || byte_swap) && + next_temp != locations->GetTempCount() - extra_temp_registers) { // We allocate a temporary register for the class object for a static field `VarHandle` but // we do not update the `next_temp` if it's otherwise unused after the address calculation. CHECK_EQ(arg_index, 1u); CHECK_EQ(next_temp, 1u); - CHECK_EQ(locations->GetTempCount(), 2u); + CHECK_EQ(locations->GetTempCount(), 2u + extra_temp_registers); } } diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index 13d9bc4b68..6c08cea3f8 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -111,7 +111,7 @@ static inline size_t GetExpectedVarHandleCoordinatesCount(HInvoke *invoke) { } static inline DataType::Type GetDataTypeFromShorty(HInvoke* invoke, uint32_t index) { - DCHECK(invoke->IsInvokePolymorphic()); + DCHECK(invoke->IsInvokePolymorphic()) << *invoke; const DexFile* dex_file = invoke->GetMethodReference().dex_file; const char* shorty = dex_file->GetShorty(invoke->AsInvokePolymorphic()->GetProtoIndex()); DCHECK_LT(index, strlen(shorty)); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 2b83ba8349..dc88de474c 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -34,6 +34,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "mirror/var_handle.h" +#include "optimizing/data_type.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" #include "utils/x86/assembler_x86.h" @@ -2489,6 +2490,7 @@ void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator, locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + const bool is_void = invoke->GetType() == DataType::Type::kVoid; if (type == DataType::Type::kInt64) { // Explicitly allocate all registers. locations->SetInAt(1, Location::RegisterLocation(EBP)); @@ -2502,14 +2504,23 @@ void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator, locations->SetInAt(2, Location::RegisterPairLocation(ESI, EDI)); locations->SetInAt(3, Location::RegisterPairLocation(EBX, ECX)); } - locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap); + if (is_void) { + locations->AddTemp(Location::RegisterLocation(EAX)); + locations->AddTemp(Location::RegisterLocation(EDX)); + } else { + locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap); + } } else { locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); // Use the same register for both the output and the new value or addend // to take advantage of XCHG or XADD. Arbitrarily pick EAX. locations->SetInAt(3, Location::RegisterLocation(EAX)); - locations->SetOut(Location::RegisterLocation(EAX)); + // Only set the `out` register if it's needed. In the void case we can still use EAX in the + // same manner as it is used an as `in` register. + if (!is_void) { + locations->SetOut(Location::RegisterLocation(EAX)); + } } } @@ -2573,14 +2584,20 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke, X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); LocationSummary* locations = invoke->GetLocations(); - Location out = locations->Out(); // Result. + const bool is_void = invoke->GetType() == DataType::Type::kVoid; + // We use requested specific registers to use as temps for void methods, as we don't return the + // value. + Location out_or_temp = + is_void ? (type == DataType::Type::kInt64 ? Location::RegisterPairLocation(EAX, EDX) : + Location::RegisterLocation(EAX)) : + locations->Out(); Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. Location offset = locations->InAt(2); // Long offset. Location arg = locations->InAt(3); // New value or addend. if (type == DataType::Type::kInt32) { - DCHECK(out.Equals(arg)); - Register out_reg = out.AsRegister<Register>(); + DCHECK(out_or_temp.Equals(arg)); + Register out_reg = out_or_temp.AsRegister<Register>(); Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0); if (get_and_update_op == GetAndUpdateOp::kAdd) { __ LockXaddl(field_address, out_reg); @@ -2621,7 +2638,7 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke, } else { DCHECK_EQ(type, DataType::Type::kReference); DCHECK(get_and_update_op == GetAndUpdateOp::kSet); - Register out_reg = out.AsRegister<Register>(); + Register out_reg = out_or_temp.AsRegister<Register>(); Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0); Register temp1 = locations->GetTemp(0).AsRegister<Register>(); Register temp2 = locations->GetTemp(1).AsRegister<Register>(); @@ -2650,8 +2667,10 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke, __ movl(temp1, out_reg); __ PoisonHeapReference(temp1); __ xchgl(temp1, field_address); - __ UnpoisonHeapReference(temp1); - __ movl(out_reg, temp1); + if (!is_void) { + __ UnpoisonHeapReference(temp1); + __ movl(out_reg, temp1); + } } else { __ xchgl(out_reg, field_address); } @@ -4237,9 +4256,13 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86* return; } + // Get the type from the shorty as the invokes may not return a value. uint32_t number_of_arguments = invoke->GetNumberOfArguments(); uint32_t value_index = number_of_arguments - 1; DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); if (DataType::Is64BitType(value_type)) { // We avoid the case of an Int64/Float64 value because we would need to place it in a register @@ -4266,10 +4289,17 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86* if (value_type == DataType::Type::kFloat32) { locations->AddTemp(Location::RegisterLocation(EAX)); locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index))); - locations->SetOut(Location::RequiresFpuRegister()); + // Only set the `out` register if it's needed. In the void case, we will not use `out`. + if (!is_void) { + locations->SetOut(Location::RequiresFpuRegister()); + } } else { locations->SetInAt(value_index, Location::RegisterLocation(EAX)); - locations->SetOut(Location::RegisterLocation(EAX)); + // Only set the `out` register if it's needed. In the void case we can still use EAX in the + // same manner as it is used an as `in` register. + if (!is_void) { + locations->SetOut(Location::RegisterLocation(EAX)); + } } } @@ -4283,6 +4313,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege // The value we want to set is the last argument uint32_t value_index = invoke->GetNumberOfArguments() - 1; Location value = locations->InAt(value_index); + // Get the type from the shorty as the invokes may not return a value. DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index); Register temp = locations->GetTemp(1).AsRegister<Register>(); Register temp2 = locations->GetTemp(2).AsRegister<Register>(); @@ -4307,24 +4338,36 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege // fields the object is in a separate register, it is safe to use the first temporary register. temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>(); // No need for a lock prefix. `xchg` has an implicit lock when it is used with an address. + + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); switch (value_type) { case DataType::Type::kBool: __ xchgb(value.AsRegister<ByteRegister>(), field_addr); - __ movzxb(locations->Out().AsRegister<Register>(), - locations->Out().AsRegister<ByteRegister>()); + if (!is_void) { + __ movzxb(locations->Out().AsRegister<Register>(), + locations->Out().AsRegister<ByteRegister>()); + } break; case DataType::Type::kInt8: __ xchgb(value.AsRegister<ByteRegister>(), field_addr); - __ movsxb(locations->Out().AsRegister<Register>(), - locations->Out().AsRegister<ByteRegister>()); + if (!is_void) { + __ movsxb(locations->Out().AsRegister<Register>(), + locations->Out().AsRegister<ByteRegister>()); + } break; case DataType::Type::kUint16: __ xchgw(value.AsRegister<Register>(), field_addr); - __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + if (!is_void) { + __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + } break; case DataType::Type::kInt16: __ xchgw(value.AsRegister<Register>(), field_addr); - __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + if (!is_void) { + __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + } break; case DataType::Type::kInt32: __ xchgl(value.AsRegister<Register>(), field_addr); @@ -4332,7 +4375,9 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege case DataType::Type::kFloat32: codegen->Move32(Location::RegisterLocation(EAX), value); __ xchgl(EAX, field_addr); - __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX); + if (!is_void) { + __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX); + } break; case DataType::Type::kReference: { if (codegen->EmitBakerReadBarrier()) { @@ -4353,10 +4398,13 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege __ movl(temp, value.AsRegister<Register>()); __ PoisonHeapReference(temp); __ xchgl(temp, field_addr); - __ UnpoisonHeapReference(temp); - __ movl(locations->Out().AsRegister<Register>(), temp); + if (!is_void) { + __ UnpoisonHeapReference(temp); + __ movl(locations->Out().AsRegister<Register>(), temp); + } } else { - __ xchgl(locations->Out().AsRegister<Register>(), field_addr); + DCHECK_IMPLIES(!is_void, locations->Out().Equals(Location::RegisterLocation(EAX))); + __ xchgl(Location::RegisterLocation(EAX).AsRegister<Register>(), field_addr); } break; } @@ -4590,6 +4638,7 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86* return; } + // Get the type from the shorty as the invokes may not return a value. // The last argument should be the value we intend to set. uint32_t value_index = invoke->GetNumberOfArguments() - 1; DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index); @@ -4615,15 +4664,26 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86* locations->AddTemp(Location::RequiresRegister()); } + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); + if (DataType::IsFloatingPointType(value_type)) { locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RegisterLocation(EAX)); locations->SetInAt(value_index, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); + // Only set the `out` register if it's needed. In the void case, we do not use `out`. + if (!is_void) { + locations->SetOut(Location::RequiresFpuRegister()); + } } else { // xadd updates the register argument with the old value. ByteRegister required for xaddb. locations->SetInAt(value_index, Location::RegisterLocation(EAX)); - locations->SetOut(Location::RegisterLocation(EAX)); + // Only set the `out` register if it's needed. In the void case we can still use EAX in the + // same manner as it is used an as `in` register. + if (!is_void) { + locations->SetOut(Location::RegisterLocation(EAX)); + } } } @@ -4636,8 +4696,11 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codege LocationSummary* locations = invoke->GetLocations(); uint32_t number_of_arguments = invoke->GetNumberOfArguments(); uint32_t value_index = number_of_arguments - 1; + // Get the type from the shorty as the invokes may not return a value. DataType::Type type = GetDataTypeFromShorty(invoke, value_index); - DCHECK_EQ(type, invoke->GetType()); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == type); Location value_loc = locations->InAt(value_index); Register temp = locations->GetTemp(0).AsRegister<Register>(); SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); @@ -4659,16 +4722,22 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codege switch (type) { case DataType::Type::kInt8: __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>()); - __ movsxb(locations->Out().AsRegister<Register>(), - locations->Out().AsRegister<ByteRegister>()); + if (!is_void) { + __ movsxb(locations->Out().AsRegister<Register>(), + locations->Out().AsRegister<ByteRegister>()); + } break; case DataType::Type::kInt16: __ LockXaddw(field_addr, value_loc.AsRegister<Register>()); - __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + if (!is_void) { + __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + } break; case DataType::Type::kUint16: __ LockXaddw(field_addr, value_loc.AsRegister<Register>()); - __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + if (!is_void) { + __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + } break; case DataType::Type::kInt32: __ LockXaddl(field_addr, value_loc.AsRegister<Register>()); @@ -4693,8 +4762,10 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codege temp); __ j(kNotZero, &try_again); - // The old value is present in EAX. - codegen->Move32(locations->Out(), eax); + if (!is_void) { + // The old value is present in EAX. + codegen->Move32(locations->Out(), eax); + } break; } default: @@ -4740,9 +4811,11 @@ static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGenerat return; } + // Get the type from the shorty as the invokes may not return a value. // The last argument should be the value we intend to set. uint32_t value_index = invoke->GetNumberOfArguments() - 1; - if (DataType::Is64BitType(GetDataTypeFromShorty(invoke, value_index))) { + DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index); + if (DataType::Is64BitType(value_type)) { // We avoid the case of an Int64 value because we would need to place it in a register pair. // If the slow path is taken, the ParallelMove might fail to move the pair according to the // X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from @@ -4767,7 +4840,18 @@ static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGenerat } locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index))); - locations->SetOut(Location::RegisterLocation(EAX)); + + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); + if (is_void) { + // Used as a temporary, even when we are not outputting it so reserve it. This has to be + // requested before the other temporary since there's variable number of temp registers and the + // other temp register is expected to be the last one. + locations->AddTemp(Location::RegisterLocation(EAX)); + } else { + locations->SetOut(Location::RegisterLocation(EAX)); + } } static void GenerateBitwiseOp(HInvoke* invoke, @@ -4805,9 +4889,12 @@ static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); + // Get the type from the shorty as the invokes may not return a value. uint32_t value_index = invoke->GetNumberOfArguments() - 1; DataType::Type type = GetDataTypeFromShorty(invoke, value_index); - DCHECK_EQ(type, invoke->GetType()); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == type); Register temp = locations->GetTemp(0).AsRegister<Register>(); SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); codegen->AddSlowPath(slow_path); @@ -4826,8 +4913,9 @@ static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* DCHECK_NE(temp, reference); Address field_addr(reference, offset, TIMES_1, 0); - Register out = locations->Out().AsRegister<Register>(); - DCHECK_EQ(out, EAX); + Location eax_loc = Location::RegisterLocation(EAX); + Register eax = eax_loc.AsRegister<Register>(); + DCHECK_IMPLIES(!is_void, locations->Out().Equals(eax_loc)); if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease || invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease || @@ -4838,12 +4926,12 @@ static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* NearLabel try_again; __ Bind(&try_again); // Place the expected value in EAX for cmpxchg - codegen->LoadFromMemoryNoBarrier(type, locations->Out(), field_addr); + codegen->LoadFromMemoryNoBarrier(type, eax_loc, field_addr); codegen->Move32(locations->GetTemp(0), locations->InAt(value_index)); - GenerateBitwiseOp(invoke, codegen, temp, out); + GenerateBitwiseOp(invoke, codegen, temp, eax); GenPrimitiveLockedCmpxchg(type, codegen, - /* expected_value= */ locations->Out(), + /* expected_value= */ eax_loc, /* new_value= */ locations->GetTemp(0), reference, offset); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 6424201c4b..ba254ee705 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -27,8 +27,8 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "heap_poisoning.h" -#include "intrinsics.h" #include "intrinsic_objects.h" +#include "intrinsics.h" #include "intrinsics_utils.h" #include "lock_word.h" #include "mirror/array-inl.h" @@ -36,6 +36,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "optimizing/code_generator.h" +#include "optimizing/data_type.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" #include "utils/x86_64/assembler_x86_64.h" @@ -2633,7 +2634,11 @@ static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator, // Use the same register for both the output and the new value or addend // to take advantage of XCHG or XADD. Arbitrarily pick RAX. locations->SetInAt(3, Location::RegisterLocation(RAX)); - locations->SetOut(Location::RegisterLocation(RAX)); + // Only set the `out` register if it's needed. In the void case we can still use RAX in the + // same manner as it is used an as `in` register. + if (invoke->GetType() != DataType::Type::kVoid) { + locations->SetOut(Location::RegisterLocation(RAX)); + } } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) { @@ -2697,25 +2702,29 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke, X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); LocationSummary* locations = invoke->GetLocations(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // Result. - CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); // Object pointer. - CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); // Long offset. - DCHECK_EQ(out, locations->InAt(3).AsRegister<CpuRegister>()); // New value or addend. + const bool is_void = invoke->GetType() == DataType::Type::kVoid; + Location rax_loc = Location::RegisterLocation(RAX); + // We requested RAX to use as a temporary for void methods, as we don't return the value. + DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc)); + CpuRegister out_or_temp = rax_loc.AsRegister<CpuRegister>(); // Result. + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); // Object pointer. + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); // Long offset. + DCHECK_EQ(out_or_temp, locations->InAt(3).AsRegister<CpuRegister>()); // New value or addend. Address field_address(base, offset, TIMES_1, 0); if (type == DataType::Type::kInt32) { if (get_and_update_op == GetAndUpdateOp::kAdd) { - __ LockXaddl(field_address, out); + __ LockXaddl(field_address, out_or_temp); } else { DCHECK(get_and_update_op == GetAndUpdateOp::kSet); - __ xchgl(out, field_address); + __ xchgl(out_or_temp, field_address); } } else if (type == DataType::Type::kInt64) { if (get_and_update_op == GetAndUpdateOp::kAdd) { - __ LockXaddq(field_address, out); + __ LockXaddq(field_address, out_or_temp); } else { DCHECK(get_and_update_op == GetAndUpdateOp::kSet); - __ xchgq(out, field_address); + __ xchgq(out_or_temp, field_address); } } else { DCHECK_EQ(type, DataType::Type::kReference); @@ -2740,18 +2749,20 @@ static void GenUnsafeGetAndUpdate(HInvoke* invoke, // Mark card for object as a new value shall be stored. bool new_value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out, new_value_can_be_null); + codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_or_temp, new_value_can_be_null); if (kPoisonHeapReferences) { // Use a temp to avoid poisoning base of the field address, which might happen if `out` // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`). - __ movl(temp1, out); + __ movl(temp1, out_or_temp); __ PoisonHeapReference(temp1); __ xchgl(temp1, field_address); - __ UnpoisonHeapReference(temp1); - __ movl(out, temp1); + if (!is_void) { + __ UnpoisonHeapReference(temp1); + __ movl(out_or_temp, temp1); + } } else { - __ xchgl(out, field_address); + __ xchgl(out_or_temp, field_address); } } } @@ -4487,23 +4498,33 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_ return; } + // Get the type from the shorty as the invokes may not return a value. uint32_t number_of_arguments = invoke->GetNumberOfArguments(); uint32_t new_value_index = number_of_arguments - 1; - DataType::Type type = invoke->GetType(); - DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index)); + DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); LocationSummary* locations = CreateVarHandleCommonLocations(invoke); - if (DataType::IsFloatingPointType(type)) { - locations->SetOut(Location::RequiresFpuRegister()); + if (DataType::IsFloatingPointType(value_type)) { + // Only set the `out` register if it's needed. In the void case we don't use `out`. + if (!is_void) { + locations->SetOut(Location::RequiresFpuRegister()); + } // A temporary is needed to load the new floating-point value into a register for XCHG. locations->AddTemp(Location::RequiresRegister()); } else { - // Use the same register for both the new value and output to take advantage of XCHG. - // It doesn't have to be RAX, but we need to choose some to make sure it's the same. - locations->SetOut(Location::RegisterLocation(RAX)); + // Only set the `out` register if it's needed. In the void case we can still use RAX in the + // same manner as it is used an as `in` register. + if (!is_void) { + // Use the same register for both the new value and output to take advantage of XCHG. + // It doesn't have to be RAX, but we need to choose some to make sure it's the same. + locations->SetOut(Location::RegisterLocation(RAX)); + } locations->SetInAt(new_value_index, Location::RegisterLocation(RAX)); - if (type == DataType::Type::kReference) { + if (value_type == DataType::Type::kReference) { // Need two temporaries for MarkGCCard. locations->AddRegisterTemps(2); if (codegen->EmitReadBarrier()) { @@ -4526,6 +4547,9 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, LocationSummary* locations = invoke->GetLocations(); Location out = locations->Out(); uint32_t temp_count = locations->GetTempCount(); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == type); if (DataType::IsFloatingPointType(type)) { // `getAndSet` for floating-point types: move the new FP value into a register, atomically @@ -4545,7 +4569,9 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, if (byte_swap) { codegen->GetInstructionCodegen()->Bswap(temp, bswap_type); } - __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit); + if (!is_void) { + __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit); + } } else if (type == DataType::Type::kReference) { // `getAndSet` for references: load reference and atomically exchange it with the field. // Output register is the same as the one holding new value, so no need to move the result. @@ -4568,15 +4594,17 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, } codegen->MarkGCCard(temp1, temp2, ref); - DCHECK_EQ(valreg, out.AsRegister<CpuRegister>()); + DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>()); if (kPoisonHeapReferences) { // Use a temp to avoid poisoning base of the field address, which might happen if `valreg` is // the same as `target.object` (for code like `vh.getAndSet(obj, obj)`). __ movl(temp1, valreg); __ PoisonHeapReference(temp1); __ xchgl(temp1, field_addr); - __ UnpoisonHeapReference(temp1); - __ movl(valreg, temp1); + if (!is_void) { + __ UnpoisonHeapReference(temp1); + __ movl(valreg, temp1); + } } else { __ xchgl(valreg, field_addr); } @@ -4587,24 +4615,32 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, codegen->GetInstructionCodegen()->Bswap(value, type); } CpuRegister valreg = value.AsRegister<CpuRegister>(); - DCHECK_EQ(valreg, out.AsRegister<CpuRegister>()); + DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>()); switch (type) { case DataType::Type::kBool: case DataType::Type::kUint8: __ xchgb(valreg, field_addr); - __ movzxb(valreg, valreg); + if (!is_void) { + __ movzxb(valreg, valreg); + } break; case DataType::Type::kInt8: __ xchgb(valreg, field_addr); - __ movsxb(valreg, valreg); + if (!is_void) { + __ movsxb(valreg, valreg); + } break; case DataType::Type::kUint16: __ xchgw(valreg, field_addr); - __ movzxw(valreg, valreg); + if (!is_void) { + __ movzxw(valreg, valreg); + } break; case DataType::Type::kInt16: __ xchgw(valreg, field_addr); - __ movsxw(valreg, valreg); + if (!is_void) { + __ movsxw(valreg, valreg); + } break; case DataType::Type::kInt32: case DataType::Type::kUint32: @@ -4629,25 +4665,36 @@ static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGenerat return; } + // Get the type from the shorty as the invokes may not return a value. uint32_t number_of_arguments = invoke->GetNumberOfArguments(); uint32_t new_value_index = number_of_arguments - 1; - DataType::Type type = invoke->GetType(); - DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index)); + DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); LocationSummary* locations = CreateVarHandleCommonLocations(invoke); - DCHECK_NE(DataType::Type::kReference, type); - DCHECK(!DataType::IsFloatingPointType(type)); + DCHECK_NE(DataType::Type::kReference, value_type); + DCHECK(!DataType::IsFloatingPointType(value_type)); + if (!is_void) { + // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary. + locations->SetOut(Location::RegisterLocation(RAX)); + } else { + // Used as a temporary, even when we are not outputting it so reserve it. This has to be + // requested before the other temporary since there's variable number of temp registers and the + // other temp register is expected to be the last one. + locations->AddTemp(Location::RegisterLocation(RAX)); + } // A temporary to compute the bitwise operation on the old and the new values. locations->AddTemp(Location::RequiresRegister()); // We need value to be either in a register, or a 32-bit constant (as there are no arithmetic // instructions that accept 64-bit immediate on x86_64). - locations->SetInAt(new_value_index, DataType::Is64BitType(type) - ? Location::RequiresRegister() - : Location::RegisterOrConstant(invoke->InputAt(new_value_index))); - // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary. - locations->SetOut(Location::RegisterLocation(RAX)); + locations->SetInAt(new_value_index, + DataType::Is64BitType(value_type) ? + Location::RequiresRegister() : + Location::RegisterOrConstant(invoke->InputAt(new_value_index))); } static void GenerateVarHandleGetAndOp(HInvoke* invoke, @@ -4659,8 +4706,9 @@ static void GenerateVarHandleGetAndOp(HInvoke* invoke, bool byte_swap) { X86_64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); - Location temp_loc = locations->GetTemp(locations->GetTempCount() - 1); - Location rax_loc = locations->Out(); + Location temp_loc = locations->GetTemp(locations->GetTempCount() - 1u); + Location rax_loc = Location::RegisterLocation(RAX); + DCHECK_IMPLIES(invoke->GetType() != DataType::Type::kVoid, locations->Out().Equals(rax_loc)); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); CpuRegister rax = rax_loc.AsRegister<CpuRegister>(); DCHECK_EQ(rax.AsRegister(), RAX); @@ -4768,15 +4816,21 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_ return; } + // Get the type from the shorty as the invokes may not return a value. uint32_t number_of_arguments = invoke->GetNumberOfArguments(); uint32_t new_value_index = number_of_arguments - 1; - DataType::Type type = invoke->GetType(); - DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index)); + DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == value_type); LocationSummary* locations = CreateVarHandleCommonLocations(invoke); - if (DataType::IsFloatingPointType(type)) { - locations->SetOut(Location::RequiresFpuRegister()); + if (DataType::IsFloatingPointType(value_type)) { + // Only set the `out` register if it's needed. In the void case we don't use `out` + if (!is_void) { + locations->SetOut(Location::RequiresFpuRegister()); + } // Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD. locations->SetInAt(new_value_index, Location::RequiresFpuRegister()); // CMPXCHG clobbers RAX. @@ -4786,11 +4840,15 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_ // A temporary to hold the new value for CMPXCHG. locations->AddTemp(Location::RequiresRegister()); } else { - DCHECK_NE(type, DataType::Type::kReference); - // Use the same register for both the new value and output to take advantage of XADD. - // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls - // back to GenerateVarHandleGetAndOp that expects out in RAX. - locations->SetOut(Location::RegisterLocation(RAX)); + DCHECK_NE(value_type, DataType::Type::kReference); + // Only set the `out` register if it's needed. In the void case we can still use RAX in the + // same manner as it is used an as `in` register. + if (!is_void) { + // Use the same register for both the new value and output to take advantage of XADD. + // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls + // back to GenerateVarHandleGetAndOp that expects out in RAX. + locations->SetOut(Location::RegisterLocation(RAX)); + } locations->SetInAt(new_value_index, Location::RegisterLocation(RAX)); if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) { // For byte array views with non-native endianness we need extra BSWAP operations, so we @@ -4814,6 +4872,10 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, Location out = locations->Out(); uint32_t temp_count = locations->GetTempCount(); + DataType::Type return_type = invoke->GetType(); + const bool is_void = return_type == DataType::Type::kVoid; + DCHECK_IMPLIES(!is_void, return_type == type); + if (DataType::IsFloatingPointType(type)) { if (byte_swap) { // This code should never be executed: it is the case of a byte array view (since it requires @@ -4876,7 +4938,9 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, if (byte_swap) { codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type); } - __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit); + if (!is_void) { + __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit); + } } else { if (byte_swap) { // We cannot use XADD since we need to byte-swap the old value when reading it from memory, @@ -4893,24 +4957,32 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, // the old value to the field. Output register is the same as the one holding new value. Do // sign extend / zero extend as needed. CpuRegister valreg = value.AsRegister<CpuRegister>(); - DCHECK_EQ(valreg, out.AsRegister<CpuRegister>()); + DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>()); switch (type) { case DataType::Type::kBool: case DataType::Type::kUint8: __ LockXaddb(field_addr, valreg); - __ movzxb(valreg, valreg); + if (!is_void) { + __ movzxb(valreg, valreg); + } break; case DataType::Type::kInt8: __ LockXaddb(field_addr, valreg); - __ movsxb(valreg, valreg); + if (!is_void) { + __ movsxb(valreg, valreg); + } break; case DataType::Type::kUint16: __ LockXaddw(field_addr, valreg); - __ movzxw(valreg, valreg); + if (!is_void) { + __ movzxw(valreg, valreg); + } break; case DataType::Type::kInt16: __ LockXaddw(field_addr, valreg); - __ movsxw(valreg, valreg); + if (!is_void) { + __ movsxw(valreg, valreg); + } break; case DataType::Type::kInt32: case DataType::Type::kUint32: @@ -4939,9 +5011,10 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, X86_64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); + // Get the type from the shorty as the invokes may not return a value. uint32_t number_of_arguments = invoke->GetNumberOfArguments(); Location value = locations->InAt(number_of_arguments - 1); - DataType::Type type = invoke->GetType(); + DataType::Type type = GetDataTypeFromShorty(invoke, number_of_arguments - 1); VarHandleSlowPathX86_64* slow_path = nullptr; VarHandleTarget target = GetVarHandleTarget(invoke); @@ -5199,7 +5272,7 @@ void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>(); CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>(); - CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1).AsRegister<CpuRegister>(); + CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1u).AsRegister<CpuRegister>(); MemberOffset class_offset = mirror::Object::ClassOffset(); MemberOffset array_length_offset = mirror::Array::LengthOffset(); |