author     2023-10-31 10:02:46 +0000
committer  2023-11-09 15:27:28 +0000
commit     af06af4b126455a0050ff6447ad700ea073d3dc7 (patch)
tree       1c437f576fe17cae20416890d566a0f4d001f88e
parent     b3564e613cb4ea438ffacf9bb2b8001dc54fc797 (diff)
riscv64: Implement VarHandle.compareAndSet/-Exchange intrinsics.
Test: testrunner.py --target --64 --ndebug --optimizing
Bug: 283082089
Change-Id: I8016cb046d1fbaa5ffe71917a4cce685dfc65f02
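
The flags threaded through the new code below (`return_success`, `strong`, and the `std::memory_order` argument) select between the different VarHandle operations. As a rough C++ analogue of those semantics only (an illustration with `std::atomic`, not the ART code or the emitted LR/SC sequences):

```cpp
// Rough analogue (plain std::atomic, not ART code) of the semantics selected by
// the return_success/strong flags in the new riscv64 intrinsics:
//  - compareAndSet:        strong CAS, returns a success flag.
//  - weakCompareAndSet*:   weak CAS, may fail spuriously, returns a success flag.
//  - compareAndExchange*:  strong CAS, returns the value that was actually observed.
#include <atomic>
#include <cstdio>

int main() {
  std::atomic<int> field{42};

  // compareAndSet (return_success && strong), sequentially consistent.
  int expected = 42;
  bool set = field.compare_exchange_strong(expected, 100, std::memory_order_seq_cst);

  // weakCompareAndSetPlain (return_success && !strong), relaxed order.
  expected = 100;
  bool weak_set = field.compare_exchange_weak(expected, 200, std::memory_order_relaxed);

  // compareAndExchange (!return_success && strong): on failure, `expected` is
  // overwritten with the observed value, which is what the intrinsic must return.
  expected = 0;
  field.compare_exchange_strong(expected, 300, std::memory_order_seq_cst);
  int witnessed = expected;

  std::printf("set=%d weak_set=%d witnessed=%d\n", set, weak_set, witnessed);
  return 0;
}
```

The exchange flavours additionally have to report the value they actually observed, which is why the riscv64 code below needs extra old-value temporaries and the read-barrier handling for reference fields.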
-rw-r--r-- | compiler/optimizing/code_generator_riscv64.cc | 107
-rw-r--r-- | compiler/optimizing/code_generator_riscv64.h  |  54
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc       |   2
-rw-r--r-- | compiler/optimizing/intrinsics_riscv64.cc     | 722
-rw-r--r-- | compiler/optimizing/locations.h               |  16
5 files changed, 818 insertions, 83 deletions
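
For fields narrower than 32 bits, the intrinsics_riscv64.cc changes further below build `shift`, `mask` and `masked` temporaries and perform the CAS on the containing aligned 32-bit word. A minimal sketch of that masking idea, using `std::atomic<uint32_t>` instead of the emitted LR.W/SC.W loop (hypothetical helper; little-endian byte indexing assumed):

```cpp
// Sketch of a byte-wide compare-and-set performed on the containing aligned
// 32-bit word, mirroring the shift/mask/masked temporaries set up before
// GenerateCompareAndSet(). Hypothetical helper; the real code emits LR.W/SC.W
// through the Riscv64Assembler and keeps everything in registers.
#include <atomic>
#include <cstdint>
#include <cstdio>

bool ByteCompareAndSet(std::atomic<uint32_t>* word, int byte_index,
                       uint8_t expected, uint8_t new_value) {
  const uint32_t shift = static_cast<uint32_t>(byte_index) * 8u;  // Andi + Slli
  const uint32_t mask = 0xFFu << shift;                           // Li + Sllw
  const uint32_t expected_bits = static_cast<uint32_t>(expected) << shift;
  const uint32_t new_bits = static_cast<uint32_t>(new_value) << shift;

  uint32_t old_word = word->load(std::memory_order_relaxed);
  while (true) {
    if ((old_word & mask) != expected_bits) {
      return false;  // Corresponds to the branch to `cmp_failure`.
    }
    const uint32_t desired = (old_word & ~mask) | new_bits;  // Andn + Or
    if (word->compare_exchange_weak(old_word, desired, std::memory_order_seq_cst)) {
      return true;  // Store-conditional succeeded.
    }
    // Store-conditional failed or the word changed under us: retry, like the
    // bare backward branch on the SC result in the emitted loop.
  }
}

int main() {
  std::atomic<uint32_t> word{0x11223344u};
  bool ok = ByteCompareAndSet(&word, /*byte_index=*/ 0, /*expected=*/ 0x44, /*new_value=*/ 0x99);
  std::printf("ok=%d word=%08x\n", ok, word.load());
  return 0;
}
```

In the generated code the retry is the bare backward branch on the SC result; for the weak flavours the SC result is simply XOR-ed with 1 to report success or failure.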
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc index c7d5ecfb4d..698bf434d4 100644 --- a/compiler/optimizing/code_generator_riscv64.cc +++ b/compiler/optimizing/code_generator_riscv64.cc @@ -1252,16 +1252,34 @@ void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - XRegister obj, - uint32_t offset, - ReadBarrierOption read_barrier_option, - Riscv64Label* label_low) { +SlowPathCodeRISCV64* CodeGeneratorRISCV64::AddGcRootBakerBarrierBarrierSlowPath( + HInstruction* instruction, Location root, Location temp) { + SlowPathCodeRISCV64* slow_path = + new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64(instruction, root, temp); + AddSlowPath(slow_path); + return slow_path; +} + +void CodeGeneratorRISCV64::EmitBakerReadBarierMarkingCheck( + SlowPathCodeRISCV64* slow_path, Location root, Location temp) { + const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(root); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ Loadd(temp.AsRegister<XRegister>(), TR, entry_point_offset); + __ Bnez(temp.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + XRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Riscv64Label* label_low) { DCHECK_IMPLIES(label_low != nullptr, offset == kLinkTimeOffsetPlaceholderLow) << offset; XRegister root_reg = root.AsRegister<XRegister>(); if (read_barrier_option == kWithReadBarrier) { - DCHECK(codegen_->EmitReadBarrier()); + DCHECK(EmitReadBarrier()); if (kUseBakerReadBarrier) { // Note that we do not actually check the value of `GetIsGcMarking()` // to decide whether to mark the loaded GC root or not. Instead, we @@ -1293,19 +1311,11 @@ void InstructionCodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* inst "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path marking the GC root `root`. - XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway. + // Use RA as temp. It is clobbered in the slow path anyway. + Location temp = Location::RegisterLocation(RA); SlowPathCodeRISCV64* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64( - instruction, root, Location::RegisterLocation(tmp)); - codegen_->AddSlowPath(slow_path); - - const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(root); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ Loadd(tmp, TR, entry_point_offset); - __ Bnez(tmp, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + AddGcRootBakerBarrierBarrierSlowPath(instruction, root, temp); + EmitBakerReadBarierMarkingCheck(slow_path, root, temp); } else { // GC root loaded through a slow path for read barriers other // than Baker's. 
@@ -1315,7 +1325,7 @@ void InstructionCodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* inst } __ AddConst32(root_reg, obj, offset); // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + GenerateReadBarrierForRootSlow(instruction, root, root); } } else { // Plain GC root load with no read barrier. @@ -1917,6 +1927,22 @@ void CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier(HInstructio __ Bind(slow_path->GetExitLabel()); } +SlowPathCodeRISCV64* CodeGeneratorRISCV64::AddReadBarrierSlowPath(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + UNUSED(instruction); + UNUSED(out); + UNUSED(ref); + UNUSED(obj); + UNUSED(offset); + UNUSED(index); + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + void CodeGeneratorRISCV64::GenerateReadBarrierSlow(HInstruction* instruction, Location out, Location ref, @@ -4241,11 +4267,11 @@ void InstructionCodeGeneratorRISCV64::VisitLoadClass(HLoadClass* instruction) DCHECK(!instruction->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ XRegister current_method = locations->InAt(0).AsRegister<XRegister>(); - GenerateGcRootFieldLoad(instruction, - out_loc, - current_method, - ArtMethod::DeclaringClassOffset().Int32Value(), - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(instruction, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -4275,12 +4301,12 @@ void InstructionCodeGeneratorRISCV64::VisitLoadClass(HLoadClass* instruction) codegen_->EmitPcRelativeAuipcPlaceholder(bss_info_high, out); CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewTypeBssEntryPatch( instruction, bss_info_high); - GenerateGcRootFieldLoad(instruction, - out_loc, - out, - /* offset= */ kLinkTimeOffsetPlaceholderLow, - read_barrier_option, - &info_low->label); + codegen_->GenerateGcRootFieldLoad(instruction, + out_loc, + out, + /* offset= */ kLinkTimeOffsetPlaceholderLow, + read_barrier_option, + &info_low->label); generate_null_check = true; break; } @@ -4295,7 +4321,8 @@ void InstructionCodeGeneratorRISCV64::VisitLoadClass(HLoadClass* instruction) __ Loadwu(out, codegen_->DeduplicateJitClassLiteral(instruction->GetDexFile(), instruction->GetTypeIndex(), instruction->GetClass())); - GenerateGcRootFieldLoad(instruction, out_loc, out, /* offset= */ 0, read_barrier_option); + codegen_->GenerateGcRootFieldLoad( + instruction, out_loc, out, /* offset= */ 0, read_barrier_option); break; case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: @@ -4405,12 +4432,12 @@ void InstructionCodeGeneratorRISCV64::VisitLoadString(HLoadString* instruction) codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewStringBssEntryPatch( instruction->GetDexFile(), instruction->GetStringIndex(), info_high); - GenerateGcRootFieldLoad(instruction, - out_loc, - out, - /* offset= */ kLinkTimeOffsetPlaceholderLow, - codegen_->GetCompilerReadBarrierOption(), - &info_low->label); + codegen_->GenerateGcRootFieldLoad(instruction, + out_loc, + out, + /* offset= */ kLinkTimeOffsetPlaceholderLow, + codegen_->GetCompilerReadBarrierOption(), + &info_low->label); SlowPathCodeRISCV64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathRISCV64(instruction); 
codegen_->AddSlowPath(slow_path); @@ -4429,7 +4456,7 @@ void InstructionCodeGeneratorRISCV64::VisitLoadString(HLoadString* instruction) out, codegen_->DeduplicateJitStringLiteral( instruction->GetDexFile(), instruction->GetStringIndex(), instruction->GetString())); - GenerateGcRootFieldLoad( + codegen_->GenerateGcRootFieldLoad( instruction, out_loc, out, 0, codegen_->GetCompilerReadBarrierOption()); return; default: @@ -5188,7 +5215,7 @@ void InstructionCodeGeneratorRISCV64::VisitTypeConversion(HTypeConversion* instr XRegister src = locations->InAt(0).AsRegister<XRegister>(); switch (result_type) { case DataType::Type::kUint8: - __ Andi(dst, src, 0xFF); + __ ZextB(dst, src); break; case DataType::Type::kInt8: __ SextB(dst, src); diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h index 41fdae30df..648800ce06 100644 --- a/compiler/optimizing/code_generator_riscv64.h +++ b/compiler/optimizing/code_generator_riscv64.h @@ -161,10 +161,6 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength = V(CRC32UpdateByteBuffer) \ V(MethodHandleInvokeExact) \ V(MethodHandleInvoke) \ - V(VarHandleCompareAndExchange) \ - V(VarHandleCompareAndExchangeAcquire) \ - V(VarHandleCompareAndExchangeRelease) \ - V(VarHandleCompareAndSet) \ V(VarHandleGetAndAdd) \ V(VarHandleGetAndAddAcquire) \ V(VarHandleGetAndAddRelease) \ @@ -179,11 +175,7 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength = V(VarHandleGetAndBitwiseXorRelease) \ V(VarHandleGetAndSet) \ V(VarHandleGetAndSetAcquire) \ - V(VarHandleGetAndSetRelease) \ - V(VarHandleWeakCompareAndSet) \ - V(VarHandleWeakCompareAndSetAcquire) \ - V(VarHandleWeakCompareAndSetPlain) \ - V(VarHandleWeakCompareAndSetRelease) + V(VarHandleGetAndSetRelease) // Method register on invoke. static const XRegister kArtMethodRegister = A0; @@ -374,18 +366,6 @@ class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator { void ShNAdd(XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type); - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers (if any). - void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - XRegister obj, - uint32_t offset, - ReadBarrierOption read_barrier_option, - Riscv64Label* label_low = nullptr); - void Load(Location out, XRegister rs1, int32_t offset, DataType::Type type); void Store(Location value, XRegister rs1, int32_t offset, DataType::Type type); @@ -736,6 +716,26 @@ class CodeGeneratorRISCV64 : public CodeGenerator { bool CanUseImplicitSuspendCheck() const; + // Create slow path for a Baker read barrier for a GC root load within `instruction`. + SlowPathCodeRISCV64* AddGcRootBakerBarrierBarrierSlowPath( + HInstruction* instruction, Location root, Location temp); + + // Emit marking check for a Baker read barrier for a GC root load within `instruction`. + void EmitBakerReadBarierMarkingCheck( + SlowPathCodeRISCV64* slow_path, Location root, Location temp); + + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + XRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Riscv64Label* label_low = nullptr); + // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. 
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -763,6 +763,18 @@ class CodeGeneratorRISCV64 : public CodeGenerator { Location temp, bool needs_null_check); + // Create slow path for a read barrier for a heap reference within `instruction`. + // + // This is a helper function for GenerateReadBarrierSlow() that has the same + // arguments. The creation and adding of the slow path is exposed for intrinsics + // that cannot use GenerateReadBarrierSlow() from their own slow paths. + SlowPathCodeRISCV64* AddReadBarrierSlowPath(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index); + // Generate a read barrier for a heap reference within `instruction` // using a slow path. // diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 9648bb0794..70b7dcaa02 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1261,7 +1261,7 @@ static void GenerateCompareAndSet(CodeGeneratorARM64* codegen, // } // // Flag Z indicates whether `old_value == expected || old_value == expected2`. - // (Is `expected2` is not valid, the `old_value == expected2` part is not emitted.) + // (If `expected2` is not valid, the `old_value == expected2` part is not emitted.) vixl::aarch64::Label loop_head; if (strong) { diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc index 4bdc9d550e..6c11e4a4d7 100644 --- a/compiler/optimizing/intrinsics_riscv64.cc +++ b/compiler/optimizing/intrinsics_riscv64.cc @@ -521,6 +521,296 @@ void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) { GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); } +static void EmitLoadReserved(Riscv64Assembler* assembler, + DataType::Type type, + XRegister ptr, + XRegister old_value, + AqRl aqrl) { + switch (type) { + case DataType::Type::kInt32: + __ LrW(old_value, ptr, aqrl); + break; + case DataType::Type::kReference: + __ LrW(old_value, ptr, aqrl); + // TODO(riscv64): The `ZextW()` macro currently emits `SLLI+SRLI` which are from the + // base "I" instruction set. When the assembler is updated to use a single-instruction + // `ZextW()` macro, either the ADD.UW, or the C.ZEXT.W (16-bit encoding), we need to + // rewrite this to avoid these non-"I" instructions. We could, for example, sign-extend + // the reference and do the CAS as `Int32`. 
+ __ ZextW(old_value, old_value); + break; + case DataType::Type::kInt64: + __ LrD(old_value, ptr, aqrl); + break; + default: + LOG(FATAL) << "Unexpected type: " << type; + UNREACHABLE(); + } +} + +static void EmitStoreConditional(Riscv64Assembler* assembler, + DataType::Type type, + XRegister ptr, + XRegister store_result, + XRegister to_store, + AqRl aqrl) { + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kReference: + __ ScW(store_result, to_store, ptr, aqrl); + break; + case DataType::Type::kInt64: + __ ScD(store_result, to_store, ptr, aqrl); + break; + default: + LOG(FATAL) << "Unexpected type: " << type; + UNREACHABLE(); + } +} + +static void GenerateCompareAndSet(Riscv64Assembler* assembler, + DataType::Type type, + std::memory_order order, + bool strong, + Riscv64Label* cmp_failure, + XRegister ptr, + XRegister new_value, + XRegister old_value, + XRegister mask, + XRegister masked, + XRegister store_result, + XRegister expected, + XRegister expected2 = kNoXRegister) { + DCHECK(!DataType::IsFloatingPointType(type)); + DCHECK_GE(DataType::Size(type), 4u); + + // The `expected2` is valid only for reference slow path and represents the unmarked old value + // from the main path attempt to emit CAS when the marked old value matched `expected`. + DCHECK_IMPLIES(expected2 != kNoXRegister, type == DataType::Type::kReference); + + AqRl load_aqrl = AqRl::kNone; + AqRl store_aqrl = AqRl::kNone; + if (order == std::memory_order_acquire) { + load_aqrl = AqRl::kAcquire; + } else if (order == std::memory_order_release) { + store_aqrl = AqRl::kRelease; + } else if (order == std::memory_order_seq_cst) { + load_aqrl = AqRl::kAqRl; + store_aqrl = AqRl::kRelease; + } else { + DCHECK(order == std::memory_order_relaxed); + } + + // repeat: { + // old_value = [ptr]; // Load exclusive. + // cmp_value = old_value & mask; // Extract relevant bits if applicable. + // if (cmp_value != expected && cmp_value != expected2) goto cmp_failure; + // store_result = failed([ptr] <- new_value); // Store exclusive. + // } + // if (strong) { + // if (store_result) goto repeat; // Repeat until compare fails or store exclusive succeeds. + // } else { + // store_result = store_result ^ 1; // Report success as 1, failure as 0. + // } + // + // (If `mask` is not valid, `expected` is compared with `old_value` instead of `cmp_value`.) + // (If `expected2` is not valid, the `cmp_value == expected2` part is not emitted.) + + // Note: We're using "bare" local branches to enforce that they shall not be expanded + // and the scrach register `TMP` shall not be clobbered if taken. Taking the branch to + // `cmp_failure` can theoretically clobber `TMP` (if outside the 1 MiB range). + Riscv64Label loop; + if (strong) { + __ Bind(&loop); + } + EmitLoadReserved(assembler, type, ptr, old_value, load_aqrl); + XRegister to_store = new_value; + if (mask != kNoXRegister) { + DCHECK_EQ(expected2, kNoXRegister); + DCHECK_NE(masked, kNoXRegister); + __ And(masked, old_value, mask); + __ Bne(masked, expected, cmp_failure); + // The `old_value` does not need to be preserved as the caller shall use `masked` + // to return the old value if needed. + to_store = old_value; + // TODO(riscv64): We could XOR the old and new value before the loop and use XOR here + // instead of the ANDN+OR. (The `new_value` is either Zero or a temporary we can clobber.) 
+ __ Andn(to_store, old_value, mask); + __ Or(to_store, to_store, new_value); + } else if (expected2 != kNoXRegister) { + Riscv64Label match2; + __ Beq(old_value, expected2, &match2, /*is_bare=*/ true); + __ Bne(old_value, expected, cmp_failure); + __ Bind(&match2); + } else { + __ Bne(old_value, expected, cmp_failure); + } + EmitStoreConditional(assembler, type, ptr, store_result, to_store, store_aqrl); + if (strong) { + __ Bnez(store_result, &loop, /*is_bare=*/ true); + } else { + // Flip the `store_result` register to indicate success by 1 and failure by 0. + __ Xori(store_result, store_result, 1); + } +} + +class ReadBarrierCasSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + ReadBarrierCasSlowPathRISCV64(HInvoke* invoke, + std::memory_order order, + bool strong, + XRegister base, + XRegister offset, + XRegister expected, + XRegister new_value, + XRegister old_value, + XRegister old_value_temp, + XRegister store_result, + bool update_old_value, + CodeGeneratorRISCV64* riscv64_codegen) + : SlowPathCodeRISCV64(invoke), + order_(order), + strong_(strong), + base_(base), + offset_(offset), + expected_(expected), + new_value_(new_value), + old_value_(old_value), + old_value_temp_(old_value_temp), + store_result_(store_result), + update_old_value_(update_old_value), + mark_old_value_slow_path_(nullptr), + update_old_value_slow_path_(nullptr) { + // We need to add slow paths now, it is too late when emitting slow path code. + Location old_value_loc = Location::RegisterLocation(old_value); + Location old_value_temp_loc = Location::RegisterLocation(old_value_temp); + if (kUseBakerReadBarrier) { + mark_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath( + invoke, old_value_temp_loc, kBakerReadBarrierTemp); + if (update_old_value_) { + update_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath( + invoke, old_value_loc, kBakerReadBarrierTemp); + } + } else { + Location base_loc = Location::RegisterLocation(base); + Location index = Location::RegisterLocation(offset); + mark_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath( + invoke, old_value_temp_loc, old_value_loc, base_loc, /*offset=*/ 0u, index); + if (update_old_value_) { + update_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath( + invoke, old_value_loc, old_value_temp_loc, base_loc, /*offset=*/ 0u, index); + } + } + } + + const char* GetDescription() const override { return "ReadBarrierCasSlowPathRISCV64"; } + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + Riscv64Assembler* assembler = riscv64_codegen->GetAssembler(); + __ Bind(GetEntryLabel()); + + // Mark the `old_value_` from the main path and compare with `expected_`. + DCHECK(mark_old_value_slow_path_ != nullptr); + if (kUseBakerReadBarrier) { + __ Mv(old_value_temp_, old_value_); + riscv64_codegen->EmitBakerReadBarierMarkingCheck(mark_old_value_slow_path_, + Location::RegisterLocation(old_value_temp_), + kBakerReadBarrierTemp); + } else { + __ J(mark_old_value_slow_path_->GetEntryLabel()); + __ Bind(mark_old_value_slow_path_->GetExitLabel()); + } + Riscv64Label move_marked_old_value; + __ Bne(old_value_temp_, expected_, update_old_value_ ? 
&move_marked_old_value : GetExitLabel()); + + // The `old_value` we have read did not match `expected` (which is always a to-space + // reference) but after the read barrier the marked to-space value matched, so the + // `old_value` must be a from-space reference to the same object. Do the same CAS loop + // as the main path but check for both `expected` and the unmarked old value + // representing the to-space and from-space references for the same object. + + ScratchRegisterScope srs(assembler); + XRegister tmp_ptr = srs.AllocateXRegister(); + XRegister store_result = + store_result_ != kNoXRegister ? store_result_ : srs.AllocateXRegister(); + + // Recalculate the `tmp_ptr` from main path potentially clobbered by the read barrier above + // or by an expanded conditional branch (clobbers `TMP` if beyond 1MiB). + __ Add(tmp_ptr, base_, offset_); + + Riscv64Label mark_old_value; + GenerateCompareAndSet(riscv64_codegen->GetAssembler(), + DataType::Type::kReference, + order_, + strong_, + /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(), + tmp_ptr, + new_value_, + /*old_value=*/ old_value_temp_, + /*mask=*/ kNoXRegister, + /*masked=*/ kNoXRegister, + store_result, + expected_, + /*expected2=*/ old_value_); + if (update_old_value_) { + // To reach this point, the `old_value_temp_` must be either a from-space or a to-space + // reference of the `expected_` object. Update the `old_value_` to the to-space reference. + __ Mv(old_value_, expected_); + } else if (strong_) { + // Load success value to the result register. + // `GenerateCompareAndSet()` does not emit code to indicate success for a strong CAS. + // TODO(riscv64): We could just jump to an identical instruction in the fast-path. + // This would require an additional label as we would have two different slow path exits. + __ Li(store_result, 1); + } + __ J(GetExitLabel()); + + if (update_old_value_) { + // TODO(riscv64): If we initially saw a from-space reference and then saw + // a different reference, can the latter be also a from-space reference? + // (Shouldn't every reference write store a to-space reference?) + DCHECK(update_old_value_slow_path_ != nullptr); + __ Bind(&mark_old_value); + if (kUseBakerReadBarrier) { + DCHECK(update_old_value_slow_path_ == nullptr); + __ Mv(old_value_, old_value_temp_); + riscv64_codegen->EmitBakerReadBarierMarkingCheck(update_old_value_slow_path_, + Location::RegisterLocation(old_value_), + kBakerReadBarrierTemp); + } else { + // Note: We could redirect the `failure` above directly to the entry label and bind + // the exit label in the main path, but the main path would need to access the + // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps. + __ J(update_old_value_slow_path_->GetEntryLabel()); + __ Bind(update_old_value_slow_path_->GetExitLabel()); + } + __ J(GetExitLabel()); + + __ Bind(&move_marked_old_value); + __ Mv(old_value_, old_value_temp_); + __ J(GetExitLabel()); + } + } + + private: + // Use RA as temp. It is clobbered in the slow path anyway. 
+ static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA); + + std::memory_order order_; + bool strong_; + XRegister base_; + XRegister offset_; + XRegister expected_; + XRegister new_value_; + XRegister old_value_; + XRegister old_value_temp_; + XRegister store_result_; + bool update_old_value_; + SlowPathCodeRISCV64* mark_old_value_slow_path_; + SlowPathCodeRISCV64* update_old_value_slow_path_; +}; + enum class GetAndUpdateOp { kSet, kAdd, @@ -611,21 +901,20 @@ static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorRISCV64* codege ScratchRegisterScope srs(assembler); XRegister temp = srs.AllocateXRegister(); - // Note: The `type` can be `TMP`. Taken branches to `success` and `loop` should be near and never - // expand. Only the branch to `slow_path` can theoretically expand and clobber `TMP` when taken. - // (`TMP` is clobbered only if the target distance is at least 1MiB.) - // FIXME(riscv64): Use "bare" branches. (And add some assembler tests for them.) + // Note: The `type` can be `TMP`. We're using "bare" local branches to enforce that they shall + // not be expanded and the scrach register `TMP` shall not be clobbered if taken. Taking the + // branch to the slow path can theoretically clobber `TMP` (if outside the 1 MiB range). __ Loadwu(temp, object, class_offset.Int32Value()); codegen->MaybeUnpoisonHeapReference(temp); Riscv64Label loop; __ Bind(&loop); - __ Beq(type, temp, &success); + __ Beq(type, temp, &success, /*is_bare=*/ true); // We may not have another scratch register for `Loadwu()`. Use `Lwu()` directly. DCHECK(IsInt<12>(super_class_offset.Int32Value())); __ Lwu(temp, temp, super_class_offset.Int32Value()); codegen->MaybeUnpoisonHeapReference(temp); __ Beqz(temp, slow_path->GetEntryLabel()); - __ J(&loop); + __ J(&loop, /*is_bare=*/ true); __ Bind(&success); } @@ -927,7 +1216,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, __ Loadd(field, varhandle, art_field_offset.Int32Value()); __ Loadwu(target.offset, field, offset_offset.Int32Value()); if (expected_coordinates_count == 0u) { - codegen->GetInstructionVisitor()->GenerateGcRootFieldLoad( + codegen->GenerateGcRootFieldLoad( invoke, Location::RegisterLocation(target.object), field, @@ -1018,6 +1307,11 @@ static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorRISCV64* c CreateVarHandleCommonLocations(invoke, codegen); } +DataType::Type IntTypeForFloatingPointType(DataType::Type fp_type) { + DCHECK(DataType::IsFloatingPointType(fp_type)); + return (fp_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64; +} + static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorRISCV64* codegen, std::memory_order order, @@ -1067,8 +1361,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, DataType::Type load_type = type; if (byte_swap && DataType::IsFloatingPointType(type)) { load_loc = Location::RegisterLocation(target.offset); // Load to the offset temporary. - load_type = (type == DataType::Type::kFloat32) ? DataType::Type::kInt32 - : DataType::Type::kInt64; + load_type = IntTypeForFloatingPointType(type); } codegen->GetInstructionVisitor()->Load(load_loc, address, /*offset=*/ 0, load_type); if (type == DataType::Type::kReference) { @@ -1163,8 +1456,7 @@ static void GenerateVarHandleSet(HInvoke* invoke, // The offset is no longer needed, so reuse the offset temporary for the byte-swapped value. 
Location new_value = Location::RegisterLocation(target.offset); if (DataType::IsFloatingPointType(value_type)) { - value_type = (value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 - : DataType::Type::kInt64; + value_type = IntTypeForFloatingPointType(value_type); codegen->MoveLocation(new_value, value, value_type); value = new_value; } @@ -1226,14 +1518,418 @@ void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) { GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst); } +static bool ScratchXRegisterNeeded(Location loc, DataType::Type type, bool byte_swap) { + if (loc.IsConstant()) { + DCHECK(loc.GetConstant()->IsZeroBitPattern()); + return false; + } + return DataType::IsFloatingPointType(type) || DataType::Size(type) < 4u || byte_swap; +} + +static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + bool return_success) { + VarHandleOptimizations optimizations(invoke); + if (optimizations.GetDoNotIntrinsify()) { + return; + } + + uint32_t expected_index = invoke->GetNumberOfArguments() - 2; + uint32_t new_value_index = invoke->GetNumberOfArguments() - 1; + DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index); + DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index)); + + if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { + // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores + // the passed reference and reloads it from the field. This breaks the read barriers + // in slow path in different ways. The marked old value may not actually be a to-space + // reference to the same object as `old_value`, breaking slow path assumptions. And + // for CompareAndExchange, marking the old value after comparison failure may actually + // return the reference to `expected`, erroneously indicating success even though we + // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084 + return; + } + + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); + DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke)); + + if (codegen->EmitNonBakerReadBarrier()) { + // We need callee-save registers for both the class object and offset instead of + // the temporaries reserved in CreateVarHandleCommonLocations(). + static_assert(POPCOUNT(kRiscv64CalleeSaveRefSpills) >= 2u); + uint32_t first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills); + uint32_t second_callee_save = CTZ(kRiscv64CalleeSaveRefSpills ^ (1u << first_callee_save)); + if (expected_index == 1u) { // For static fields. + DCHECK_EQ(locations->GetTempCount(), 2u); + DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister())); + DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save))); + locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save)); + } else { + DCHECK_EQ(locations->GetTempCount(), 1u); + DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister())); + locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save)); + } + } + + if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + // Add a temporary for the `old_value_temp` in the slow path, `tmp_ptr` is scratch register. 
+ locations->AddTemp(Location::RequiresRegister()); + } else { + Location expected = locations->InAt(expected_index); + Location new_value = locations->InAt(new_value_index); + size_t data_size = DataType::Size(value_type); + bool small = (data_size < 4u); + bool byte_swap = + (expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u); + bool fp = DataType::IsFloatingPointType(value_type); + size_t temps_needed = + // The offset temp is used for the `tmp_ptr`. + 1u + + // For small values, we need a temp for the `mask`, `masked` and maybe also for the `shift`. + (small ? (return_success ? 2u : 3u) : 0u) + + // Some cases need modified copies of `new_value` and `expected`. + (ScratchXRegisterNeeded(expected, value_type, byte_swap) ? 1u : 0u) + + (ScratchXRegisterNeeded(new_value, value_type, byte_swap) ? 1u : 0u) + + // We need a scratch register either for the old value or for the result of SC. + // If we need to return a floating point old value, we need a temp for each. + ((!return_success && fp) ? 2u : 1u); + size_t scratch_registers_available = 2u; + DCHECK_EQ(scratch_registers_available, + ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters()); + size_t old_temp_count = locations->GetTempCount(); + DCHECK_EQ(old_temp_count, (expected_index == 1u) ? 2u : 1u); + if (temps_needed > old_temp_count + scratch_registers_available) { + locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available)); + } + } +} + +static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen, + Location loc, + DataType::Type type, + XRegister shift, + XRegister mask, + bool byte_swap, + ScratchRegisterScope* srs) { + DCHECK_EQ(shift == kNoXRegister, mask == kNoXRegister); + DCHECK_EQ(shift == kNoXRegister, DataType::Size(type) >= 4u); + if (loc.IsConstant()) { + // The `shift`/`mask` and `byte_swap` are irrelevant for zero input. + DCHECK(loc.GetConstant()->IsZeroBitPattern()); + return Zero; + } + + Location result = loc; + if (DataType::IsFloatingPointType(type)) { + type = IntTypeForFloatingPointType(type); + result = Location::RegisterLocation(srs->AllocateXRegister()); + codegen->MoveLocation(result, loc, type); + loc = result; + } else if (byte_swap || shift != kNoXRegister) { + result = Location::RegisterLocation(srs->AllocateXRegister()); + } + if (byte_swap) { + if (type == DataType::Type::kInt16) { + type = DataType::Type::kUint16; // Do the masking as part of the byte swap. 
+ } + GenerateReverseBytes(codegen->GetAssembler(), result, loc.AsRegister<XRegister>(), type); + loc = result; + } + if (shift != kNoXRegister) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + __ Sllw(result.AsRegister<XRegister>(), loc.AsRegister<XRegister>(), shift); + DCHECK_NE(type, DataType::Type::kUint8); + if (type != DataType::Type::kUint16 && type != DataType::Type::kBool) { + __ And(result.AsRegister<XRegister>(), result.AsRegister<XRegister>(), mask); + } + } + return result.AsRegister<XRegister>(); +} + static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorRISCV64* codegen, std::memory_order order, bool return_success, bool strong, bool byte_swap = false) { - UNUSED(invoke, codegen, order, return_success, strong, byte_swap); - LOG(FATAL) << "Unimplemented!"; + DCHECK(return_success || strong); + + uint32_t expected_index = invoke->GetNumberOfArguments() - 2; + uint32_t new_value_index = invoke->GetNumberOfArguments() - 1; + DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index); + DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index)); + + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + Location expected = locations->InAt(expected_index); + Location new_value = locations->InAt(new_value_index); + Location out = locations->Out(); + + VarHandleTarget target = GetVarHandleTarget(invoke); + VarHandleSlowPathRISCV64* slow_path = nullptr; + if (!byte_swap) { + slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type); + GenerateVarHandleTarget(invoke, target, codegen); + if (slow_path != nullptr) { + slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong); + __ Bind(slow_path->GetNativeByteOrderLabel()); + } + } + + // This needs to be before we allocate the scratch registers, as MarkGCCard also uses them. + if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) { + // Mark card for object assuming new value is stored. + bool new_value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(target.object, new_value.AsRegister<XRegister>(), new_value_can_be_null); + } + + // Scratch registers may be needed for `new_value` and `expected`. + ScratchRegisterScope srs(assembler); + DCHECK_EQ(srs.AvailableXRegisters(), 2u); + size_t available_scratch_registers = + (ScratchXRegisterNeeded(expected, value_type, byte_swap) ? 0u : 1u) + + (ScratchXRegisterNeeded(new_value, value_type, byte_swap) ? 0u : 1u); + + // Reuse the `offset` temporary for the pointer to the target location, + // except for references that need the offset for the read barrier. + DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>()); + size_t next_temp = 1u; + XRegister tmp_ptr = target.offset; + if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + DCHECK_EQ(available_scratch_registers, 2u); + available_scratch_registers -= 1u; + DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke)); + next_temp = expected_index == 1u ? 2u : 1u; // Preserve the class register for static field. 
+ tmp_ptr = srs.AllocateXRegister(); + } + __ Add(tmp_ptr, target.object, target.offset); + + auto get_temp = [&]() { + if (available_scratch_registers != 0u) { + available_scratch_registers -= 1u; + return srs.AllocateXRegister(); + } else { + XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>(); + next_temp += 1u; + return temp; + } + }; + + XRegister shift = kNoXRegister; + XRegister mask = kNoXRegister; + XRegister masked = kNoXRegister; + size_t data_size = DataType::Size(value_type); + if (data_size < 4u) { + // When returning "success" and not the old value, we shall not need the `shift` after + // the raw CAS operation, so use the output register as a temporary here. + shift = return_success ? locations->Out().AsRegister<XRegister>() : get_temp(); + mask = get_temp(); + masked = get_temp(); + __ Andi(shift, tmp_ptr, 3); + __ Andi(tmp_ptr, tmp_ptr, -4); + __ Slli(shift, shift, WhichPowerOf2(kBitsPerByte)); + __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1); + __ Sllw(mask, mask, shift); + } + + // Move floating point values to scratch registers and apply shift, mask and byte swap if needed. + // Note that float/double CAS uses bitwise comparison, rather than the operator==. + XRegister expected_reg = + PrepareXRegister(codegen, expected, value_type, shift, mask, byte_swap, &srs); + XRegister new_value_reg = + PrepareXRegister(codegen, new_value, value_type, shift, mask, byte_swap, &srs); + DataType::Type cas_type = DataType::IsFloatingPointType(value_type) + ? IntTypeForFloatingPointType(value_type) + : (data_size >= 4u ? value_type : DataType::Type::kInt32); + + // Prepare registers for old value and the result of the store conditional. + XRegister old_value; + XRegister store_result; + if (return_success) { + // Use a temp for the old value and the output register for the store conditional result. + old_value = get_temp(); + store_result = out.AsRegister<XRegister>(); + } else if (DataType::IsFloatingPointType(value_type)) { + // We need two temporary registers. + old_value = get_temp(); + store_result = get_temp(); + } else { + // Use the output register for the old value and a temp for the store conditional result. + old_value = out.AsRegister<XRegister>(); + store_result = get_temp(); + } + + Riscv64Label exit_loop_label; + Riscv64Label* exit_loop = &exit_loop_label; + Riscv64Label* cmp_failure = &exit_loop_label; + + if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked + // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register. + XRegister old_value_temp = locations->GetTemp(next_temp).AsRegister<XRegister>(); + ++next_temp; + // If we are returning the old value rather than the success, + // use a scratch register for the store result in the slow path. + XRegister slow_path_store_result = return_success ? 
store_result : kNoXRegister; + ReadBarrierCasSlowPathRISCV64* rb_slow_path = + new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64( + invoke, + order, + strong, + target.object, + target.offset, + expected_reg, + new_value_reg, + old_value, + old_value_temp, + slow_path_store_result, + /*update_old_value=*/ !return_success, + codegen); + codegen->AddSlowPath(rb_slow_path); + exit_loop = rb_slow_path->GetExitLabel(); + cmp_failure = rb_slow_path->GetEntryLabel(); + } + + if (return_success) { + // Pre-populate the result register with failure for the case when the old value + // differs and we do not execute the store conditional. + __ Li(store_result, 0); + } + GenerateCompareAndSet(codegen->GetAssembler(), + cas_type, + order, + strong, + cmp_failure, + tmp_ptr, + new_value_reg, + old_value, + mask, + masked, + store_result, + expected_reg); + if (return_success && strong) { + // Load success value to the result register. + // `GenerateCompareAndSet()` does not emit code to indicate success for a strong CAS. + __ Li(store_result, 1); + } + __ Bind(exit_loop); + + if (return_success) { + // Nothing to do, the result register already contains 1 on success and 0 on failure. + } else if (byte_swap) { + // Do not apply shift in `GenerateReverseBytes()` for small types. + DataType::Type swap_type = data_size < 4u ? DataType::Type::kInt32 : value_type; + // Also handles moving to FP registers. + GenerateReverseBytes(assembler, out, old_value, swap_type); + if (data_size < 4u) { + DCHECK(Location::RegisterLocation(old_value).Equals(out)); + __ Sllw(old_value, old_value, shift); + if (value_type == DataType::Type::kUint16) { + __ Srliw(old_value, old_value, 16); + } else { + DCHECK_EQ(value_type, DataType::Type::kInt16); + __ Sraiw(old_value, old_value, 16); + } + } + } else if (DataType::IsFloatingPointType(value_type)) { + codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type); + } else if (data_size < 4u) { + __ Srl(old_value, masked, shift); + if (value_type == DataType::Type::kInt8) { + __ SextB(old_value, old_value); + } else if (value_type == DataType::Type::kInt16) { + __ SextH(old_value, old_value); + } + } + + if (slow_path != nullptr) { + DCHECK(!byte_swap); + __ Bind(slow_path->GetExitLabel()); + } + + // Check that we have allocated the right number of temps. We may need more registers + // for byte swapped CAS in the slow path, so skip this check for the main path in that case. + bool has_byte_swap = + (expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u); + if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) { + // We allocate a temporary register for the class object for a static field `VarHandle` but + // we do not update the `next_temp` if it's otherwise unused after the address calculation. 
+ CHECK_EQ(expected_index, 1u); + CHECK_EQ(next_temp, 1u); + CHECK_EQ(locations->GetTempCount(), 2u); + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false); } static void 
GenerateVarHandleGetAndUpdate(HInvoke* invoke, diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 7ee076f442..20099ebbc2 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -79,7 +79,7 @@ class Location : public ValueObject { kUnallocated = 11, }; - Location() : ValueObject(), value_(kInvalid) { + constexpr Location() : ValueObject(), value_(kInvalid) { // Verify that non-constant location kinds do not interfere with kConstant. static_assert((kInvalid & kLocationConstantMask) != kConstant, "TagError"); static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError"); @@ -95,7 +95,7 @@ class Location : public ValueObject { DCHECK(!IsValid()); } - Location(const Location& other) = default; + constexpr Location(const Location& other) = default; Location& operator=(const Location& other) = default; @@ -126,24 +126,24 @@ class Location : public ValueObject { } // Empty location. Used if there the location should be ignored. - static Location NoLocation() { + static constexpr Location NoLocation() { return Location(); } // Register locations. - static Location RegisterLocation(int reg) { + static constexpr Location RegisterLocation(int reg) { return Location(kRegister, reg); } - static Location FpuRegisterLocation(int reg) { + static constexpr Location FpuRegisterLocation(int reg) { return Location(kFpuRegister, reg); } - static Location RegisterPairLocation(int low, int high) { + static constexpr Location RegisterPairLocation(int low, int high) { return Location(kRegisterPair, low << 16 | high); } - static Location FpuRegisterPairLocation(int low, int high) { + static constexpr Location FpuRegisterPairLocation(int low, int high) { return Location(kFpuRegisterPair, low << 16 | high); } @@ -423,7 +423,7 @@ class Location : public ValueObject { explicit Location(uintptr_t value) : value_(value) {} - Location(Kind kind, uintptr_t payload) + constexpr Location(Kind kind, uintptr_t payload) : value_(KindField::Encode(kind) | PayloadField::Encode(payload)) {} uintptr_t GetPayload() const { |
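
The locations.h hunk above makes the `Location` constructors and register factory methods `constexpr`; this appears to be what allows the new slow path to declare `static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);`. A stripped-down sketch of that pattern with a hypothetical `Loc` class (not the real `art::Location`):

```cpp
// Stripped-down sketch with a hypothetical `Loc` class (not the real art::Location):
// the constructor and factory must be constexpr for a static constexpr member such
// as kBakerReadBarrierTemp in the new ReadBarrierCasSlowPathRISCV64 to compile.
#include <cstdint>
#include <cstdio>

class Loc {
 public:
  static constexpr Loc RegisterLocation(int reg) {
    return Loc(kRegister, static_cast<uintptr_t>(reg));
  }
  constexpr bool IsRegister() const { return kind_ == kRegister; }
  constexpr int AsRegisterIndex() const { return static_cast<int>(payload_); }

 private:
  enum Kind : uintptr_t { kRegister = 1 };

  // Without constexpr here, the static constexpr constant below would be rejected
  // because it would lack a constant initializer.
  constexpr Loc(Kind kind, uintptr_t payload) : kind_(kind), payload_(payload) {}

  Kind kind_;
  uintptr_t payload_;
};

// Analogous to `static constexpr Location kBakerReadBarrierTemp =
// Location::RegisterLocation(RA);` in the slow path above (the index here is arbitrary).
static constexpr Loc kTemp = Loc::RegisterLocation(1);

int main() {
  std::printf("is_register=%d index=%d\n", kTemp.IsRegister(), kTemp.AsRegisterIndex());
  return 0;
}
```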