-rw-r--r--  compiler/optimizing/code_generator_riscv64.cc     | 146
-rw-r--r--  compiler/optimizing/code_generator_riscv64.h       |   1
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc         |   2
-rw-r--r--  runtime/arch/riscv64/quick_entrypoints_riscv64.S   |  15
-rw-r--r--  runtime/trace.cc                                   |   2
5 files changed, 150 insertions(+), 16 deletions(-)
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
index c77d069478..b9d589b576 100644
--- a/compiler/optimizing/code_generator_riscv64.cc
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -32,6 +32,7 @@
#include "mirror/class-inl.h"
#include "optimizing/nodes.h"
#include "stack_map_stream.h"
+#include "trace.h"
#include "utils/label.h"
#include "utils/riscv64/assembler_riscv64.h"
#include "utils/stack_checks.h"
@@ -136,6 +137,18 @@ static constexpr int64_t ShiftedSignExtendedClassStatusValue() {
return static_cast<int64_t>(kShiftedStatusValue) - (INT64_C(1) << 32);
}
+// Split a 64-bit address used by the JIT into the nearest 4KiB-aligned base address and a 12-bit
+// signed offset. It is usually cheaper to materialize the aligned base than the full address.
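+// For example (illustrative values only): 0x12345801 has bit 11 set, so it splits into the base
+// 0x12346000 and imm12 -0x7ff (0x12346000 - 0x7ff == 0x12345801), while 0x12345400 splits into
+// 0x12345000 and +0x400.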
+std::pair<uint64_t, int32_t> SplitJitAddress(uint64_t address) {
+ uint64_t bits0_11 = address & UINT64_C(0xfff);
+ uint64_t bit11 = address & UINT64_C(0x800);
+ // Round the address to the nearest 4KiB-aligned address because `imm12` has the range [-0x800, 0x800).
+ uint64_t base_address = (address & ~UINT64_C(0xfff)) + (bit11 << 1);
+ int32_t imm12 = dchecked_integral_cast<int32_t>(bits0_11) -
+ dchecked_integral_cast<int32_t>(bit11 << 1);
+ return {base_address, imm12};
+}
+
int32_t ReadBarrierMarkEntrypointOffset(Location ref) {
DCHECK(ref.IsRegister());
int reg = ref.reg();
@@ -501,6 +514,34 @@ class ReadBarrierForRootSlowPathRISCV64 : public SlowPathCodeRISCV64 {
DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathRISCV64);
};
+class MethodEntryExitHooksSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit MethodEntryExitHooksSlowPathRISCV64(HInstruction* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ QuickEntrypointEnum entry_point =
+ (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
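+ // Load the frame size to pass to the exit hook.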
+ __ Li(A4, riscv64_codegen->GetFrameSize());
+ }
+ riscv64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
+ RestoreLiveRegisters(codegen, locations);
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override {
+ return "MethodEntryExitHooksSlowPathRISCV";
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathRISCV64);
+};
+
class ArraySetSlowPathRISCV64 : public SlowPathCodeRISCV64 {
public:
explicit ArraySetSlowPathRISCV64(HInstruction* instruction) : SlowPathCodeRISCV64(instruction) {}
@@ -2462,6 +2503,79 @@ void InstructionCodeGeneratorRISCV64::HandleFieldGet(HInstruction* instruction,
}
}
+void InstructionCodeGeneratorRISCV64::GenerateMethodEntryExitHook(HInstruction* instruction) {
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+
+ ScratchRegisterScope temps(GetAssembler());
+ XRegister tmp = temps.AllocateXRegister();
+
+ if (instruction->IsMethodExitHook()) {
+ // Check whether we need to check if the caller requires deoptimization. Strictly speaking, it
+ // would be sufficient to test whether the CheckCallerForDeopt bit is set, but testing for any
+ // non-zero value is faster. The kCHA bit is unused in debuggable runtimes because CHA
+ // optimization is disabled there, and the remaining bit is only set when this method itself
+ // requires deoptimization due to redefinition, so checking for a non-zero value is safe here.
+ __ Loadwu(tmp, SP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+ }
+
+ uint64_t hook_offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset().SizeValue() :
+ instrumentation::Instrumentation::HaveMethodEntryListenersOffset().SizeValue();
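+ // The "have listeners" flag is a single byte inside the Instrumentation object; split its
+ // absolute address so that the `Lbu` below can reach it with a 12-bit immediate.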
+ auto [base_hook_address, hook_imm12] = SplitJitAddress(
+ reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()) + hook_offset);
+ __ LoadConst64(tmp, base_hook_address);
+ __ Lbu(tmp, tmp, hook_imm12);
+ // Check if there are any method entry / exit listeners. If not, skip the hook.
+ __ Beqz(tmp, slow_path->GetExitLabel());
+ // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
+ // If yes, just take the slow path.
+ static_assert(instrumentation::Instrumentation::kFastTraceListeners == 1u);
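+ // After subtracting kFastTraceListeners, zero means only the fast trace listeners are installed
+ // and the event can be recorded inline; any other value requires the slow path.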
+ __ Addi(tmp, tmp, -1);
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+
+ // Check if there is space in the buffer to store a new entry; if not, take the slow path.
+ int32_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kRiscv64PointerSize>().Int32Value();
+ __ Loadd(tmp, TR, trace_buffer_index_offset);
+ __ Addi(tmp, tmp, -dchecked_integral_cast<int32_t>(kNumEntriesForWallClock));
+ __ Bltz(tmp, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ Stored(tmp, TR, trace_buffer_index_offset);
+
+ // Allocate a second core scratch register. From this point we can no longer use `Stored()`
+ // and similar macro instructions because no core scratch register is left.
+ XRegister tmp2 = temps.AllocateXRegister();
+
+ // Calculate the entry address in the buffer.
+ // /*addr*/ tmp = TR->GetMethodTraceBuffer() + sizeof(void*) * /*index*/ tmp;
+ __ Loadd(tmp2, TR, Thread::TraceBufferPtrOffset<kRiscv64PointerSize>().SizeValue());
+ __ Sh3Add(tmp, tmp, tmp2);
+
+ // Record method pointer and trace action.
+ __ Ld(tmp2, SP, 0);
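+ // The current ArtMethod* is spilled at SP + 0 on frame entry (see the
+ // `RequiresCurrentMethod()` DCHECKs in the visitors below).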
+ // Use the last two bits of the method pointer to encode the trace action. For MethodEntry the
+ // action is 0, so there is no need to set any bits.
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ if (instruction->IsMethodExitHook()) {
+ __ Ori(tmp2, tmp2, enum_cast<int32_t>(TraceAction::kTraceMethodExit));
+ }
+ static_assert(IsInt<12>(kMethodOffsetInBytes)); // No free scratch register for `Stored()`.
+ __ Sd(tmp2, tmp, kMethodOffsetInBytes);
+
+ // Record the timestamp.
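+ // `RdTime` reads the RISC-V `time` CSR, the same counter GetTimestamp() in runtime/trace.cc
+ // reads on riscv64 (see below), so JIT-recorded and runtime-recorded timestamps share one clock.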
+ __ RdTime(tmp2);
+ static_assert(IsInt<12>(kTimestampOffsetInBytes)); // No free scratch register for `Stored()`.
+ __ Sd(tmp2, tmp, kTimestampOffsetInBytes);
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
void LocationsBuilderRISCV64::VisitAbove(HAbove* instruction) {
HandleCondition(instruction);
}
@@ -4250,23 +4364,26 @@ void InstructionCodeGeneratorRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruc
}
void LocationsBuilderRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
- UNUSED(instruction);
- LOG(FATAL) << "Unimplemented";
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
}
void InstructionCodeGeneratorRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
- UNUSED(instruction);
- LOG(FATAL) << "Unimplemented";
+ DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+ DCHECK(codegen_->RequiresCurrentMethod());
+ GenerateMethodEntryExitHook(instruction);
}
void LocationsBuilderRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) {
- UNUSED(instruction);
- LOG(FATAL) << "Unimplemented";
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ DataType::Type return_type = instruction->InputAt(0)->GetType();
+ locations->SetInAt(0, Riscv64ReturnLocation(return_type));
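+ // The only input of HMethodExitHook is the method's return value; keep it in the standard
+ // return location for its type.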
}
void InstructionCodeGeneratorRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) {
- UNUSED(instruction);
- LOG(FATAL) << "Unimplemented";
+ DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+ DCHECK(codegen_->RequiresCurrentMethod());
+ GenerateMethodEntryExitHook(instruction);
}
void LocationsBuilderRISCV64::VisitMin(HMin* instruction) {
@@ -5406,18 +5523,17 @@ void CodeGeneratorRISCV64::MaybeIncrementHotness(bool is_frame_entry) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
DCHECK(!HasEmptyFrame());
- uint64_t address = reinterpret_cast64<uint64_t>(info);
+ uint64_t address = reinterpret_cast64<uint64_t>(info) +
+ ProfilingInfo::BaselineHotnessCountOffset().SizeValue();
+ auto [base_address, imm12] = SplitJitAddress(address);
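+ // Materializing the 4KiB-aligned base is cheaper, and both the `Lhu` load and the `Sh` store
+ // below can then reach the counter through the same 12-bit immediate without needing an extra
+ // scratch register.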
ScratchRegisterScope srs(GetAssembler());
XRegister tmp = srs.AllocateXRegister();
- __ LoadConst64(tmp, address);
+ __ LoadConst64(tmp, base_address);
XRegister counter = srs.AllocateXRegister();
- __ Loadhu(counter, tmp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value());
+ __ Lhu(counter, tmp, imm12);
__ Beqz(counter, slow_path->GetEntryLabel()); // Can clobber `TMP` if taken.
__ Addi(counter, counter, -1);
- // We do not have another scratch register available for `Storeh`()`,
- // so we must use the `Sh()` function directly.
- static_assert(IsInt<12>(ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
- __ Sh(counter, tmp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value());
+ __ Sh(counter, tmp, imm12);
__ Bind(slow_path->GetExitLabel());
}
}
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
index 375cec957f..233bddcd57 100644
--- a/compiler/optimizing/code_generator_riscv64.h
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -481,6 +481,7 @@ class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator {
DataType::Type type,
LocationSummary* locations,
Riscv64Label* label = nullptr);
+ void GenerateMethodEntryExitHook(HInstruction* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
void GenPackedSwitchWithCompares(XRegister adjusted,
XRegister temp,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 325fb87fc5..fbab8659fe 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -741,6 +741,8 @@ static bool CanAssembleGraphForRiscv64(HGraph* graph) {
// and this check is done before register allocation.
LOG(FATAL) << "Unexpected ParallelMove before register allocation!";
UNREACHABLE();
+ case HInstruction::kMethodEntryHook:
+ case HInstruction::kMethodExitHook:
case HInstruction::kExit:
case HInstruction::kGoto:
case HInstruction::kPackedSwitch:
diff --git a/runtime/arch/riscv64/quick_entrypoints_riscv64.S b/runtime/arch/riscv64/quick_entrypoints_riscv64.S
index 2a67d0d578..46d4c852bb 100644
--- a/runtime/arch/riscv64/quick_entrypoints_riscv64.S
+++ b/runtime/arch/riscv64/quick_entrypoints_riscv64.S
@@ -417,6 +417,20 @@ ENTRY art_quick_to_interpreter_bridge
END art_quick_to_interpreter_bridge
+ .extern artMethodEntryHook
+ENTRY art_quick_method_entry_hook
+ SETUP_SAVE_EVERYTHING_FRAME
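+ // Save all registers; the hook must not clobber the method's arguments, and the runtime may
+ // need to walk or deoptimize this frame.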
+
+ ld a0, FRAME_SIZE_SAVE_EVERYTHING(sp) // Pass ArtMethod*.
+ mv a1, xSELF // Pass Thread::Current().
+ mv a2, sp // Pass SP.
+ call artMethodEntryHook // (ArtMethod*, Thread*, SP)
+
+ RESTORE_SAVE_EVERYTHING_FRAME
+ ret
+END art_quick_method_entry_hook
+
+
.extern artMethodExitHook
ENTRY art_quick_method_exit_hook
SETUP_SAVE_EVERYTHING_FRAME
@@ -1423,7 +1437,6 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg31, t6
UNDEFINED art_quick_deoptimize_from_compiled_code
UNDEFINED art_quick_string_builder_append
-UNDEFINED art_quick_method_entry_hook
UNDEFINED art_quick_check_instance_of
UNDEFINED art_quick_alloc_array_resolved_dlmalloc
diff --git a/runtime/trace.cc b/runtime/trace.cc
index c70d7d8e54..8ec82c7db3 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -107,6 +107,8 @@ uint64_t GetTimestamp() {
unsigned int lo, hi;
asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
t = (static_cast<uint64_t>(hi) << 32) | lo;
+#elif defined(__riscv)
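+ // The `time` CSR is a fixed-frequency counter, typically readable from user mode via `rdtime`.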
+ asm volatile("rdtime %0" : "=r"(t));
#else
t = MicroTime();
#endif