summary | refs | log | tree | commit | diff
path: root/runtime/interpreter/interpreter_switch_impl.cc
diff options
context:
space:
mode:
author buzbee <buzbee@google.com> 2016-03-01 15:03:16 -0800
committer buzbee <buzbee@google.com> 2016-03-23 11:11:26 -0700
commit c1d6b341eed646e5adafc6c4fd4e3748f0292368 (patch)
tree ddee928954b03a919ce601f12876f1fe161d577a /runtime/interpreter/interpreter_switch_impl.cc
parent eecf60d51b481647c8508f22b3d6ce437773ea0c (diff)
ART: Improve JitProfile perf in arm/arm64 mterp
ART currently requires two profiling-related things from the interpreters: hotness updates and OSR switch checks. The hotness updates previously used the existing instrumentation framework - which is flexible, but quite heavyweight. For most things, the instrumentation framework overhead is acceptable, but because we do a hotness update on every backwards branch the overhead is unacceptable. Prior to this CL, branch profiling dominated interpreter cost.

Here, we bypass the instrumentation framework for hotness updates and deliver a significant performance improvement. Running interpreter-only (dalvikvm -Xint) on a Nexus 6, we see the logic subtest of Caffeinemark improving from 2600 to 9200, and the overall score going from 1979 to over 3000. Compared to the C++ switch interpreter, we see a 6x improvement on the branchy logic subtest and a 2.6x improvement overall.

Compared with the previous mterp, which did not have support for jit profiling, we see a small (1% to 5%) performance loss on the standard command-line benchmarks. I consider this acceptable (we could create an alternate non-profiling mterp which would have no penalty, but I don't consider this overhead big enough to justify that).

Change-Id: I50b5b8c5ed8ebda3c8b4e65d27ba7393c3feae04
Diffstat (limited to 'runtime/interpreter/interpreter_switch_impl.cc')
-rw-r--r-- runtime/interpreter/interpreter_switch_impl.cc 32
1 files changed, 31 insertions, 1 deletions
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 0488dbf028..f9941d20aa 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -18,6 +18,7 @@
#include "experimental_flags.h"
#include "interpreter_common.h"
#include "jit/jit.h"
+#include "jit/jit_instrumentation.h"
#include "safe_math.h"
#include <memory> // std::unique_ptr
@@ -72,7 +73,6 @@ namespace interpreter {
#define BRANCH_INSTRUMENTATION(offset) \
do { \
- ArtMethod* method = shadow_frame.GetMethod(); \
instrumentation->Branch(self, method, dex_pc, offset); \
JValue result; \
if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \
@@ -80,6 +80,13 @@ namespace interpreter {
} \
} while (false)
+#define HOTNESS_UPDATE() \
+ do { \
+ if (jit_instrumentation_cache != nullptr) { \
+ jit_instrumentation_cache->AddSamples(self, method, 1); \
+ } \
+ } while (false)
+
static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
DCHECK(inst->IsExperimental());
return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
@@ -101,6 +108,12 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
const uint16_t* const insns = code_item->insns_;
const Instruction* inst = Instruction::At(insns + dex_pc);
uint16_t inst_data;
+ ArtMethod* method = shadow_frame.GetMethod();
+ jit::Jit* jit = Runtime::Current()->GetJit();
+ jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
+ if (jit != nullptr) {
+ jit_instrumentation_cache = jit->GetInstrumentationCache();
+ }
// TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
// to keep this live for the scope of the entire function call.
@@ -564,6 +577,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int8_t offset = inst->VRegA_10t(inst_data);
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -574,6 +588,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegA_20t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -584,6 +599,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int32_t offset = inst->VRegA_30t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -594,6 +610,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -604,6 +621,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -708,6 +726,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -724,6 +743,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -740,6 +760,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -756,6 +777,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -772,6 +794,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -788,6 +811,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -803,6 +827,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -818,6 +843,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -833,6 +859,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -848,6 +875,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -863,6 +891,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);
@@ -878,6 +907,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
self->AllowThreadSuspension();
}
inst = inst->RelativeAt(offset);