diff options
| author | 2016-03-01 15:03:16 -0800 | |
|---|---|---|
| committer | 2016-03-23 11:11:26 -0700 | |
| commit | c1d6b341eed646e5adafc6c4fd4e3748f0292368 (patch) | |
| tree | ddee928954b03a919ce601f12876f1fe161d577a /runtime/interpreter/interpreter_switch_impl.cc | |
| parent | eecf60d51b481647c8508f22b3d6ce437773ea0c (diff) | |
ART: Improve JitProfile perf in arm/arm64 mterp
ART currently requires two profiling-related things from the
interpreters: hotness updates and OSR switch checks. The hotness
updates previously used the existing instrumentation framework - which
is flexible, but quite heavyweight. For most things, the
instrumentation framework overhead is acceptable, but because we do a
hotness update on every backwards branch the overhead is unacceptable.
Prior to this CL, branch profiling dominated interpreter cost.
Here, we bypass the instrumentation framework for hotness updates
and deliver a significant performance improvement. Running
interpreter-only (dalvikvm -Xint) on a Nexus 6, we see the logic
subtest of Caffeinemark improving from 2600 to 9200, and the
overall score going from 1979 to over 3000. Compared to the
C++ switch interpreter, we see a 6x improvement on the branchy logic
subtest and a 2.6x improvement overall.
Compared with the previous mterp which did not have support for
jit profiling, we see a small (1% to 5%) performance loss on the
standard command-line benchmarks. I consider this acceptable
(we could create an alternate non-profiling mterp which would
have no penalty, but I don't consider this overhead big enough to
justify that).
Change-Id: I50b5b8c5ed8ebda3c8b4e65d27ba7393c3feae04
Diffstat (limited to 'runtime/interpreter/interpreter_switch_impl.cc')
| -rw-r--r-- | runtime/interpreter/interpreter_switch_impl.cc | 32 |
1 files changed, 31 insertions, 1 deletions
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 0488dbf028..f9941d20aa 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -18,6 +18,7 @@
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
+#include "jit/jit_instrumentation.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -72,7 +73,6 @@ namespace interpreter {
 
 #define BRANCH_INSTRUMENTATION(offset) \
   do { \
-    ArtMethod* method = shadow_frame.GetMethod(); \
     instrumentation->Branch(self, method, dex_pc, offset); \
     JValue result; \
     if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \
@@ -80,6 +80,13 @@ namespace interpreter {
     } \
   } while (false)
 
+#define HOTNESS_UPDATE() \
+  do { \
+    if (jit_instrumentation_cache != nullptr) { \
+      jit_instrumentation_cache->AddSamples(self, method, 1); \
+    } \
+  } while (false)
+
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
   DCHECK(inst->IsExperimental());
   return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
@@ -101,6 +108,12 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
   const uint16_t* const insns = code_item->insns_;
   const Instruction* inst = Instruction::At(insns + dex_pc);
   uint16_t inst_data;
+  ArtMethod* method = shadow_frame.GetMethod();
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
+  if (jit != nullptr) {
+    jit_instrumentation_cache = jit->GetInstrumentationCache();
+  }
 
   // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
   // to keep this live for the scope of the entire function call.
@@ -564,6 +577,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
         int8_t offset = inst->VRegA_10t(inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -574,6 +588,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
         int16_t offset = inst->VRegA_20t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -584,6 +599,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
         int32_t offset = inst->VRegA_30t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -594,6 +610,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -604,6 +621,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -708,6 +726,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -724,6 +743,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -740,6 +760,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -756,6 +777,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -772,6 +794,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -788,6 +811,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -803,6 +827,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -818,6 +843,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -833,6 +859,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -848,6 +875,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -863,6 +891,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -878,6 +907,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);