diff options
| | | |
|---|---|---|
| author | 2016-02-24 14:24:24 -0800 | |
| committer | 2016-02-25 05:47:09 -0800 | |
| commit | f1dcaccfac4a9e022ea49752a774552b2f791489 (patch) | |
| tree | 2e11cbb0bead0b24dd527f307a58a0588e707901 /runtime/interpreter/interpreter_switch_impl.cc | |
| parent | db11e7ecc9c902ffde6336c261fe236299a81944 (diff) | |
ART: Profile all branches for on-stack replacement
Change the switch, goto and mterp interpreters to profile
not-taken as well as taken branches. This allows for on-stack
replacement when the CFG has been rearranged such that the loop
header was originally the fallthrough of a Dalvik bytecode branch.
Note that this increases the already-heavy cost of branch profiling.
Measuring on a Nexus 6 using a very branchy benchmark (logic subtest
from Caffeinemark), we see:
| | No profiling | Taken only | Taken & not-taken |
|---|---|---|---|
| mterp | 9728 | 3434 | 2384 |
| C++ goto | 3914 | 2422 | 2037 |
| C++ switch | 2986 | 2411 | 2112 |
As measured, the cost of branch profiling is dominating execution
time. This will be addressed in follow-up CLs.
Change-Id: Ibc858f317398dd991ed8e4f3c3d72bd4c9a60594
Diffstat (limited to 'runtime/interpreter/interpreter_switch_impl.cc')
| -rw-r--r-- | runtime/interpreter/interpreter_switch_impl.cc | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index 25dbab2494..0488dbf028 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -712,6 +712,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -727,6 +728,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -742,6 +744,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -757,6 +760,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -772,6 +776,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -787,6 +792,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -801,6 +807,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -815,6 +822,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -829,6 +837,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } 
else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -843,6 +852,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -857,6 +867,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; @@ -871,6 +882,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } inst = inst->RelativeAt(offset); } else { + BRANCH_INSTRUMENTATION(2); inst = inst->Next_2xx(); } break; |