Assembly TLAB allocation fast path for arm.
Speedup (GSS GC with TLAB on N5):
BinaryTrees: 1872 -> 796 ms (-57%)
MemAllocTest: 2522 -> 2219 ms (-12%)
Bug: 9986565
Change-Id: Icb9d1259461f3abe83a4a82c8aff911937eaf57d
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index eb3b7f3..879364e 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -121,32 +121,32 @@
ADD_TEST_EQ(THREAD_SELF_OFFSET,
art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.thread_local_objects.
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 168 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
+ art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 169 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + 2 * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.thread_local_end.
#define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
art::Thread::ThreadLocalEndOffset<__SIZEOF_POINTER__>().Int32Value())
-// Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
-ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
- art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.mterp_current_ibase.
-#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
art::Thread::MterpCurrentIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.mterp_default_ibase.
-#define THREAD_DEFAULT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 4 * __SIZEOF_POINTER__)
+#define THREAD_DEFAULT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_DEFAULT_IBASE_OFFSET,
art::Thread::MterpDefaultIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.mterp_alt_ibase.
-#define THREAD_ALT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 5 * __SIZEOF_POINTER__)
+#define THREAD_ALT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 4 * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_ALT_IBASE_OFFSET,
art::Thread::MterpAltIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.rosalloc_runs.
-#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 6 * __SIZEOF_POINTER__)
+#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 5 * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.