Move some fields in art::Thread to reduce maintenance burden.

Move fields `thread_local_start`, `thread_local_pos`,
`thread_local_end` and `thread_local_objects` before fields
`jni_entrypoints` and `quick_entrypoints` within
art::Thread, so that adding or removing entry points in
future CLs no longer shifts the offsets of these fields
(and the hard-coded constants derived from them).

Test: m test-art-host
Test: m test-art-target (on ARM)
Change-Id: Ib67842e44a7f21a871ca4d1bb95dc6f7cfedc829
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index ab4f9e9..5e55210 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5610,7 +5610,7 @@
   " 214:	ecbd 8a10 	vpop	{s16-s31}\n",
   " 218:	e8bd 8de0 	ldmia.w	sp!, {r5, r6, r7, r8, sl, fp, pc}\n",
   " 21c:	4660      	mov	r0, ip\n",
-  " 21e:	f8d9 c2b0 	ldr.w	ip, [r9, #688]	; 0x2b0\n",
+  " 21e:	f8d9 c2c0 	ldr.w	ip, [r9, #704]	; 0x2c0\n",
   " 222:	47e0      	blx	ip\n",
   nullptr
 };
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index e4972da..4b15a22 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -90,7 +90,7 @@
             art::Thread::SelfOffset<POINTER_SIZE>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 198 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 34 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
@@ -98,11 +98,13 @@
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
             art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
+
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
-#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__)
+#define THREAD_CURRENT_IBASE_OFFSET \
+    (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__ + (1 + 164) * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
             art::Thread::MterpCurrentIBaseOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_default_ibase.
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 96e17da..6301f93 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -117,15 +117,14 @@
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_function, active_suspend_barriers,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend_barriers, jni_entrypoints,
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend_barriers, thread_local_start,
                         sizeof(Thread::tls_ptr_sized_values::active_suspend_barriers));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, jni_entrypoints, sizeof(size_t));
 
     // Skip across the entrypoints structures.
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*));
-
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(size_t));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_alt_ibase, rosalloc_runs, sizeof(void*));
diff --git a/runtime/oat.h b/runtime/oat.h
index f2d457c..ab03252 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '9', '7', '\0' };  // HLoadClass/kBssEntry change
+  static constexpr uint8_t kOatVersion[] = { '0', '9', '8', '\0' };  // art::Thread fields reorder
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.h b/runtime/thread.h
index 3958c10..2b451bc 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1426,7 +1426,7 @@
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr),
-      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_start(nullptr),
+      thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
       thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr),
@@ -1545,21 +1545,21 @@
     // to avoid additional cost of a mutex and a condition variable, as used in art::Barrier.
     AtomicInteger* active_suspend_barriers[kMaxSuspendBarriers];
 
-    // Entrypoint function pointers.
-    // TODO: move this to more of a global offset table model to avoid per-thread duplication.
-    JniEntryPoints jni_entrypoints;
-    QuickEntryPoints quick_entrypoints;
+    // Thread-local allocation pointer. Moved here to force alignment for thread_local_pos on ARM.
+    uint8_t* thread_local_start;
 
     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
 
-    // Thread-local allocation pointer.
-    uint8_t* thread_local_start;
-
     size_t thread_local_objects;
 
+    // Entrypoint function pointers.
+    // TODO: move this to more of a global offset table model to avoid per-thread duplication.
+    JniEntryPoints jni_entrypoints;
+    QuickEntryPoints quick_entrypoints;
+
     // Mterp jump table bases.
     void* mterp_current_ibase;
     void* mterp_default_ibase;