Revert^2 "Add hand written assembly for mterp field reads."

This reverts commit bb502e58d8bea577fe25d82c0c1ca2a763131673.

This speeds up non-quickened code by 3% (measured on golem).

Test: ART_HEAP_POISONING=true test.py -b -r --target -t 998
Change-Id: I7161ea5ba174e8a37fa41fd63ef34fd0ec57fc36
diff --git a/runtime/interpreter/mterp/arm/main.S b/runtime/interpreter/mterp/arm/main.S
index 1d8d6c9..62c38bf 100644
--- a/runtime/interpreter/mterp/arm/main.S
+++ b/runtime/interpreter/mterp/arm/main.S
@@ -259,6 +259,14 @@
     mov     \reg, #0
     str     \reg, [rREFS, \vreg, lsl #2]
 .endm
+.macro SET_VREG_WIDE regLo, regHi, vreg
+    add     ip, rFP, \vreg, lsl #2      @ ip <- address of fp[vreg] (clobbers ip)
+    strd    \regLo, \regHi, [ip]        @ fp[vreg], fp[vreg+1] <- regLo, regHi
+    mov     \regLo, #0                  @ both value registers are zeroed from here on
+    mov     \regHi, #0
+    add     ip, rREFS, \vreg, lsl #2    @ ip <- address of refs[vreg]
+    strd    \regLo, \regHi, [ip]        @ clear the two matching reference slots
+.endm
 .macro SET_VREG_OBJECT reg, vreg, tmpreg
     str     \reg, [rFP, \vreg, lsl #2]
     str     \reg, [rREFS, \vreg, lsl #2]
@@ -308,6 +316,13 @@
     .size \name, .-\name
 .endm
 
+// Unpoison a heap reference in place (rRef <- 0 - rRef); expands to nothing unless USE_HEAP_POISONING is defined.
+.macro UNPOISON_HEAP_REF rRef
+#ifdef USE_HEAP_POISONING
+    rsb \rRef, \rRef, #0
+#endif  // USE_HEAP_POISONING
+.endm
+
 %def entry():
 /*
  * Copyright (C) 2016 The Android Open Source Project
diff --git a/runtime/interpreter/mterp/arm/object.S b/runtime/interpreter/mterp/arm/object.S
index af1ece2..13009ea 100644
--- a/runtime/interpreter/mterp/arm/object.S
+++ b/runtime/interpreter/mterp/arm/object.S
@@ -34,29 +34,76 @@
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
-%def op_iget(is_object="0", helper="MterpIGetU32"):
-%  field(helper=helper)
+%def op_iget(is_object=False, is_wide=False, load="ldr", helper="MterpIGetU32"):
+   @ Fast path: fetch the field offset from the thread-local cache, keyed by pc.
+   add      r0, rSELF, #THREAD_INTERPRETER_CACHE_OFFSET       @ cache address
+   ubfx     r1, rPC, #2, #THREAD_INTERPRETER_CACHE_SIZE_LOG2  @ entry index
+   add      r0, r0, r1, lsl #3            @ entry address (index scaled by 8)
+   ldrd     r0, r1, [r0]                  @ r0 <- entry key (pc), r1 <- value (offset)
+   mov      r2, rINST, lsr #12            @ r2 <- B
+   GET_VREG r2, r2                        @ r2 <- object we are operating on
+   cmp      r0, rPC                       @ does the cached key match this pc?
+%  slow_path_label = add_helper("slow_path", lambda: field(helper))
+   bne      ${slow_path_label}            @ cache miss: take the field(helper) slow path
+   cmp      r2, #0
+   beq      common_errNullObject          @ null object
+%  if is_wide:
+     ldrd     r0, r1, [r1, r2]            @ r0,r1 <- obj.field (index reg must not be r0/r1)
+%  else:
+     ${load}  r0, [r2, r1]                @ r0 <- obj.field
+%  #endif
+%  if is_object:
+     UNPOISON_HEAP_REF r0                 @ no-op unless USE_HEAP_POISONING is defined
+#if defined(USE_READ_BARRIER)
+# if defined(USE_BAKER_READ_BARRIER)
+     ldr    ip, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]  @ ip <- thread is_gc_marking value
+     cmp    ip, #0
+     bne    .L_${opcode}_mark             @ GC is active: mark the reference out of line
+.L_${opcode}_marked:
+# else
+     bl artReadBarrierMark                @ r0 <- artReadBarrierMark(r0)
+# endif
+#endif
+%  #endif
+   ubfx     r2, rINST, #8, #4             @ r2 <- A
+   FETCH_ADVANCE_INST 2                   @ advance rPC, load rINST
+%  if is_object:
+     SET_VREG_OBJECT r0, r2               @ fp[A]<- r0
+%  elif is_wide:
+     SET_VREG_WIDE r0, r1, r2             @ fp[A]<- r0, r1
+%  else:
+     SET_VREG r0, r2                      @ fp[A]<- r0
+%  #endif
+   GET_INST_OPCODE ip                     @ extract opcode from rINST
+   GOTO_OPCODE ip                         @ jump to next instruction
+%  if is_object:
+#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
+.L_${opcode}_mark:
+     bl artReadBarrierMark                @ r0 <- artReadBarrierMark(r0)
+     b .L_${opcode}_marked                @ rejoin the fast path after marking
+#endif
+%  #endif
 
 %def op_iget_boolean():
-%  op_iget(helper="MterpIGetU8")
+%  op_iget(load="ldrb", helper="MterpIGetU8")
 
 %def op_iget_boolean_quick():
 %  op_iget_quick(load="ldrb")
 
 %def op_iget_byte():
-%  op_iget(helper="MterpIGetI8")
+%  op_iget(load="ldrsb", helper="MterpIGetI8")
 
 %def op_iget_byte_quick():
 %  op_iget_quick(load="ldrsb")
 
 %def op_iget_char():
-%  op_iget(helper="MterpIGetU16")
+%  op_iget(load="ldrh", helper="MterpIGetU16")
 
 %def op_iget_char_quick():
 %  op_iget_quick(load="ldrh")
 
 %def op_iget_object():
-%  op_iget(is_object="1", helper="MterpIGetObj")
+%  op_iget(is_object=True, helper="MterpIGetObj")
 
 %def op_iget_object_quick():
     /* For: iget-object-quick */
@@ -92,13 +139,13 @@
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_iget_short():
-%  op_iget(helper="MterpIGetI16")
+%  op_iget(load="ldrsh", helper="MterpIGetI16")
 
 %def op_iget_short_quick():
 %  op_iget_quick(load="ldrsh")
 
 %def op_iget_wide():
-%  op_iget(helper="MterpIGetU64")
+%  op_iget(is_wide=True, helper="MterpIGetU64")
 
 %def op_iget_wide_quick():
     /* iget-wide-quick vA, vB, offset@CCCC */
@@ -141,7 +188,7 @@
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
-%def op_iput(is_object="0", helper="MterpIPutU32"):
+%def op_iput(helper="MterpIPutU32"):
 %  field(helper=helper)
 
 %def op_iput_boolean():
@@ -163,7 +210,7 @@
 %  op_iput_quick(store="strh")
 
 %def op_iput_object():
-%  op_iput(is_object="1", helper="MterpIPutObj")
+%  op_iput(helper="MterpIPutObj")
 
 %def op_iput_object_quick():
     EXPORT_PC
@@ -232,7 +279,7 @@
     GET_INST_OPCODE ip                 @ extract opcode from rINST
     GOTO_OPCODE ip                     @ jump to next instruction
 
-%def op_sget(is_object="0", helper="MterpSGetU32"):
+%def op_sget(helper="MterpSGetU32"):
 %  field(helper=helper)
 
 %def op_sget_boolean():
@@ -245,7 +292,7 @@
 %  op_sget(helper="MterpSGetU16")
 
 %def op_sget_object():
-%  op_sget(is_object="1", helper="MterpSGetObj")
+%  op_sget(helper="MterpSGetObj")
 
 %def op_sget_short():
 %  op_sget(helper="MterpSGetI16")
@@ -253,7 +300,7 @@
 %def op_sget_wide():
 %  op_sget(helper="MterpSGetU64")
 
-%def op_sput(is_object="0", helper="MterpSPutU32"):
+%def op_sput(helper="MterpSPutU32"):
 %  field(helper=helper)
 
 %def op_sput_boolean():
@@ -266,7 +313,7 @@
 %  op_sput(helper="MterpSPutU16")
 
 %def op_sput_object():
-%  op_sput(is_object="1", helper="MterpSPutObj")
+%  op_sput(helper="MterpSPutObj")
 
 %def op_sput_short():
 %  op_sput(helper="MterpSPutI16")
diff --git a/runtime/interpreter/mterp/arm64/main.S b/runtime/interpreter/mterp/arm64/main.S
index 31356ff..f248265 100644
--- a/runtime/interpreter/mterp/arm64/main.S
+++ b/runtime/interpreter/mterp/arm64/main.S
@@ -351,6 +351,13 @@
     .size \name, .-\name
 .endm
 
+// Unpoison a heap reference in place (rRef <- -rRef); expands to nothing unless USE_HEAP_POISONING is defined.
+.macro UNPOISON_HEAP_REF rRef
+#ifdef USE_HEAP_POISONING
+    neg \rRef, \rRef
+#endif  // USE_HEAP_POISONING
+.endm
+
 %def entry():
 /*
  * Copyright (C) 2016 The Android Open Source Project
@@ -438,7 +445,7 @@
 
 %def footer():
     .cfi_endproc
-    END ExecuteMterpImpl
+    END MterpHelpers
 
 %def fallback():
 /* Transfer stub to alternate interpreter */
diff --git a/runtime/interpreter/mterp/arm64/object.S b/runtime/interpreter/mterp/arm64/object.S
index 388aba5..3d44731 100644
--- a/runtime/interpreter/mterp/arm64/object.S
+++ b/runtime/interpreter/mterp/arm64/object.S
@@ -32,29 +32,74 @@
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
 
-%def op_iget(is_object="0", helper="MterpIGetU32"):
-%  field(helper=helper)
+%def op_iget(is_object=False, is_wide=False, load="ldr", helper="MterpIGetU32"):
+   // Fast path: fetch the field offset from the thread-local cache, keyed by pc.
+   add      x0, xSELF, #THREAD_INTERPRETER_CACHE_OFFSET       // cache address
+   ubfx     x1, xPC, #2, #THREAD_INTERPRETER_CACHE_SIZE_LOG2  // entry index
+   add      x0, x0, x1, lsl #4            // entry address (index scaled by 16)
+   ldp      x0, x1, [x0]                  // x0<- entry key (pc), x1<- value (offset)
+   lsr      w2, wINST, #12                // w2<- B
+   GET_VREG w2, w2                        // w2<- object we are operating on
+   cmp      x0, xPC                       // does the cached key match this pc?
+%  slow_path_label = add_helper("slow_path", lambda: field(helper))
+   b.ne     ${slow_path_label}            // cache miss: take the field(helper) slow path
+   cbz      w2, common_errNullObject      // null object
+%  if is_wide:
+     ldr      x0, [x2, x1]                // x0<- obj.field
+%  else:
+     ${load}  w0, [x2, x1]                // w0<- obj.field
+%  #endif
+%  if is_object:
+     UNPOISON_HEAP_REF w0                 // no-op unless USE_HEAP_POISONING is defined
+#if defined(USE_READ_BARRIER)
+# if defined(USE_BAKER_READ_BARRIER)
+     ldr    w1, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]  // w1<- thread is_gc_marking value
+     cbnz   w1, .L_${opcode}_mark         // GC is active: mark the reference out of line
+.L_${opcode}_marked:
+# else
+     bl artReadBarrierMark                // x0 <- artReadBarrierMark(x0)
+# endif
+#endif
+%  #endif
+   ubfx     w2, wINST, #8, #4             // w2<- A
+   FETCH_ADVANCE_INST 2                   // advance rPC, load rINST
+%  if is_object:
+     SET_VREG_OBJECT w0, w2               // fp[A]<- w0
+%  elif is_wide:
+     SET_VREG_WIDE x0, w2                 // fp[A]<- x0
+%  else:
+     SET_VREG w0, w2                      // fp[A]<- w0
+%  #endif
+   GET_INST_OPCODE ip                     // extract opcode from rINST
+   GOTO_OPCODE ip                         // jump to next instruction
+%  if is_object:
+#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
+.L_${opcode}_mark:
+     bl artReadBarrierMark                // x0 <- artReadBarrierMark(x0)
+     b .L_${opcode}_marked                // rejoin the fast path after marking
+#endif
+%  #endif
 
 %def op_iget_boolean():
-%  op_iget(helper="MterpIGetU8")
+%  op_iget(load="ldrb", helper="MterpIGetU8")
 
 %def op_iget_boolean_quick():
 %  op_iget_quick(load="ldrb")
 
 %def op_iget_byte():
-%  op_iget(helper="MterpIGetI8")
+%  op_iget(load="ldrsb", helper="MterpIGetI8")
 
 %def op_iget_byte_quick():
 %  op_iget_quick(load="ldrsb")
 
 %def op_iget_char():
-%  op_iget(helper="MterpIGetU16")
+%  op_iget(load="ldrh", helper="MterpIGetU16")
 
 %def op_iget_char_quick():
 %  op_iget_quick(load="ldrh")
 
 %def op_iget_object():
-%  op_iget(is_object="1", helper="MterpIGetObj")
+%  op_iget(is_object=True, helper="MterpIGetObj")
 
 %def op_iget_object_quick():
     /* For: iget-object-quick */
@@ -89,13 +134,13 @@
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def op_iget_short():
-%  op_iget(helper="MterpIGetI16")
+%  op_iget(load="ldrsh", helper="MterpIGetI16")
 
 %def op_iget_short_quick():
 %  op_iget_quick(load="ldrsh")
 
 %def op_iget_wide():
-%  op_iget(helper="MterpIGetU64")
+%  op_iget(is_wide=True, helper="MterpIGetU64")
 
 %def op_iget_wide_quick():
     /* iget-wide-quick vA, vB, offset//CCCC */
@@ -134,7 +179,7 @@
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
 
-%def op_iput(is_object="0", helper="MterpIPutU32"):
+%def op_iput(helper="MterpIPutU32"):
 %  field(helper=helper)
 
 %def op_iput_boolean():
@@ -156,7 +201,7 @@
 %  op_iput_quick(store="strh")
 
 %def op_iput_object():
-%  op_iput(is_object="1", helper="MterpIPutObj")
+%  op_iput(helper="MterpIPutObj")
 
 %def op_iput_object_quick():
     EXPORT_PC
@@ -220,7 +265,7 @@
     GET_INST_OPCODE ip                 // extract opcode from rINST
     GOTO_OPCODE ip                     // jump to next instruction
 
-%def op_sget(is_object="0", helper="MterpSGetU32"):
+%def op_sget(helper="MterpSGetU32"):
 %  field(helper=helper)
 
 %def op_sget_boolean():
@@ -233,7 +278,7 @@
 %  op_sget(helper="MterpSGetU16")
 
 %def op_sget_object():
-%  op_sget(is_object="1", helper="MterpSGetObj")
+%  op_sget(helper="MterpSGetObj")
 
 %def op_sget_short():
 %  op_sget(helper="MterpSGetI16")
@@ -241,7 +286,7 @@
 %def op_sget_wide():
 %  op_sget(helper="MterpSGetU64")
 
-%def op_sput(is_object="0", helper="MterpSPutU32"):
+%def op_sput(helper="MterpSPutU32"):
 %  field(helper=helper)
 
 %def op_sput_boolean():
@@ -254,7 +299,7 @@
 %  op_sput(helper="MterpSPutU16")
 
 %def op_sput_object():
-%  op_sput(is_object="1", helper="MterpSPutObj")
+%  op_sput(helper="MterpSPutObj")
 
 %def op_sput_short():
 %  op_sput(helper="MterpSPutI16")