Reduced memory usage of primitive fields smaller than 4 bytes

Reduced memory used by byte and boolean fields from 4 bytes down to a
single byte and shorts and chars down to two bytes. Fields are now
arranged as Reference followed by decreasing component sizes, with
fields shuffled forward as needed.

Bug: 8135266
Change-Id: I65eaf31ed27e5bd5ba0c7d4606454b720b074752
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index b9f9437..f9a05c2 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -121,15 +121,25 @@
         break;
 
       case Instruction::IPUT:
-      case Instruction::IPUT_BOOLEAN:
-      case Instruction::IPUT_BYTE:
-      case Instruction::IPUT_CHAR:
-      case Instruction::IPUT_SHORT:
-        // These opcodes have the same implementation in interpreter so group
-        // them under IPUT_QUICK.
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_QUICK, true);
         break;
 
+      case Instruction::IPUT_BOOLEAN:
+        CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BOOLEAN_QUICK, true);
+        break;
+
+      case Instruction::IPUT_BYTE:
+        CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BYTE_QUICK, true);
+        break;
+
+      case Instruction::IPUT_CHAR:
+        CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_CHAR_QUICK, true);
+        break;
+
+      case Instruction::IPUT_SHORT:
+        CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_SHORT_QUICK, true);
+        break;
+
       case Instruction::IPUT_WIDE:
         CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_WIDE_QUICK, true);
         break;
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 963a586..a170614 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -1944,6 +1944,10 @@
     case Instruction::IPUT_SHORT:
     case Instruction::IPUT_QUICK:
     case Instruction::IPUT_OBJECT_QUICK:
+    case Instruction::IPUT_BOOLEAN_QUICK:
+    case Instruction::IPUT_BYTE_QUICK:
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT_QUICK:
     case Instruction::APUT:
     case Instruction::APUT_OBJECT:
     case Instruction::APUT_BOOLEAN:
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index e4a895e..1777e98 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -1704,13 +1704,13 @@
 }
 
 bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
-  ArmOpcode wide = (size == k64) ? WIDE(0) : UNWIDE(0);
+  ArmOpcode wide = IsWide(size) ? WIDE(0) : UNWIDE(0);
   RegLocation rl_src_i = info->args[0];
-  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
+  RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
+  RegLocation rl_i = IsWide(size) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
   NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
-  (size == k64) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
+  IsWide(size) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
   return true;
 }
 
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 3f22913..b2af298 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -524,11 +524,9 @@
   const RegStorage r_base_;
 };
 
-void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double,
-                      bool is_object) {
+void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) {
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedStaticField(field_info.FastPut(), field_info.IsReferrersClass());
-  OpSize store_size = LoadStoreOpSize(is_long_or_double, is_object);
   if (!SLOW_FIELD_PATH && field_info.FastPut()) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     RegStorage r_base;
@@ -587,37 +585,59 @@
       FreeTemp(r_method);
     }
     // rBase now holds static storage base
-    RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile());
-    if (is_long_or_double) {
+    RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
+    if (IsWide(size)) {
       rl_src = LoadValueWide(rl_src, reg_class);
     } else {
       rl_src = LoadValue(rl_src, reg_class);
     }
-    if (is_object) {
+    if (IsRef(size)) {
       StoreRefDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg,
                    field_info.IsVolatile() ? kVolatile : kNotVolatile);
     } else {
-      StoreBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, store_size,
+      StoreBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, size,
                     field_info.IsVolatile() ? kVolatile : kNotVolatile);
     }
-    if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
+    if (IsRef(size) && !mir_graph_->IsConstantNullRef(rl_src)) {
       MarkGCCard(rl_src.reg, r_base);
     }
     FreeTemp(r_base);
   } else {
     FlushAllRegs();  // Everything to home locations
-    QuickEntrypointEnum target =
-        is_long_or_double ? kQuickSet64Static
-            : (is_object ? kQuickSetObjStatic : kQuickSet32Static);
+    QuickEntrypointEnum target;
+    switch (size) {
+      case kReference:
+        target = kQuickSetObjStatic;
+        break;
+      case k64:
+      case kDouble:
+        target = kQuickSet64Static;
+        break;
+      case k32:
+      case kSingle:
+        target = kQuickSet32Static;
+        break;
+      case kSignedHalf:
+      case kUnsignedHalf:
+        target = kQuickSet16Static;
+        break;
+      case kSignedByte:
+      case kUnsignedByte:
+        target = kQuickSet8Static;
+        break;
+      case kWord:  // Intentional fallthrough.
+      default:
+        LOG(FATAL) << "Can't determine entrypoint for: " << size;
+        target = kQuickSet32Static;
+    }
     CallRuntimeHelperImmRegLocation(target, field_info.FieldIndex(), rl_src, true);
   }
 }
 
-void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest,
-                      bool is_long_or_double, bool is_object) {
+void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Type type) {
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedStaticField(field_info.FastGet(), field_info.IsReferrersClass());
-  OpSize load_size = LoadStoreOpSize(is_long_or_double, is_object);
+
   if (!SLOW_FIELD_PATH && field_info.FastGet()) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     RegStorage r_base;
@@ -668,33 +688,62 @@
       FreeTemp(r_method);
     }
     // r_base now holds static storage base
-    RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile());
+    RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
     RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
 
     int field_offset = field_info.FieldOffset().Int32Value();
-    if (is_object) {
+    if (IsRef(size)) {
+      // TODO: DCHECK?
       LoadRefDisp(r_base, field_offset, rl_result.reg, field_info.IsVolatile() ? kVolatile :
           kNotVolatile);
     } else {
-      LoadBaseDisp(r_base, field_offset, rl_result.reg, load_size, field_info.IsVolatile() ?
+      LoadBaseDisp(r_base, field_offset, rl_result.reg, size, field_info.IsVolatile() ?
           kVolatile : kNotVolatile);
     }
     FreeTemp(r_base);
 
-    if (is_long_or_double) {
+    if (IsWide(size)) {
       StoreValueWide(rl_dest, rl_result);
     } else {
       StoreValue(rl_dest, rl_result);
     }
   } else {
+    DCHECK(SizeMatchesTypeForEntrypoint(size, type));
     FlushAllRegs();  // Everything to home locations
-    QuickEntrypointEnum target =
-        is_long_or_double ? kQuickGet64Static
-            : (is_object ? kQuickGetObjStatic : kQuickGet32Static);
+    QuickEntrypointEnum target;
+    switch (type) {
+      case Primitive::kPrimNot:
+        target = kQuickGetObjStatic;
+        break;
+      case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
+        target = kQuickGet64Static;
+        break;
+      case Primitive::kPrimInt:
+      case Primitive::kPrimFloat:
+        target = kQuickGet32Static;
+        break;
+      case Primitive::kPrimShort:
+        target = kQuickGetShortStatic;
+        break;
+      case Primitive::kPrimChar:
+        target = kQuickGetCharStatic;
+        break;
+      case Primitive::kPrimByte:
+        target = kQuickGetByteStatic;
+        break;
+      case Primitive::kPrimBoolean:
+        target = kQuickGetBooleanStatic;
+        break;
+      case Primitive::kPrimVoid:  // Intentional fallthrough.
+      default:
+        LOG(FATAL) << "Can't determine entrypoint for: " << type;
+        target = kQuickGet32Static;
+    }
     CallRuntimeHelperImm(target, field_info.FieldIndex(), true);
 
     // FIXME: pGetXXStatic always return an int or int64 regardless of rl_dest.fp.
-    if (is_long_or_double) {
+    if (IsWide(size)) {
       RegLocation rl_result = GetReturnWide(kCoreReg);
       StoreValueWide(rl_dest, rl_result);
     } else {
@@ -715,14 +764,12 @@
   slow_paths_.Reset();
 }
 
-void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
-                      RegLocation rl_dest, RegLocation rl_obj, bool is_long_or_double,
-                      bool is_object) {
+void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, Primitive::Type type,
+                      RegLocation rl_dest, RegLocation rl_obj) {
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastGet());
-  OpSize load_size = LoadStoreOpSize(is_long_or_double, is_object);
   if (!SLOW_FIELD_PATH && field_info.FastGet()) {
-    RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile());
+    RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
     // A load of the class will lead to an iget with offset 0.
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     rl_obj = LoadValue(rl_obj, kRefReg);
@@ -730,29 +777,57 @@
     RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
     int field_offset = field_info.FieldOffset().Int32Value();
     LIR* load_lir;
-    if (is_object) {
+    if (IsRef(size)) {
       load_lir = LoadRefDisp(rl_obj.reg, field_offset, rl_result.reg, field_info.IsVolatile() ?
           kVolatile : kNotVolatile);
     } else {
-      load_lir = LoadBaseDisp(rl_obj.reg, field_offset, rl_result.reg, load_size,
+      load_lir = LoadBaseDisp(rl_obj.reg, field_offset, rl_result.reg, size,
                               field_info.IsVolatile() ? kVolatile : kNotVolatile);
     }
     MarkPossibleNullPointerExceptionAfter(opt_flags, load_lir);
-    if (is_long_or_double) {
+    if (IsWide(size)) {
       StoreValueWide(rl_dest, rl_result);
     } else {
       StoreValue(rl_dest, rl_result);
     }
   } else {
-    QuickEntrypointEnum target =
-        is_long_or_double ? kQuickGet64Instance
-            : (is_object ? kQuickGetObjInstance : kQuickGet32Instance);
+    DCHECK(SizeMatchesTypeForEntrypoint(size, type));
+    QuickEntrypointEnum target;
+    switch (type) {
+      case Primitive::kPrimNot:
+        target = kQuickGetObjInstance;
+        break;
+      case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
+        target = kQuickGet64Instance;
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimInt:
+        target = kQuickGet32Instance;
+        break;
+      case Primitive::kPrimShort:
+        target = kQuickGetShortInstance;
+        break;
+      case Primitive::kPrimChar:
+        target = kQuickGetCharInstance;
+        break;
+      case Primitive::kPrimByte:
+        target = kQuickGetByteInstance;
+        break;
+      case Primitive::kPrimBoolean:
+        target = kQuickGetBooleanInstance;
+        break;
+      case Primitive::kPrimVoid:  // Intentional fallthrough.
+      default:
+        LOG(FATAL) << "Can't determine entrypoint for: " << type;
+        target = kQuickGet32Instance;
+    }
     // Second argument of pGetXXInstance is always a reference.
     DCHECK_EQ(static_cast<unsigned int>(rl_obj.wide), 0U);
     CallRuntimeHelperImmRegLocation(target, field_info.FieldIndex(), rl_obj, true);
 
     // FIXME: pGetXXInstance always return an int or int64 regardless of rl_dest.fp.
-    if (is_long_or_double) {
+    if (IsWide(size)) {
       RegLocation rl_result = GetReturnWide(kCoreReg);
       StoreValueWide(rl_dest, rl_result);
     } else {
@@ -763,18 +838,16 @@
 }
 
 void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size,
-                      RegLocation rl_src, RegLocation rl_obj, bool is_long_or_double,
-                      bool is_object) {
+                      RegLocation rl_src, RegLocation rl_obj) {
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastPut());
-  OpSize store_size = LoadStoreOpSize(is_long_or_double, is_object);
   if (!SLOW_FIELD_PATH && field_info.FastPut()) {
-    RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile());
+    RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
     // Dex code never writes to the class field.
     DCHECK_GE(static_cast<uint32_t>(field_info.FieldOffset().Int32Value()),
               sizeof(mirror::HeapReference<mirror::Class>));
     rl_obj = LoadValue(rl_obj, kRefReg);
-    if (is_long_or_double) {
+    if (IsWide(size)) {
       rl_src = LoadValueWide(rl_src, reg_class);
     } else {
       rl_src = LoadValue(rl_src, reg_class);
@@ -782,21 +855,44 @@
     GenNullCheck(rl_obj.reg, opt_flags);
     int field_offset = field_info.FieldOffset().Int32Value();
     LIR* store;
-    if (is_object) {
+    if (IsRef(size)) {
       store = StoreRefDisp(rl_obj.reg, field_offset, rl_src.reg, field_info.IsVolatile() ?
           kVolatile : kNotVolatile);
     } else {
-      store = StoreBaseDisp(rl_obj.reg, field_offset, rl_src.reg, store_size,
+      store = StoreBaseDisp(rl_obj.reg, field_offset, rl_src.reg, size,
                             field_info.IsVolatile() ? kVolatile : kNotVolatile);
     }
     MarkPossibleNullPointerExceptionAfter(opt_flags, store);
-    if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
+    if (IsRef(size) && !mir_graph_->IsConstantNullRef(rl_src)) {
       MarkGCCard(rl_src.reg, rl_obj.reg);
     }
   } else {
-    QuickEntrypointEnum target =
-        is_long_or_double ? kQuickSet64Instance
-            : (is_object ? kQuickSetObjInstance : kQuickSet32Instance);
+    QuickEntrypointEnum target;
+    switch (size) {
+      case kReference:
+        target = kQuickSetObjInstance;
+        break;
+      case k64:
+      case kDouble:
+        target = kQuickSet64Instance;
+        break;
+      case k32:
+      case kSingle:
+        target = kQuickSet32Instance;
+        break;
+      case kSignedHalf:
+      case kUnsignedHalf:
+        target = kQuickSet16Instance;
+        break;
+      case kSignedByte:
+      case kUnsignedByte:
+        target = kQuickSet8Instance;
+        break;
+      case kWord:  // Intentional fallthrough.
+      default:
+        LOG(FATAL) << "Can't determine entrypoint for: " << size;
+        target = kQuickSet32Instance;
+    }
     CallRuntimeHelperImmRegLocationRegLocation(target, field_info.FieldIndex(), rl_obj, rl_src,
                                                true);
   }
@@ -2096,4 +2192,28 @@
   }
 }
 
+bool Mir2Lir::SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type) {
+  switch (size) {
+    case kReference:
+      return type == Primitive::kPrimNot;
+    case k64:
+    case kDouble:
+      return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
+    case k32:
+    case kSingle:
+      return type == Primitive::kPrimInt || type == Primitive::kPrimFloat;
+    case kSignedHalf:
+      return type == Primitive::kPrimShort;
+    case kUnsignedHalf:
+      return type == Primitive::kPrimChar;
+    case kSignedByte:
+      return type == Primitive::kPrimByte;
+    case kUnsignedByte:
+      return type == Primitive::kPrimBoolean;
+    case kWord:  // Intentional fallthrough.
+    default:
+      return false;  // There are no sane types with this op size.
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 3cfc9a6..3fdbe20 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1158,12 +1158,12 @@
 
   // intrinsic logic start.
   RegLocation rl_obj = info->args[0];
-  rl_obj = LoadValue(rl_obj);
+  rl_obj = LoadValue(rl_obj, kRefReg);
 
   RegStorage reg_slow_path = AllocTemp();
   RegStorage reg_disabled = AllocTemp();
-  Load32Disp(reg_class, slow_path_flag_offset, reg_slow_path);
-  Load32Disp(reg_class, disable_flag_offset, reg_disabled);
+  Load8Disp(reg_class, slow_path_flag_offset, reg_slow_path);
+  Load8Disp(reg_class, disable_flag_offset, reg_disabled);
   FreeTemp(reg_class);
   LIR* or_inst = OpRegRegReg(kOpOr, reg_slow_path, reg_slow_path, reg_disabled);
   FreeTemp(reg_disabled);
@@ -1297,10 +1297,10 @@
     return false;
   }
   RegLocation rl_src_i = info->args[0];
-  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
-  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
+  RegLocation rl_i = IsWide(size) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
+  RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (size == k64) {
+  if (IsWide(size)) {
     if (cu_->instruction_set == kArm64 || cu_->instruction_set == kX86_64) {
       OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
       StoreValueWide(rl_dest, rl_result);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index c4dfcb9..3ec37f2 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -18,6 +18,7 @@
 #include "dex/dataflow_iterator-inl.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "mir_to_lir-inl.h"
+#include "primitive.h"
 #include "thread-inl.h"
 
 namespace art {
@@ -223,9 +224,27 @@
     return false;
   }
 
-  bool wide = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE));
-  bool ref = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT));
-  OpSize size = LoadStoreOpSize(wide, ref);
+  OpSize size = k32;
+  switch (data.op_variant) {
+    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT):
+      size = kReference;
+      break;
+    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE):
+      size = k64;
+      break;
+    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_SHORT):
+      size = kSignedHalf;
+      break;
+    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_CHAR):
+      size = kUnsignedHalf;
+      break;
+    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_BYTE):
+      size = kSignedByte;
+      break;
+    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_BOOLEAN):
+      size = kUnsignedByte;
+      break;
+  }
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
@@ -233,20 +252,20 @@
   RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
   RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]);
-  RegLocation rl_dest = wide ? GetReturnWide(ret_reg_class) : GetReturn(ret_reg_class);
+  RegLocation rl_dest = IsWide(size) ? GetReturnWide(ret_reg_class) : GetReturn(ret_reg_class);
   RegStorage r_result = rl_dest.reg;
   if (!RegClassMatches(reg_class, r_result)) {
-    r_result = wide ? AllocTypedTempWide(rl_dest.fp, reg_class)
-                    : AllocTypedTemp(rl_dest.fp, reg_class);
+    r_result = IsWide(size) ? AllocTypedTempWide(rl_dest.fp, reg_class)
+                            : AllocTypedTemp(rl_dest.fp, reg_class);
   }
-  if (ref) {
+  if (IsRef(size)) {
     LoadRefDisp(reg_obj, data.field_offset, r_result, data.is_volatile ? kVolatile : kNotVolatile);
   } else {
     LoadBaseDisp(reg_obj, data.field_offset, r_result, size, data.is_volatile ? kVolatile :
         kNotVolatile);
   }
   if (r_result.NotExactlyEquals(rl_dest.reg)) {
-    if (wide) {
+    if (IsWide(size)) {
       OpRegCopyWide(rl_dest.reg, r_result);
     } else {
       OpRegCopy(rl_dest.reg, r_result);
@@ -267,24 +286,42 @@
     return false;
   }
 
-  bool wide = (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE));
-  bool ref = (data.op_variant == InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT));
-  OpSize size = LoadStoreOpSize(wide, ref);
+  OpSize size = k32;
+  switch (data.op_variant) {
+    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT):
+      size = kReference;
+      break;
+    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE):
+      size = k64;
+      break;
+    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_SHORT):
+      size = kSignedHalf;
+      break;
+    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_CHAR):
+      size = kUnsignedHalf;
+      break;
+    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_BYTE):
+      size = kSignedByte;
+      break;
+    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_BOOLEAN):
+      size = kUnsignedByte;
+      break;
+  }
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
   LockArg(data.object_arg);
-  LockArg(data.src_arg, wide);
+  LockArg(data.src_arg, IsWide(size));
   RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
-  RegStorage reg_src = LoadArg(data.src_arg, reg_class, wide);
-  if (ref) {
+  RegStorage reg_src = LoadArg(data.src_arg, reg_class, IsWide(size));
+  if (IsRef(size)) {
     StoreRefDisp(reg_obj, data.field_offset, reg_src, data.is_volatile ? kVolatile : kNotVolatile);
   } else {
     StoreBaseDisp(reg_obj, data.field_offset, reg_src, size, data.is_volatile ? kVolatile :
         kNotVolatile);
   }
-  if (ref) {
+  if (IsRef(size)) {
     MarkGCCard(reg_src, reg_obj);
   }
   return true;
@@ -720,84 +757,112 @@
       break;
 
     case Instruction::IGET_OBJECT:
-      GenIGet(mir, opt_flags, kReference, rl_dest, rl_src[0], false, true);
+      GenIGet(mir, opt_flags, kReference, Primitive::kPrimNot, rl_dest, rl_src[0]);
       break;
 
     case Instruction::IGET_WIDE:
-      GenIGet(mir, opt_flags, k64, rl_dest, rl_src[0], true, false);
+      // kPrimLong and kPrimDouble share the same entrypoints.
+      GenIGet(mir, opt_flags, k64, Primitive::kPrimLong, rl_dest, rl_src[0]);
       break;
 
     case Instruction::IGET:
-      GenIGet(mir, opt_flags, k32, rl_dest, rl_src[0], false, false);
+      GenIGet(mir, opt_flags, k32, Primitive::kPrimInt, rl_dest, rl_src[0]);
       break;
 
     case Instruction::IGET_CHAR:
-      GenIGet(mir, opt_flags, kUnsignedHalf, rl_dest, rl_src[0], false, false);
+      GenIGet(mir, opt_flags, kUnsignedHalf, Primitive::kPrimChar, rl_dest, rl_src[0]);
       break;
 
     case Instruction::IGET_SHORT:
-      GenIGet(mir, opt_flags, kSignedHalf, rl_dest, rl_src[0], false, false);
+      GenIGet(mir, opt_flags, kSignedHalf, Primitive::kPrimShort, rl_dest, rl_src[0]);
       break;
 
     case Instruction::IGET_BOOLEAN:
+      GenIGet(mir, opt_flags, kUnsignedByte, Primitive::kPrimBoolean, rl_dest, rl_src[0]);
+      break;
+
     case Instruction::IGET_BYTE:
-      GenIGet(mir, opt_flags, kUnsignedByte, rl_dest, rl_src[0], false, false);
+      GenIGet(mir, opt_flags, kSignedByte, Primitive::kPrimByte, rl_dest, rl_src[0]);
       break;
 
     case Instruction::IPUT_WIDE:
-      GenIPut(mir, opt_flags, k64, rl_src[0], rl_src[1], true, false);
+      GenIPut(mir, opt_flags, k64, rl_src[0], rl_src[1]);
       break;
 
     case Instruction::IPUT_OBJECT:
-      GenIPut(mir, opt_flags, kReference, rl_src[0], rl_src[1], false, true);
+      GenIPut(mir, opt_flags, kReference, rl_src[0], rl_src[1]);
       break;
 
     case Instruction::IPUT:
-      GenIPut(mir, opt_flags, k32, rl_src[0], rl_src[1], false, false);
+      GenIPut(mir, opt_flags, k32, rl_src[0], rl_src[1]);
       break;
 
-    case Instruction::IPUT_BOOLEAN:
     case Instruction::IPUT_BYTE:
-      GenIPut(mir, opt_flags, kUnsignedByte, rl_src[0], rl_src[1], false, false);
+    case Instruction::IPUT_BOOLEAN:
+      GenIPut(mir, opt_flags, kUnsignedByte, rl_src[0], rl_src[1]);
       break;
 
     case Instruction::IPUT_CHAR:
-      GenIPut(mir, opt_flags, kUnsignedHalf, rl_src[0], rl_src[1], false, false);
+      GenIPut(mir, opt_flags, kUnsignedHalf, rl_src[0], rl_src[1]);
       break;
 
     case Instruction::IPUT_SHORT:
-      GenIPut(mir, opt_flags, kSignedHalf, rl_src[0], rl_src[1], false, false);
+      GenIPut(mir, opt_flags, kSignedHalf, rl_src[0], rl_src[1]);
       break;
 
     case Instruction::SGET_OBJECT:
-      GenSget(mir, rl_dest, false, true);
+      GenSget(mir, rl_dest, kReference, Primitive::kPrimNot);
       break;
+
     case Instruction::SGET:
-    case Instruction::SGET_BOOLEAN:
-    case Instruction::SGET_BYTE:
+      GenSget(mir, rl_dest, k32, Primitive::kPrimInt);
+      break;
+
     case Instruction::SGET_CHAR:
+      GenSget(mir, rl_dest, kUnsignedHalf, Primitive::kPrimChar);
+      break;
+
     case Instruction::SGET_SHORT:
-      GenSget(mir, rl_dest, false, false);
+      GenSget(mir, rl_dest, kSignedHalf, Primitive::kPrimShort);
+      break;
+
+    case Instruction::SGET_BOOLEAN:
+      GenSget(mir, rl_dest, kUnsignedByte, Primitive::kPrimBoolean);
+      break;
+
+    case Instruction::SGET_BYTE:
+      GenSget(mir, rl_dest, kSignedByte, Primitive::kPrimByte);
       break;
 
     case Instruction::SGET_WIDE:
-      GenSget(mir, rl_dest, true, false);
+      // kPrimLong and kPrimDouble share the same entrypoints.
+      GenSget(mir, rl_dest, k64, Primitive::kPrimLong);
       break;
 
     case Instruction::SPUT_OBJECT:
-      GenSput(mir, rl_src[0], false, true);
+      GenSput(mir, rl_src[0], kReference);
       break;
 
     case Instruction::SPUT:
-    case Instruction::SPUT_BOOLEAN:
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_CHAR:
-    case Instruction::SPUT_SHORT:
-      GenSput(mir, rl_src[0], false, false);
+      GenSput(mir, rl_src[0], k32);
       break;
 
+    case Instruction::SPUT_BYTE:
+    case Instruction::SPUT_BOOLEAN:
+      GenSput(mir, rl_src[0], kUnsignedByte);
+      break;
+
+    case Instruction::SPUT_CHAR:
+      GenSput(mir, rl_src[0], kUnsignedHalf);
+      break;
+
+    case Instruction::SPUT_SHORT:
+      GenSput(mir, rl_src[0], kSignedHalf);
+      break;
+
+
     case Instruction::SPUT_WIDE:
-      GenSput(mir, rl_src[0], true, false);
+      GenSput(mir, rl_src[0], k64);
       break;
 
     case Instruction::INVOKE_STATIC_RANGE:
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 64ef48d..d101a13 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -831,14 +831,14 @@
     void GenNewArray(uint32_t type_idx, RegLocation rl_dest,
                      RegLocation rl_src);
     void GenFilledNewArray(CallInfo* info);
-    void GenSput(MIR* mir, RegLocation rl_src,
-                 bool is_long_or_double, bool is_object);
-    void GenSget(MIR* mir, RegLocation rl_dest,
-                 bool is_long_or_double, bool is_object);
-    void GenIGet(MIR* mir, int opt_flags, OpSize size,
-                 RegLocation rl_dest, RegLocation rl_obj, bool is_long_or_double, bool is_object);
+    void GenSput(MIR* mir, RegLocation rl_src, OpSize size);
+    // Get entrypoints are specific for types, size alone is not sufficient to safely infer
+    // entrypoint.
+    void GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Type type);
+    void GenIGet(MIR* mir, int opt_flags, OpSize size, Primitive::Type type,
+                 RegLocation rl_dest, RegLocation rl_obj);
     void GenIPut(MIR* mir, int opt_flags, OpSize size,
-                 RegLocation rl_src, RegLocation rl_obj, bool is_long_or_double, bool is_object);
+                 RegLocation rl_src, RegLocation rl_obj);
     void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
                         RegLocation rl_src);
 
@@ -978,6 +978,10 @@
     virtual LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
       return LoadBaseDisp(r_base, displacement, r_dest, kWord, kNotVolatile);
     }
+    // Load 8 bits, regardless of target.
+    virtual LIR* Load8Disp(RegStorage r_base, int displacement, RegStorage r_dest) {
+      return LoadBaseDisp(r_base, displacement, r_dest, kSignedByte, kNotVolatile);
+    }
     // Load 32 bits, regardless of target.
     virtual LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest)  {
       return LoadBaseDisp(r_base, displacement, r_dest, k32, kNotVolatile);
@@ -1149,6 +1153,14 @@
              (info1->StorageMask() & info2->StorageMask()) != 0);
     }
 
+    static constexpr bool IsWide(OpSize size) {
+      return size == k64 || size == kDouble;
+    }
+
+    static constexpr bool IsRef(OpSize size) {
+      return size == kReference;
+    }
+
     /**
      * @brief Portable way of getting special registers from the backend.
      * @param reg Enumeration describing the purpose of the register.
@@ -1483,10 +1495,6 @@
      */
     virtual RegLocation ForceTempWide(RegLocation loc);
 
-    static constexpr OpSize LoadStoreOpSize(bool wide, bool ref) {
-      return wide ? k64 : ref ? kReference : k32;
-    }
-
     virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                     RegLocation rl_dest, RegLocation rl_src);
 
@@ -1724,6 +1732,9 @@
     // (i.e. 8 bytes on 32-bit arch, 16 bytes on 64-bit arch) and we use ResourceMaskCache
     // to deduplicate the masks.
     ResourceMaskCache mask_cache_;
+
+  private:
+    static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type);
 };  // Class Mir2Lir
 
 }  // namespace art
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 4fea1f0..9691864 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -387,10 +387,10 @@
     Instruction::IPUT_OBJECT_QUICK,
     Instruction::INVOKE_VIRTUAL_QUICK,
     Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
-    Instruction::UNUSED_EB,
-    Instruction::UNUSED_EC,
-    Instruction::UNUSED_ED,
-    Instruction::UNUSED_EE,
+    Instruction::IPUT_BOOLEAN_QUICK,
+    Instruction::IPUT_BYTE_QUICK,
+    Instruction::IPUT_CHAR_QUICK,
+    Instruction::IPUT_SHORT_QUICK,
     Instruction::UNUSED_EF,
     Instruction::UNUSED_F0,
     Instruction::UNUSED_F1,
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index a48613f..f159beb 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -875,6 +875,17 @@
 
   // StoreBaseDisp() will emit correct insn for atomic store on x86
   // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().
+  // x86 only allows registers EAX-EDX to be used as byte registers, if the input src is not
+  // valid, allocate a temp.
+  bool allocated_temp = false;
+  if (size == kUnsignedByte || size == kSignedByte) {
+    if (!cu_->target64 && !r_src.Low4()) {
+      RegStorage r_input = r_src;
+      r_src = AllocateByteRegister();
+      OpRegCopy(r_src, r_input);
+      allocated_temp = true;
+    }
+  }
 
   LIR* store = StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size);
 
@@ -884,6 +895,10 @@
     GenMemBarrier(kAnyAny);
   }
 
+  if (allocated_temp) {
+    FreeTemp(r_src);
+  }
+
   return store;
 }
 
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 28710e0..e858a7b 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -188,7 +188,7 @@
   EXPECT_EQ(84U, sizeof(OatHeader));
   EXPECT_EQ(8U, sizeof(OatMethodOffsets));
   EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(91 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {