Merge "Remove dump-oat-Calculator"
diff --git a/build/Android.bp b/build/Android.bp
index 8e8a2f6..ff762dd 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -83,7 +83,7 @@
                 "bionic/libc/private",
             ],
         },
-        linux: {
+        linux_glibc: {
             cflags: [
                 // Enable missing-noreturn only on non-Mac. As lots of things are not implemented for
                 // Apple, it's a pain.
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 571c91a..0f92a25 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -604,7 +604,7 @@
   endif
 
 .PHONY: $$(rule_name)
-$$(rule_name): $$(dependencies)
+$$(rule_name): $$(dependencies) dx d8
 	$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
 
   # Clear locally defined variables.
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index c8a0119..bfefead 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -158,7 +158,12 @@
   V(UnsafeFullFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "fullFence", "()V") \
   V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") \
   V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;") \
-  V(ThreadInterrupted, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kNoThrow, "Ljava/lang/Thread;", "interrupted", "()Z")
+  V(ThreadInterrupted, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kNoThrow, "Ljava/lang/Thread;", "interrupted", "()Z") \
+  V(VarHandleFullFence, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kNoThrow, "Ljava/lang/invoke/VarHandle;", "fullFence", "()V") \
+  V(VarHandleAcquireFence, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kNoThrow, "Ljava/lang/invoke/VarHandle;", "acquireFence", "()V") \
+  V(VarHandleReleaseFence, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kNoThrow, "Ljava/lang/invoke/VarHandle;", "releaseFence", "()V") \
+  V(VarHandleLoadLoadFence, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kNoThrow, "Ljava/lang/invoke/VarHandle;", "loadLoadFence", "()V") \
+  V(VarHandleStoreStoreFence, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/invoke/VarHandle;", "storeStoreFence", "()V") \
 
 #endif  // ART_COMPILER_INTRINSICS_LIST_H_
 #undef ART_COMPILER_INTRINSICS_LIST_H_   // #define is only for lint.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 42e9f68..468e93a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1929,17 +1929,18 @@
                               const MemOperand& src) {
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
       __ Ldrb(Register(dst), src);
       break;
     case DataType::Type::kInt8:
       __ Ldrsb(Register(dst), src);
       break;
-    case DataType::Type::kInt16:
-      __ Ldrsh(Register(dst), src);
-      break;
     case DataType::Type::kUint16:
       __ Ldrh(Register(dst), src);
       break;
+    case DataType::Type::kInt16:
+      __ Ldrsh(Register(dst), src);
+      break;
     case DataType::Type::kInt32:
     case DataType::Type::kReference:
     case DataType::Type::kInt64:
@@ -1972,14 +1973,7 @@
     MemOperand base = MemOperand(temp_base);
     switch (type) {
       case DataType::Type::kBool:
-        {
-          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
-          __ ldarb(Register(dst), base);
-          if (needs_null_check) {
-            MaybeRecordImplicitNullCheck(instruction);
-          }
-        }
-        break;
+      case DataType::Type::kUint8:
       case DataType::Type::kInt8:
         {
           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
@@ -1988,17 +1982,11 @@
             MaybeRecordImplicitNullCheck(instruction);
           }
         }
-        __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
-        break;
-      case DataType::Type::kUint16:
-        {
-          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
-          __ ldarh(Register(dst), base);
-          if (needs_null_check) {
-            MaybeRecordImplicitNullCheck(instruction);
-          }
+        if (type == DataType::Type::kInt8) {
+          __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
         }
         break;
+      case DataType::Type::kUint16:
       case DataType::Type::kInt16:
         {
           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
@@ -2007,7 +1995,9 @@
             MaybeRecordImplicitNullCheck(instruction);
           }
         }
-        __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
+        if (type == DataType::Type::kInt16) {
+          __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
+        }
         break;
       case DataType::Type::kInt32:
       case DataType::Type::kReference:
@@ -2048,6 +2038,7 @@
                                const MemOperand& dst) {
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       __ Strb(Register(src), dst);
       break;
@@ -2087,6 +2078,7 @@
   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       {
         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
@@ -3222,9 +3214,10 @@
   DataType::Type in_type = compare->InputAt(0)->GetType();
   switch (in_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kInt64: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -3255,9 +3248,10 @@
   // -1 if: left  < right
   switch (in_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kInt64: {
       Register result = OutputRegister(compare);
@@ -5744,7 +5738,8 @@
       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   DataType::Type input_type = conversion->GetInputType();
   DataType::Type result_type = conversion->GetResultType();
-  DCHECK_NE(input_type, result_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
       (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
@@ -5767,7 +5762,8 @@
   DataType::Type result_type = conversion->GetResultType();
   DataType::Type input_type = conversion->GetInputType();
 
-  DCHECK_NE(input_type, result_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
 
   if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
     int result_size = DataType::Size(result_type);
@@ -5784,11 +5780,9 @@
       // 32bit input value as a 64bit value assuming that the top 32 bits are
       // zero.
       __ Mov(output.W(), source.W());
-    } else if (result_type == DataType::Type::kUint16 ||
-               (input_type == DataType::Type::kUint16 && input_size < result_size)) {
-      __ Ubfx(output,
-              output.IsX() ? source.X() : source.W(),
-              0, DataType::Size(DataType::Type::kUint16) * kBitsPerByte);
+    } else if (DataType::IsUnsignedType(result_type) ||
+               (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
+      __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
     } else {
       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
     }
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 2b9e0fe..d4fb064 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -282,6 +282,58 @@
   return stack_offset;
 }
 
+static LoadOperandType GetLoadOperandType(DataType::Type type) {  // DataType::Type -> load kind.
+  switch (type) {
+    case DataType::Type::kReference:
+      return kLoadWord;  // Same operand kind as kInt32 below.
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8:
+      return kLoadUnsignedByte;  // kBool shares the unsigned byte kind with kUint8.
+    case DataType::Type::kInt8:
+      return kLoadSignedByte;
+    case DataType::Type::kUint16:
+      return kLoadUnsignedHalfword;
+    case DataType::Type::kInt16:
+      return kLoadSignedHalfword;
+    case DataType::Type::kInt32:
+      return kLoadWord;
+    case DataType::Type::kInt64:
+      return kLoadWordPair;
+    case DataType::Type::kFloat32:
+      return kLoadSWord;
+    case DataType::Type::kFloat64:
+      return kLoadDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;  // Any type not listed above is a fatal error.
+      UNREACHABLE();
+  }
+}
+
+static StoreOperandType GetStoreOperandType(DataType::Type type) {  // DataType::Type -> store kind.
+  switch (type) {
+    case DataType::Type::kReference:
+      return kStoreWord;  // Same operand kind as kInt32 below.
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8:
+    case DataType::Type::kInt8:
+      return kStoreByte;  // One store kind for kBool and for both signed and unsigned bytes.
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
+      return kStoreHalfword;  // Likewise one kind for signed and unsigned 16-bit types.
+    case DataType::Type::kInt32:
+      return kStoreWord;
+    case DataType::Type::kInt64:
+      return kStoreWordPair;
+    case DataType::Type::kFloat32:
+      return kStoreSWord;
+    case DataType::Type::kFloat64:
+      return kStoreDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;  // Any type not listed above is a fatal error.
+      UNREACHABLE();
+  }
+}
+
 void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
   size_t orig_offset = stack_offset;
@@ -2598,12 +2650,13 @@
 
 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
   switch (type) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-    case DataType::Type::kInt32:
-    case DataType::Type::kReference: {
+    case DataType::Type::kInt32: {
       uint32_t index = gp_index_++;
       uint32_t stack_index = stack_index_++;
       if (index < calling_convention.GetNumberOfRegisters()) {
@@ -2674,12 +2727,13 @@
 
 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
   switch (type) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-    case DataType::Type::kInt32:
-    case DataType::Type::kReference: {
+    case DataType::Type::kInt32: {
       return LocationFrom(r0);
     }
 
@@ -3728,7 +3782,8 @@
 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
   DataType::Type result_type = conversion->GetResultType();
   DataType::Type input_type = conversion->GetInputType();
-  DCHECK_NE(result_type, input_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
 
   // The float-to-long, double-to-long and long-to-float type conversions
   // rely on a call to the runtime.
@@ -3741,67 +3796,30 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
 
-  // The Java language does not allow treating boolean as an integral type but
-  // our bit representation makes it safe.
-
   switch (result_type) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to byte is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt16:
-        case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-byte' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
-          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
-      break;
-
+    case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to short is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-short' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
-          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
+      DCHECK(DataType::IsIntegralType(input_type)) << input_type;
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
     case DataType::Type::kInt32:
       switch (input_type) {
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-int' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-int' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
           locations->AddTemp(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-int' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
           locations->AddTemp(Location::RequiresFpuRegister());
@@ -3816,18 +3834,16 @@
     case DataType::Type::kInt64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-long' instruction.
           locations->SetInAt(0, Location::RequiresRegister());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
 
         case DataType::Type::kFloat32: {
-          // Processing a Dex `float-to-long' instruction.
           InvokeRuntimeCallingConventionARMVIXL calling_convention;
           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
           locations->SetOut(LocationFrom(r0, r1));
@@ -3835,7 +3851,6 @@
         }
 
         case DataType::Type::kFloat64: {
-          // Processing a Dex `double-to-long' instruction.
           InvokeRuntimeCallingConventionARMVIXL calling_convention;
           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
                                              calling_convention.GetFpuRegisterAt(1)));
@@ -3849,41 +3864,19 @@
       }
       break;
 
-    case DataType::Type::kUint16:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to char is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt16:
-        case DataType::Type::kInt32:
-          // Processing a Dex `int-to-char' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
-          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
-      break;
-
     case DataType::Type::kFloat32:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-float' instruction.
           locations->SetInAt(0, Location::RequiresRegister());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kInt64: {
-          // Processing a Dex `long-to-float' instruction.
           InvokeRuntimeCallingConventionARMVIXL calling_convention;
           locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
                                              calling_convention.GetRegisterAt(1)));
@@ -3892,7 +3885,6 @@
         }
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-float' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
@@ -3906,18 +3898,16 @@
     case DataType::Type::kFloat64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-double' instruction.
           locations->SetInAt(0, Location::RequiresRegister());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-double' instruction.
           locations->SetInAt(0, Location::RequiresRegister());
           locations->SetOut(Location::RequiresFpuRegister());
           locations->AddTemp(Location::RequiresFpuRegister());
@@ -3925,7 +3915,6 @@
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-double' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
@@ -3948,22 +3937,55 @@
   Location in = locations->InAt(0);
   DataType::Type result_type = conversion->GetResultType();
   DataType::Type input_type = conversion->GetInputType();
-  DCHECK_NE(result_type, input_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
   switch (result_type) {
-    case DataType::Type::kInt8:
+    case DataType::Type::kUint8:
       switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to byte is a result of code transformations.
-          __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
-          break;
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
+          __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
+          break;
+        case DataType::Type::kInt64:
+          __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case DataType::Type::kInt8:
+      switch (input_type) {
+        case DataType::Type::kUint8:
         case DataType::Type::kUint16:
-          // Processing a Dex `int-to-byte' instruction.
+        case DataType::Type::kInt16:
+        case DataType::Type::kInt32:
           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
           break;
+        case DataType::Type::kInt64:
+          __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case DataType::Type::kUint16:
+      switch (input_type) {
+        case DataType::Type::kInt8:
+        case DataType::Type::kInt16:
+        case DataType::Type::kInt32:
+          __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
+          break;
+        case DataType::Type::kInt64:
+          __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
+          break;
 
         default:
           LOG(FATAL) << "Unexpected type conversion from " << input_type
@@ -3973,18 +3995,13 @@
 
     case DataType::Type::kInt16:
       switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to short is a result of code transformations.
-          __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
-          break;
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt32:
         case DataType::Type::kUint16:
-          // Processing a Dex `int-to-short' instruction.
+        case DataType::Type::kInt32:
           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
           break;
+        case DataType::Type::kInt64:
+          __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
+          break;
 
         default:
           LOG(FATAL) << "Unexpected type conversion from " << input_type
@@ -3995,7 +4012,6 @@
     case DataType::Type::kInt32:
       switch (input_type) {
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-int' instruction.
           DCHECK(out.IsRegister());
           if (in.IsRegisterPair()) {
             __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
@@ -4013,7 +4029,6 @@
           break;
 
         case DataType::Type::kFloat32: {
-          // Processing a Dex `float-to-int' instruction.
           vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
           __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
           __ Vmov(OutputRegister(conversion), temp);
@@ -4021,7 +4036,6 @@
         }
 
         case DataType::Type::kFloat64: {
-          // Processing a Dex `double-to-int' instruction.
           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
           __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
           __ Vmov(OutputRegister(conversion), temp_s);
@@ -4037,12 +4051,11 @@
     case DataType::Type::kInt64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-long' instruction.
           DCHECK(out.IsRegisterPair());
           DCHECK(in.IsRegister());
           __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
@@ -4051,13 +4064,11 @@
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-long' instruction.
           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-long' instruction.
           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
@@ -4068,49 +4079,24 @@
       }
       break;
 
-    case DataType::Type::kUint16:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to char is a result of code transformations.
-          __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
-          break;
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt16:
-        case DataType::Type::kInt32:
-          // Processing a Dex `int-to-char' instruction.
-          __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
-      break;
-
     case DataType::Type::kFloat32:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16: {
-          // Processing a Dex `int-to-float' instruction.
           __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
           __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
           break;
-        }
 
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-float' instruction.
           codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc());
           CheckEntrypointTypes<kQuickL2f, float, int64_t>();
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-float' instruction.
           __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
           break;
 
@@ -4123,19 +4109,16 @@
     case DataType::Type::kFloat64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16: {
-          // Processing a Dex `int-to-double' instruction.
           __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
           __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
           break;
-        }
 
         case DataType::Type::kInt64: {
-          // Processing a Dex `long-to-double' instruction.
           vixl32::Register low = LowRegisterFrom(in);
           vixl32::Register high = HighRegisterFrom(in);
           vixl32::SRegister out_s = LowSRegisterFrom(out);
@@ -4158,7 +4141,6 @@
         }
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-double' instruction.
           __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
           break;
 
@@ -4760,6 +4742,7 @@
 
   switch (instruction->GetType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -5288,9 +5271,10 @@
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kInt64: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -5323,9 +5307,10 @@
   vixl32::Condition less_cond = vixl32::Condition(kNone);
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32: {
       // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
       __ Mov(out, 0);
@@ -5513,18 +5498,16 @@
 
   switch (field_type) {
     case DataType::Type::kBool:
-    case DataType::Type::kInt8: {
-      GetAssembler()->StoreToOffset(kStoreByte, RegisterFrom(value), base, offset);
-      break;
-    }
-
+    case DataType::Type::kUint8:
+    case DataType::Type::kInt8:
+    case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-    case DataType::Type::kUint16: {
-      GetAssembler()->StoreToOffset(kStoreHalfword, RegisterFrom(value), base, offset);
+    case DataType::Type::kInt32: {
+      StoreOperandType operand_type = GetStoreOperandType(field_type);
+      GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset);
       break;
     }
 
-    case DataType::Type::kInt32:
     case DataType::Type::kReference: {
       if (kPoisonHeapReferences && needs_write_barrier) {
         // Note that in the case where `value` is a null reference,
@@ -5764,24 +5747,15 @@
 
   switch (field_type) {
     case DataType::Type::kBool:
-      GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out), base, offset);
-      break;
-
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-      GetAssembler()->LoadFromOffset(kLoadSignedByte, RegisterFrom(out), base, offset);
-      break;
-
-    case DataType::Type::kInt16:
-      GetAssembler()->LoadFromOffset(kLoadSignedHalfword, RegisterFrom(out), base, offset);
-      break;
-
     case DataType::Type::kUint16:
-      GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, RegisterFrom(out), base, offset);
+    case DataType::Type::kInt16:
+    case DataType::Type::kInt32: {
+      LoadOperandType operand_type = GetLoadOperandType(field_type);
+      GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
       break;
-
-    case DataType::Type::kInt32:
-      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
-      break;
+    }
 
     case DataType::Type::kReference: {
       // /* HeapReference<Object> */ out = *(base + offset)
@@ -5995,56 +5969,6 @@
   codegen_->GenerateNullCheck(instruction);
 }
 
-static LoadOperandType GetLoadOperandType(DataType::Type type) {
-  switch (type) {
-    case DataType::Type::kReference:
-      return kLoadWord;
-    case DataType::Type::kBool:
-      return kLoadUnsignedByte;
-    case DataType::Type::kInt8:
-      return kLoadSignedByte;
-    case DataType::Type::kUint16:
-      return kLoadUnsignedHalfword;
-    case DataType::Type::kInt16:
-      return kLoadSignedHalfword;
-    case DataType::Type::kInt32:
-      return kLoadWord;
-    case DataType::Type::kInt64:
-      return kLoadWordPair;
-    case DataType::Type::kFloat32:
-      return kLoadSWord;
-    case DataType::Type::kFloat64:
-      return kLoadDWord;
-    default:
-      LOG(FATAL) << "Unreachable type " << type;
-      UNREACHABLE();
-  }
-}
-
-static StoreOperandType GetStoreOperandType(DataType::Type type) {
-  switch (type) {
-    case DataType::Type::kReference:
-      return kStoreWord;
-    case DataType::Type::kBool:
-    case DataType::Type::kInt8:
-      return kStoreByte;
-    case DataType::Type::kUint16:
-    case DataType::Type::kInt16:
-      return kStoreHalfword;
-    case DataType::Type::kInt32:
-      return kStoreWord;
-    case DataType::Type::kInt64:
-      return kStoreWordPair;
-    case DataType::Type::kFloat32:
-      return kStoreSWord;
-    case DataType::Type::kFloat64:
-      return kStoreDWord;
-    default:
-      LOG(FATAL) << "Unreachable type " << type;
-      UNREACHABLE();
-  }
-}
-
 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type,
                                                     Location out_loc,
                                                     vixl32::Register base,
@@ -6054,18 +5978,19 @@
   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
 
   switch (type) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
       break;
     case DataType::Type::kBool:
       __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
       break;
-    case DataType::Type::kInt16:
-      __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
-      break;
     case DataType::Type::kUint16:
       __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
       break;
+    case DataType::Type::kInt16:
+      __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
+      break;
     case DataType::Type::kReference:
     case DataType::Type::kInt32:
       __ Ldr(cond, RegisterFrom(out_loc), mem_address);
@@ -6089,12 +6014,13 @@
   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
 
   switch (type) {
-    case DataType::Type::kInt8:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
+    case DataType::Type::kInt8:
       __ Strb(cond, RegisterFrom(loc), mem_address);
       break;
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
       __ Strh(cond, RegisterFrom(loc), mem_address);
       break;
     case DataType::Type::kReference:
@@ -6182,9 +6108,10 @@
 
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32: {
       vixl32::Register length;
       if (maybe_compressed_char_at) {
@@ -6434,9 +6361,10 @@
 
   switch (value_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32: {
       if (index.IsConstant()) {
         int32_t const_index = Int32ConstantFrom(index);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index a7c8557..70c8a5b 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -51,12 +51,13 @@
 
 Location MipsReturnLocation(DataType::Type return_type) {
   switch (return_type) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
     case DataType::Type::kInt32:
-    case DataType::Type::kReference:
       return Location::RegisterLocation(V0);
 
     case DataType::Type::kInt64:
@@ -84,12 +85,13 @@
   Location next_location;
 
   switch (type) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-    case DataType::Type::kInt32:
-    case DataType::Type::kReference: {
+    case DataType::Type::kInt32: {
       uint32_t gp_index = gp_index_++;
       if (gp_index < calling_convention.GetNumberOfRegisters()) {
         next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index));
@@ -2592,7 +2594,8 @@
   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
                                         instruction->IsStringCharAt();
   switch (type) {
-    case DataType::Type::kBool: {
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8: {
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -2618,19 +2621,6 @@
       break;
     }
 
-    case DataType::Type::kInt16: {
-      Register out = out_loc.AsRegister<Register>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
-      } else {
-        __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_2, TMP);
-        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
-      }
-      break;
-    }
-
     case DataType::Type::kUint16: {
       Register out = out_loc.AsRegister<Register>();
       if (maybe_compressed_char_at) {
@@ -2683,6 +2673,19 @@
       break;
     }
 
+    case DataType::Type::kInt16: {
+      Register out = out_loc.AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
+      } else {
+        __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_2, TMP);
+        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
+      }
+      break;
+    }
+
     case DataType::Type::kInt32: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
       Register out = out_loc.AsRegister<Register>();
@@ -2880,6 +2883,7 @@
 
   switch (value_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       if (index.IsConstant()) {
@@ -2897,8 +2901,8 @@
       break;
     }
 
-    case DataType::Type::kInt16:
-    case DataType::Type::kUint16: {
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       if (index.IsConstant()) {
         data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
@@ -3390,9 +3394,10 @@
 
   switch (in_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -3429,9 +3434,10 @@
   // -1 if: left  < right
   switch (in_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32: {
       Register lhs = locations->InAt(0).AsRegister<Register>();
       Register rhs = locations->InAt(1).AsRegister<Register>();
@@ -3833,6 +3839,7 @@
 
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -6159,17 +6166,18 @@
 
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
       load_type = kLoadUnsignedByte;
       break;
     case DataType::Type::kInt8:
       load_type = kLoadSignedByte;
       break;
-    case DataType::Type::kInt16:
-      load_type = kLoadSignedHalfword;
-      break;
     case DataType::Type::kUint16:
       load_type = kLoadUnsignedHalfword;
       break;
+    case DataType::Type::kInt16:
+      load_type = kLoadSignedHalfword;
+      break;
     case DataType::Type::kInt32:
     case DataType::Type::kFloat32:
     case DataType::Type::kReference:
@@ -6312,11 +6320,12 @@
 
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       store_type = kStoreByte;
       break;
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
       store_type = kStoreHalfword;
       break;
     case DataType::Type::kInt32:
@@ -8601,7 +8610,8 @@
 void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
   DataType::Type input_type = conversion->GetInputType();
   DataType::Type result_type = conversion->GetResultType();
-  DCHECK_NE(input_type, result_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
   bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
@@ -8652,7 +8662,8 @@
   bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
   bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
-  DCHECK_NE(input_type, result_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
 
   if (result_type == DataType::Type::kInt64 && DataType::IsIntegralType(input_type)) {
     Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
@@ -8670,8 +8681,8 @@
         : locations->InAt(0).AsRegister<Register>();
 
     switch (result_type) {
-      case DataType::Type::kUint16:
-        __ Andi(dst, src, 0xFFFF);
+      case DataType::Type::kUint8:
+        __ Andi(dst, src, 0xFF);
         break;
       case DataType::Type::kInt8:
         if (has_sign_extension) {
@@ -8681,6 +8692,9 @@
           __ Sra(dst, dst, 24);
         }
         break;
+      case DataType::Type::kUint16:
+        __ Andi(dst, src, 0xFFFF);
+        break;
       case DataType::Type::kInt16:
         if (has_sign_extension) {
           __ Seh(dst, src);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 7051cce..6877003 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -50,6 +50,7 @@
 Location Mips64ReturnLocation(DataType::Type return_type) {
   switch (return_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -2170,7 +2171,8 @@
   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
                                         instruction->IsStringCharAt();
   switch (type) {
-    case DataType::Type::kBool: {
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8: {
       GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -2196,19 +2198,6 @@
       break;
     }
 
-    case DataType::Type::kInt16: {
-      GpuRegister out = out_loc.AsRegister<GpuRegister>();
-      if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
-      } else {
-        __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_2);
-        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
-      }
-      break;
-    }
-
     case DataType::Type::kUint16: {
       GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (maybe_compressed_char_at) {
@@ -2261,6 +2250,19 @@
       break;
     }
 
+    case DataType::Type::kInt16: {
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
+      } else {
+        __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_2);
+        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
+      }
+      break;
+    }
+
     case DataType::Type::kInt32: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
       GpuRegister out = out_loc.AsRegister<GpuRegister>();
@@ -2460,6 +2462,7 @@
 
   switch (value_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       if (index.IsConstant()) {
@@ -2477,8 +2480,8 @@
       break;
     }
 
-    case DataType::Type::kInt16:
-    case DataType::Type::kUint16: {
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       if (index.IsConstant()) {
         data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
@@ -2969,9 +2972,10 @@
 
   switch (in_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kInt64:
       locations->SetInAt(0, Location::RequiresRegister());
@@ -3001,9 +3005,10 @@
   // -1 if: left  < right
   switch (in_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kInt64: {
       GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
@@ -4681,17 +4686,18 @@
 
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
       load_type = kLoadUnsignedByte;
       break;
     case DataType::Type::kInt8:
       load_type = kLoadSignedByte;
       break;
-    case DataType::Type::kInt16:
-      load_type = kLoadSignedHalfword;
-      break;
     case DataType::Type::kUint16:
       load_type = kLoadUnsignedHalfword;
       break;
+    case DataType::Type::kInt16:
+      load_type = kLoadSignedHalfword;
+      break;
     case DataType::Type::kInt32:
     case DataType::Type::kFloat32:
       load_type = kLoadWord;
@@ -4779,11 +4785,12 @@
 
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       store_type = kStoreByte;
       break;
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
       store_type = kStoreHalfword;
       break;
     case DataType::Type::kInt32:
@@ -6767,7 +6774,8 @@
 void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) {
   DataType::Type input_type = conversion->GetInputType();
   DataType::Type result_type = conversion->GetResultType();
-  DCHECK_NE(input_type, result_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
 
   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
       (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
@@ -6794,15 +6802,16 @@
   DataType::Type result_type = conversion->GetResultType();
   DataType::Type input_type = conversion->GetInputType();
 
-  DCHECK_NE(input_type, result_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
 
   if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
     GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
     GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
 
     switch (result_type) {
-      case DataType::Type::kUint16:
-        __ Andi(dst, src, 0xFFFF);
+      case DataType::Type::kUint8:
+        __ Andi(dst, src, 0xFF);
         break;
       case DataType::Type::kInt8:
         if (input_type == DataType::Type::kInt64) {
@@ -6815,6 +6824,9 @@
           __ Seb(dst, src);
         }
         break;
+      case DataType::Type::kUint16:
+        __ Andi(dst, src, 0xFFFF);
+        break;
       case DataType::Type::kInt16:
         if (input_type == DataType::Type::kInt64) {
           // Type conversion from long to types narrower than int is a result of code
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 5d5623b..b2aec1e 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -42,6 +42,7 @@
   HInstruction* input = instruction->InputAt(0);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -73,6 +74,7 @@
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       if (src_loc.IsConstant()) {
@@ -132,6 +134,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -185,6 +188,7 @@
                         instruction->IsVecNot() ? Location::kOutputOverlap
                                                 : Location::kNoOutputOverlap);
       break;
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -268,6 +272,7 @@
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ Neg(dst.V16B(), src.V16B());
@@ -312,7 +317,6 @@
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ Abs(dst.V16B(), src.V16B());
       break;
-    case DataType::Type::kUint16:
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Abs(dst.V8H(), src.V8H());
@@ -353,6 +357,7 @@
       __ Movi(dst.V16B(), 1);
       __ Eor(dst.V16B(), dst.V16B(), src.V16B());
       break;
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -371,6 +376,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -398,6 +404,7 @@
   VRegister rhs = VRegisterFrom(locations->InAt(1));
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
@@ -439,30 +446,29 @@
   VRegister rhs = VRegisterFrom(locations->InAt(1));
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      instruction->IsRounded()
+          ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+          : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        instruction->IsRounded()
-            ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
-            : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
-      } else {
-        instruction->IsRounded()
-            ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
-            : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
-      }
+      instruction->IsRounded()
+          ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+          : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      instruction->IsRounded()
+          ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+          : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        instruction->IsRounded()
-            ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
-            : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
-      } else {
-        instruction->IsRounded()
-            ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
-            : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
-      }
+      instruction->IsRounded()
+          ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+          : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -480,6 +486,7 @@
   VRegister rhs = VRegisterFrom(locations->InAt(1));
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
@@ -521,6 +528,7 @@
   VRegister rhs = VRegisterFrom(locations->InAt(1));
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
@@ -582,22 +590,21 @@
   VRegister rhs = VRegisterFrom(locations->InAt(1));
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
-      } else {
-        __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
-      }
+      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
-      } else {
-        __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
-      }
+      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -633,22 +640,21 @@
   VRegister rhs = VRegisterFrom(locations->InAt(1));
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
-      } else {
-        __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
-      }
+      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
-      } else {
-        __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
-      }
+      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -675,6 +681,7 @@
 }
 
 void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
+  // TODO: Allow constants supported by BIC (vector, immediate).
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
 
@@ -685,6 +692,7 @@
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -705,6 +713,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
+  // TODO: Use BIC (vector, register).
   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
 }
 
@@ -719,6 +728,7 @@
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -745,6 +755,7 @@
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -764,6 +775,7 @@
 static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -789,6 +801,7 @@
   VRegister dst = VRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ Shl(dst.V16B(), lhs.V16B(), value);
@@ -822,6 +835,7 @@
   VRegister dst = VRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ Sshr(dst.V16B(), lhs.V16B(), value);
@@ -855,6 +869,7 @@
   VRegister dst = VRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ Ushr(dst.V16B(), lhs.V16B(), value);
@@ -888,6 +903,7 @@
 
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -926,6 +942,7 @@
   // Set required elements.
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
@@ -953,6 +970,7 @@
 static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -985,6 +1003,7 @@
   DCHECK(locations->InAt(0).Equals(locations->Out()));
 
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       if (instruction->GetOpKind() == HInstruction::kAdd) {
@@ -1024,6 +1043,7 @@
   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
   DCHECK_EQ(a->GetPackedType(), b->GetPackedType());
   switch (a->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       switch (instruction->GetPackedType()) {
         case DataType::Type::kInt64:
@@ -1069,10 +1089,10 @@
   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
   DCHECK_EQ(a->GetPackedType(), b->GetPackedType());
   switch (a->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, a->GetVectorLength());
       switch (instruction->GetPackedType()) {
-        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
           DCHECK_EQ(8u, instruction->GetVectorLength());
           __ Sabal(acc.V8H(), left.V8B(), right.V8B());
@@ -1202,6 +1222,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -1300,6 +1321,7 @@
       }
       FALLTHROUGH_INTENDED;
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kInt16:
     case DataType::Type::kInt32:
@@ -1329,6 +1351,7 @@
 
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 333d108..df75752 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -36,6 +36,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -54,6 +55,7 @@
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Vdup(Untyped8, dst, InputRegisterAt(instruction, 0));
@@ -91,6 +93,7 @@
                         instruction->IsVecNot() ? Location::kOutputOverlap
                                                 : Location::kNoOutputOverlap);
       break;
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -129,6 +132,7 @@
   vixl32::DRegister src = DRegisterFrom(locations->InAt(0));
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Vneg(DataTypeValue::S8, dst, src);
@@ -161,7 +165,6 @@
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Vabs(DataTypeValue::S8, dst, src);
       break;
-    case DataType::Type::kUint16:
     case DataType::Type::kInt16:
       DCHECK_EQ(4u, instruction->GetVectorLength());
       __ Vabs(DataTypeValue::S16, dst, src);
@@ -190,6 +193,7 @@
       __ Vmov(I8, dst, 1);
       __ Veor(dst, dst, src);
       break;
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -207,6 +211,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -231,6 +236,7 @@
   vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Vadd(I8, dst, lhs, rhs);
@@ -260,30 +266,29 @@
   vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      instruction->IsRounded()
+          ? __ Vrhadd(DataTypeValue::U8, dst, lhs, rhs)
+          : __ Vhadd(DataTypeValue::U8, dst, lhs, rhs);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        instruction->IsRounded()
-            ? __ Vrhadd(DataTypeValue::U8, dst, lhs, rhs)
-            : __ Vhadd(DataTypeValue::U8, dst, lhs, rhs);
-      } else {
-        instruction->IsRounded()
-            ? __ Vrhadd(DataTypeValue::S8, dst, lhs, rhs)
-            : __ Vhadd(DataTypeValue::S8, dst, lhs, rhs);
-      }
+      instruction->IsRounded()
+          ? __ Vrhadd(DataTypeValue::S8, dst, lhs, rhs)
+          : __ Vhadd(DataTypeValue::S8, dst, lhs, rhs);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      instruction->IsRounded()
+          ? __ Vrhadd(DataTypeValue::U16, dst, lhs, rhs)
+          : __ Vhadd(DataTypeValue::U16, dst, lhs, rhs);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        instruction->IsRounded()
-            ? __ Vrhadd(DataTypeValue::U16, dst, lhs, rhs)
-            : __ Vhadd(DataTypeValue::U16, dst, lhs, rhs);
-      } else {
-        instruction->IsRounded()
-            ? __ Vrhadd(DataTypeValue::S16, dst, lhs, rhs)
-            : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs);
-      }
+      instruction->IsRounded()
+          ? __ Vrhadd(DataTypeValue::S16, dst, lhs, rhs)
+          : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -301,6 +306,7 @@
   vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Vsub(I8, dst, lhs, rhs);
@@ -330,6 +336,7 @@
   vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Vmul(I8, dst, lhs, rhs);
@@ -367,22 +374,21 @@
   vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vmin(DataTypeValue::U8, dst, lhs, rhs);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Vmin(DataTypeValue::U8, dst, lhs, rhs);
-      } else {
-        __ Vmin(DataTypeValue::S8, dst, lhs, rhs);
-      }
+      __ Vmin(DataTypeValue::S8, dst, lhs, rhs);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vmin(DataTypeValue::U16, dst, lhs, rhs);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Vmin(DataTypeValue::U16, dst, lhs, rhs);
-      } else {
-        __ Vmin(DataTypeValue::S16, dst, lhs, rhs);
-      }
+      __ Vmin(DataTypeValue::S16, dst, lhs, rhs);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(2u, instruction->GetVectorLength());
@@ -408,22 +414,21 @@
   vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vmax(DataTypeValue::U8, dst, lhs, rhs);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Vmax(DataTypeValue::U8, dst, lhs, rhs);
-      } else {
-        __ Vmax(DataTypeValue::S8, dst, lhs, rhs);
-      }
+      __ Vmax(DataTypeValue::S8, dst, lhs, rhs);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vmax(DataTypeValue::U16, dst, lhs, rhs);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Vmax(DataTypeValue::U16, dst, lhs, rhs);
-      } else {
-        __ Vmax(DataTypeValue::S16, dst, lhs, rhs);
-      }
+      __ Vmax(DataTypeValue::S16, dst, lhs, rhs);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(2u, instruction->GetVectorLength());
@@ -440,6 +445,7 @@
 }
 
 void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
+  // TODO: Allow constants supported by VAND (immediate).
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
 
@@ -450,6 +456,7 @@
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -481,6 +488,7 @@
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -504,6 +512,7 @@
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -520,6 +529,7 @@
 static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -544,6 +554,7 @@
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Vshl(I8, dst, lhs, value);
@@ -573,6 +584,7 @@
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Vshr(DataTypeValue::S8, dst, lhs, value);
@@ -602,6 +614,7 @@
   vixl32::DRegister dst = DRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ Vshr(DataTypeValue::U8, dst, lhs, value);
@@ -633,6 +646,7 @@
 static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -678,6 +692,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -764,6 +779,7 @@
 
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       if (IsWordAligned(instruction)) {
@@ -811,6 +827,7 @@
   vixl32::Register scratch;
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       if (IsWordAligned(instruction)) {
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c25f5ac..e8c5157 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -27,6 +27,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -51,6 +52,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ FillB(dst, locations->InAt(0).AsRegister<Register>());
@@ -106,6 +108,7 @@
                         instruction->IsVecNot() ? Location::kOutputOverlap
                                                 : Location::kNoOutputOverlap);
       break;
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -160,6 +163,7 @@
   VectorRegister src = VectorRegisterFrom(locations->InAt(0));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ FillB(dst, ZERO);
@@ -211,7 +215,6 @@
       __ FillB(dst, ZERO);       // all zeroes
       __ Add_aB(dst, dst, src);  // dst = abs(0) + abs(src)
       break;
-    case DataType::Type::kUint16:
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ FillH(dst, ZERO);       // all zeroes
@@ -259,6 +262,7 @@
       __ LdiB(dst, 1);
       __ XorV(dst, dst, src);
       break;
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -281,6 +285,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -308,6 +313,7 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ AddvB(dst, lhs, rhs);
@@ -349,30 +355,29 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      instruction->IsRounded()
+          ? __ Aver_uB(dst, lhs, rhs)
+          : __ Ave_uB(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        instruction->IsRounded()
-            ? __ Aver_uB(dst, lhs, rhs)
-            : __ Ave_uB(dst, lhs, rhs);
-      } else {
-        instruction->IsRounded()
-            ? __ Aver_sB(dst, lhs, rhs)
-            : __ Ave_sB(dst, lhs, rhs);
-      }
+      instruction->IsRounded()
+          ? __ Aver_sB(dst, lhs, rhs)
+          : __ Ave_sB(dst, lhs, rhs);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      instruction->IsRounded()
+          ? __ Aver_uH(dst, lhs, rhs)
+          : __ Ave_uH(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        instruction->IsRounded()
-            ? __ Aver_uH(dst, lhs, rhs)
-            : __ Ave_uH(dst, lhs, rhs);
-      } else {
-        instruction->IsRounded()
-            ? __ Aver_sH(dst, lhs, rhs)
-            : __ Ave_sH(dst, lhs, rhs);
-      }
+      instruction->IsRounded()
+          ? __ Aver_sH(dst, lhs, rhs)
+          : __ Ave_sH(dst, lhs, rhs);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -390,6 +395,7 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ SubvB(dst, lhs, rhs);
@@ -431,6 +437,7 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ MulvB(dst, lhs, rhs);
@@ -496,22 +503,21 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ Min_uB(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Min_uB(dst, lhs, rhs);
-      } else {
-        __ Min_sB(dst, lhs, rhs);
-      }
+      __ Min_sB(dst, lhs, rhs);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Min_uH(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Min_uH(dst, lhs, rhs);
-      } else {
-        __ Min_sH(dst, lhs, rhs);
-      }
+      __ Min_sH(dst, lhs, rhs);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -557,22 +563,21 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ Max_uB(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Max_uB(dst, lhs, rhs);
-      } else {
-        __ Max_sB(dst, lhs, rhs);
-      }
+      __ Max_sB(dst, lhs, rhs);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Max_uH(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Max_uH(dst, lhs, rhs);
-      } else {
-        __ Max_sH(dst, lhs, rhs);
-      }
+      __ Max_sH(dst, lhs, rhs);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -619,6 +624,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -655,6 +661,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -683,6 +690,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -704,6 +712,7 @@
 static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -729,6 +738,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ SlliB(dst, lhs, value);
@@ -762,6 +772,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ SraiB(dst, lhs, value);
@@ -795,6 +806,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ SrliB(dst, lhs, value);
@@ -830,6 +842,7 @@
 static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -856,6 +869,7 @@
   VectorRegister left = VectorRegisterFrom(locations->InAt(1));
   VectorRegister right = VectorRegisterFrom(locations->InAt(2));
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       if (instruction->GetOpKind() == HInstruction::kAdd) {
@@ -911,6 +925,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -976,6 +991,7 @@
   int32_t offset = VecAddress(locations, size, &base);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ LdB(reg, base, offset);
@@ -1018,6 +1034,7 @@
   int32_t offset = VecAddress(locations, size, &base);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ StB(reg, base, offset);
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index f60f708..7d69773 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -32,6 +32,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -56,6 +57,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ FillB(dst, locations->InAt(0).AsRegister<GpuRegister>());
@@ -109,6 +111,7 @@
                         instruction->IsVecNot() ? Location::kOutputOverlap
                                                 : Location::kNoOutputOverlap);
       break;
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -164,6 +167,7 @@
   VectorRegister src = VectorRegisterFrom(locations->InAt(0));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ FillB(dst, ZERO);
@@ -215,7 +219,6 @@
       __ FillB(dst, ZERO);       // all zeroes
       __ Add_aB(dst, dst, src);  // dst = abs(0) + abs(src)
       break;
-    case DataType::Type::kUint16:
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ FillH(dst, ZERO);       // all zeroes
@@ -263,6 +266,7 @@
       __ LdiB(dst, 1);
       __ XorV(dst, dst, src);
       break;
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -285,6 +289,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -312,6 +317,7 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ AddvB(dst, lhs, rhs);
@@ -353,30 +359,29 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      instruction->IsRounded()
+          ? __ Aver_uB(dst, lhs, rhs)
+          : __ Ave_uB(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        instruction->IsRounded()
-            ? __ Aver_uB(dst, lhs, rhs)
-            : __ Ave_uB(dst, lhs, rhs);
-      } else {
-        instruction->IsRounded()
-            ? __ Aver_sB(dst, lhs, rhs)
-            : __ Ave_sB(dst, lhs, rhs);
-      }
+      instruction->IsRounded()
+          ? __ Aver_sB(dst, lhs, rhs)
+          : __ Ave_sB(dst, lhs, rhs);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      instruction->IsRounded()
+          ? __ Aver_uH(dst, lhs, rhs)
+          : __ Ave_uH(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        instruction->IsRounded()
-            ? __ Aver_uH(dst, lhs, rhs)
-            : __ Ave_uH(dst, lhs, rhs);
-      } else {
-        instruction->IsRounded()
-            ? __ Aver_sH(dst, lhs, rhs)
-            : __ Ave_sH(dst, lhs, rhs);
-      }
+      instruction->IsRounded()
+          ? __ Aver_sH(dst, lhs, rhs)
+          : __ Ave_sH(dst, lhs, rhs);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -394,6 +399,7 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ SubvB(dst, lhs, rhs);
@@ -435,6 +441,7 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ MulvB(dst, lhs, rhs);
@@ -500,22 +507,21 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ Min_uB(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Min_uB(dst, lhs, rhs);
-      } else {
-        __ Min_sB(dst, lhs, rhs);
-      }
+      __ Min_sB(dst, lhs, rhs);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Min_uH(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Min_uH(dst, lhs, rhs);
-      } else {
-        __ Min_sH(dst, lhs, rhs);
-      }
+      __ Min_sH(dst, lhs, rhs);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -561,22 +567,21 @@
   VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ Max_uB(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Max_uB(dst, lhs, rhs);
-      } else {
-        __ Max_sB(dst, lhs, rhs);
-      }
+      __ Max_sB(dst, lhs, rhs);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Max_uH(dst, lhs, rhs);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ Max_uH(dst, lhs, rhs);
-      } else {
-        __ Max_sH(dst, lhs, rhs);
-      }
+      __ Max_sH(dst, lhs, rhs);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -623,6 +628,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -659,6 +665,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -687,6 +694,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -708,6 +716,7 @@
 static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -733,6 +742,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ SlliB(dst, lhs, value);
@@ -766,6 +776,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ SraiB(dst, lhs, value);
@@ -799,6 +810,7 @@
   VectorRegister dst = VectorRegisterFrom(locations->Out());
   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ SrliB(dst, lhs, value);
@@ -834,6 +846,7 @@
 static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -860,6 +873,7 @@
   VectorRegister left = VectorRegisterFrom(locations->InAt(1));
   VectorRegister right = VectorRegisterFrom(locations->InAt(2));
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       if (instruction->GetOpKind() == HInstruction::kAdd) {
@@ -915,6 +929,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -980,6 +995,7 @@
   int32_t offset = VecAddress(locations, size, &base);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ LdB(reg, base, offset);
@@ -1022,6 +1038,7 @@
   int32_t offset = VecAddress(locations, size, &base);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ StB(reg, base, offset);
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 6515dbe..a2ef1b1 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -37,6 +37,7 @@
       }
       FALLTHROUGH_INTENDED;
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -70,6 +71,7 @@
 
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ movd(dst, locations->InAt(0).AsRegister<Register>());
@@ -122,6 +124,7 @@
       locations->AddTemp(Location::RequiresFpuRegister());
       FALLTHROUGH_INTENDED;
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -145,6 +148,7 @@
   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:  // TODO: up to here, and?
@@ -180,6 +184,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -290,6 +295,7 @@
   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ pxor(dst, dst);
@@ -390,6 +396,7 @@
       __ pxor(dst, src);
       break;
     }
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -421,6 +428,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -448,6 +456,7 @@
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ paddb(dst, src);
@@ -490,15 +499,13 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
 
   DCHECK(instruction->IsRounded());
-  DCHECK(instruction->IsUnsigned());
 
   switch (instruction->GetPackedType()) {
-    case DataType::Type::kInt8:
+    case DataType::Type::kUint8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pavgb(dst, src);
      return;
     case DataType::Type::kUint16:
-    case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ pavgw(dst, src);
       return;
@@ -518,6 +525,7 @@
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ psubb(dst, src);
@@ -616,22 +624,21 @@
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ pminub(dst, src);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ pminub(dst, src);
-      } else {
-        __ pminsb(dst, src);
-      }
+      __ pminsb(dst, src);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pminuw(dst, src);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ pminuw(dst, src);
-      } else {
-        __ pminsw(dst, src);
-      }
+      __ pminsw(dst, src);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -668,22 +675,21 @@
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ pmaxub(dst, src);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ pmaxub(dst, src);
-      } else {
-        __ pmaxsb(dst, src);
-      }
+      __ pmaxsb(dst, src);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pmaxuw(dst, src);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ pmaxuw(dst, src);
-      } else {
-        __ pmaxsw(dst, src);
-      }
+      __ pmaxsw(dst, src);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -721,6 +727,7 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -755,6 +762,7 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -789,6 +797,7 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -823,6 +832,7 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -963,6 +973,7 @@
       }
       FALLTHROUGH_INTENDED;
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -1000,6 +1011,7 @@
   // Set required elements.
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:  // TODO: up to here, and?
@@ -1036,6 +1048,7 @@
 static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -1077,6 +1090,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -1156,6 +1170,7 @@
       }
       FALLTHROUGH_INTENDED;
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kInt16:
     case DataType::Type::kInt32:
@@ -1190,6 +1205,7 @@
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 4241042..2270f6b 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -31,6 +31,7 @@
   bool is_zero = IsZeroBitPattern(input);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -65,6 +66,7 @@
 
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
@@ -109,6 +111,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -133,6 +136,7 @@
   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:  // TODO: up to here, and?
@@ -163,6 +167,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -273,6 +278,7 @@
   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ pxor(dst, dst);
@@ -373,6 +379,7 @@
       __ pxor(dst, src);
       break;
     }
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -404,6 +411,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -431,6 +439,7 @@
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ paddb(dst, src);
@@ -473,15 +482,13 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
 
   DCHECK(instruction->IsRounded());
-  DCHECK(instruction->IsUnsigned());
 
   switch (instruction->GetPackedType()) {
-    case DataType::Type::kInt8:
+    case DataType::Type::kUint8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pavgb(dst, src);
      return;
     case DataType::Type::kUint16:
-    case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       __ pavgw(dst, src);
       return;
@@ -501,6 +508,7 @@
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       __ psubb(dst, src);
@@ -599,22 +607,21 @@
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ pminub(dst, src);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ pminub(dst, src);
-      } else {
-        __ pminsb(dst, src);
-      }
+      __ pminsb(dst, src);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pminuw(dst, src);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ pminuw(dst, src);
-      } else {
-        __ pminsw(dst, src);
-      }
+      __ pminsw(dst, src);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -651,22 +658,21 @@
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ pmaxub(dst, src);
+      break;
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ pmaxub(dst, src);
-      } else {
-        __ pmaxsb(dst, src);
-      }
+      __ pmaxsb(dst, src);
       break;
     case DataType::Type::kUint16:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pmaxuw(dst, src);
+      break;
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      if (instruction->IsUnsigned()) {
-        __ pmaxuw(dst, src);
-      } else {
-        __ pmaxsw(dst, src);
-      }
+      __ pmaxsw(dst, src);
       break;
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -704,6 +710,7 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -738,6 +745,7 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -772,6 +780,7 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -806,6 +815,7 @@
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -940,6 +950,7 @@
 
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -978,6 +989,7 @@
   // Set required elements.
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:  // TODO: up to here, and?
@@ -1009,6 +1021,7 @@
 static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -1050,6 +1063,7 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -1129,6 +1143,7 @@
       }
       FALLTHROUGH_INTENDED;
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kInt16:
     case DataType::Type::kInt32:
@@ -1163,6 +1178,7 @@
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 70e270e..3515649 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1131,12 +1131,13 @@
 
 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
   switch (type) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
     case DataType::Type::kInt32:
-    case DataType::Type::kReference:
       return Location::RegisterLocation(EAX);
 
     case DataType::Type::kInt64:
@@ -1159,12 +1160,13 @@
 
 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
   switch (type) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-    case DataType::Type::kInt32:
-    case DataType::Type::kReference: {
+    case DataType::Type::kInt32: {
       uint32_t index = gp_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfRegisters()) {
@@ -2099,12 +2101,13 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
   switch (ret->InputAt(0)->GetType()) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
     case DataType::Type::kInt32:
-    case DataType::Type::kReference:
       locations->SetInAt(0, Location::RegisterLocation(EAX));
       break;
 
@@ -2127,12 +2130,13 @@
 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
   if (kIsDebugBuild) {
     switch (ret->InputAt(0)->GetType()) {
+      case DataType::Type::kReference:
       case DataType::Type::kBool:
+      case DataType::Type::kUint8:
       case DataType::Type::kInt8:
       case DataType::Type::kUint16:
       case DataType::Type::kInt16:
       case DataType::Type::kInt32:
-      case DataType::Type::kReference:
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
         break;
 
@@ -2408,7 +2412,8 @@
 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
   DataType::Type result_type = conversion->GetResultType();
   DataType::Type input_type = conversion->GetInputType();
-  DCHECK_NE(result_type, input_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
 
   // The float-to-long and double-to-long type conversions rely on a
   // call to the runtime.
@@ -2420,14 +2425,21 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
 
-  // The Java language does not allow treating boolean as an integral type but
-  // our bit representation makes it safe.
-
   switch (result_type) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       switch (input_type) {
+        case DataType::Type::kUint8:
+        case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
+        case DataType::Type::kInt16:
+        case DataType::Type::kInt32:
+          locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
+          // Make the output overlap to please the register allocator. This greatly simplifies
+          // the validation of the linear scan implementation
+          locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+          break;
         case DataType::Type::kInt64: {
-          // Type conversion from long to byte is a result of code transformations.
           HInstruction* input = conversion->InputAt(0);
           Location input_location = input->IsConstant()
               ? Location::ConstantLocation(input->AsConstant())
@@ -2438,17 +2450,6 @@
           locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
           break;
         }
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt16:
-        case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-byte' instruction.
-          locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
-          // Make the output overlap to please the register allocator. This greatly simplifies
-          // the validation of the linear scan implementation
-          locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-          break;
 
         default:
           LOG(FATAL) << "Unexpected type conversion from " << input_type
@@ -2456,43 +2457,27 @@
       }
       break;
 
+    case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to short is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-short' instruction.
-          locations->SetInAt(0, Location::Any());
-          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
+      DCHECK(DataType::IsIntegralType(input_type)) << input_type;
+      locations->SetInAt(0, Location::Any());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
     case DataType::Type::kInt32:
       switch (input_type) {
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-int' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-int' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
           locations->AddTemp(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-int' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
           locations->AddTemp(Location::RequiresFpuRegister());
@@ -2507,19 +2492,17 @@
     case DataType::Type::kInt64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-long' instruction.
           locations->SetInAt(0, Location::RegisterLocation(EAX));
           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
           break;
 
         case DataType::Type::kFloat32:
         case DataType::Type::kFloat64: {
-          // Processing a Dex `float-to-long' or 'double-to-long' instruction.
           InvokeRuntimeCallingConvention calling_convention;
           XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
           locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
@@ -2535,47 +2518,24 @@
       }
       break;
 
-    case DataType::Type::kUint16:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to char is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt16:
-        case DataType::Type::kInt32:
-          // Processing a Dex `int-to-char' instruction.
-          locations->SetInAt(0, Location::Any());
-          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
-      break;
-
     case DataType::Type::kFloat32:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-float' instruction.
           locations->SetInAt(0, Location::RequiresRegister());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-float' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::Any());
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-float' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
@@ -2589,24 +2549,21 @@
     case DataType::Type::kFloat64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-double' instruction.
           locations->SetInAt(0, Location::RequiresRegister());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-double' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::Any());
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-double' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
@@ -2629,12 +2586,54 @@
   Location in = locations->InAt(0);
   DataType::Type result_type = conversion->GetResultType();
   DataType::Type input_type = conversion->GetInputType();
-  DCHECK_NE(result_type, input_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
   switch (result_type) {
+    case DataType::Type::kUint8:
+      switch (input_type) {
+        case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
+        case DataType::Type::kInt16:
+        case DataType::Type::kInt32:
+          if (in.IsRegister()) {
+            __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
+          } else {
+            DCHECK(in.GetConstant()->IsIntConstant());
+            int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
+          }
+          break;
+        case DataType::Type::kInt64:
+          if (in.IsRegisterPair()) {
+            __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
+          }
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case DataType::Type::kInt8:
       switch (input_type) {
+        case DataType::Type::kUint8:
+        case DataType::Type::kUint16:
+        case DataType::Type::kInt16:
+        case DataType::Type::kInt32:
+          if (in.IsRegister()) {
+            __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
+          } else {
+            DCHECK(in.GetConstant()->IsIntConstant());
+            int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
+          }
+          break;
         case DataType::Type::kInt64:
-          // Type conversion from long to byte is a result of code transformations.
           if (in.IsRegisterPair()) {
             __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
           } else {
@@ -2643,18 +2642,37 @@
             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
           }
           break;
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case DataType::Type::kUint16:
+      switch (input_type) {
+        case DataType::Type::kInt8:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-byte' instruction.
           if (in.IsRegister()) {
-            __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
+            __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
+          } else if (in.IsStackSlot()) {
+            __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
-            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
+          }
+          break;
+        case DataType::Type::kInt64:
+          if (in.IsRegisterPair()) {
+            __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
+          } else if (in.IsDoubleStackSlot()) {
+            __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
           }
           break;
 
@@ -2666,24 +2684,8 @@
 
     case DataType::Type::kInt16:
       switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to short is a result of code transformations.
-          if (in.IsRegisterPair()) {
-            __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
-          } else if (in.IsDoubleStackSlot()) {
-            __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
-          } else {
-            DCHECK(in.GetConstant()->IsLongConstant());
-            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
-            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
-          }
-          break;
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt32:
         case DataType::Type::kUint16:
-          // Processing a Dex `int-to-short' instruction.
+        case DataType::Type::kInt32:
           if (in.IsRegister()) {
             __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
           } else if (in.IsStackSlot()) {
@@ -2694,6 +2696,17 @@
             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
           }
           break;
+        case DataType::Type::kInt64:
+          if (in.IsRegisterPair()) {
+            __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
+          } else if (in.IsDoubleStackSlot()) {
+            __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
+          }
+          break;
 
         default:
           LOG(FATAL) << "Unexpected type conversion from " << input_type
@@ -2704,7 +2717,6 @@
     case DataType::Type::kInt32:
       switch (input_type) {
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-int' instruction.
           if (in.IsRegisterPair()) {
             __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
           } else if (in.IsDoubleStackSlot()) {
@@ -2718,7 +2730,6 @@
           break;
 
         case DataType::Type::kFloat32: {
-          // Processing a Dex `float-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           Register output = out.AsRegister<Register>();
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
@@ -2743,7 +2754,6 @@
         }
 
         case DataType::Type::kFloat64: {
-          // Processing a Dex `double-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           Register output = out.AsRegister<Register>();
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
@@ -2776,12 +2786,11 @@
     case DataType::Type::kInt64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-long' instruction.
           DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
           DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
           DCHECK_EQ(in.AsRegister<Register>(), EAX);
@@ -2789,13 +2798,11 @@
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-long' instruction.
           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-long' instruction.
           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
@@ -2806,57 +2813,18 @@
       }
       break;
 
-    case DataType::Type::kUint16:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to short is a result of code transformations.
-          if (in.IsRegisterPair()) {
-            __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
-          } else if (in.IsDoubleStackSlot()) {
-            __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
-          } else {
-            DCHECK(in.GetConstant()->IsLongConstant());
-            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
-            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
-          }
-          break;
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt16:
-        case DataType::Type::kInt32:
-          // Processing a Dex `Process a Dex `int-to-char'' instruction.
-          if (in.IsRegister()) {
-            __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
-          } else if (in.IsStackSlot()) {
-            __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
-          } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
-            int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
-            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
-          }
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
-      break;
-
     case DataType::Type::kFloat32:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-float' instruction.
           __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
           break;
 
         case DataType::Type::kInt64: {
-          // Processing a Dex `long-to-float' instruction.
           size_t adjustment = 0;
 
           // Create stack space for the call to
@@ -2886,7 +2854,6 @@
         }
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-float' instruction.
           __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
           break;
 
@@ -2899,17 +2866,15 @@
     case DataType::Type::kFloat64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-double' instruction.
           __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
           break;
 
         case DataType::Type::kInt64: {
-          // Processing a Dex `long-to-double' instruction.
           size_t adjustment = 0;
 
           // Create stack space for the call to
@@ -2939,7 +2904,6 @@
         }
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-double' instruction.
           __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
           break;
 
@@ -3832,6 +3796,7 @@
   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   switch (instruction->GetType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -3860,6 +3825,7 @@
 
   switch (instruction->GetType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -4349,9 +4315,10 @@
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kInt64: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -4388,9 +4355,10 @@
 
   switch (compare->InputAt(0)->GetType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32: {
       codegen_->GenerateIntCompare(left, right);
       break;
@@ -4792,7 +4760,8 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (field_type) {
-    case DataType::Type::kBool: {
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8: {
       __ movzxb(out.AsRegister<Register>(), Address(base, offset));
       break;
     }
@@ -4802,13 +4771,13 @@
       break;
     }
 
-    case DataType::Type::kInt16: {
-      __ movsxw(out.AsRegister<Register>(), Address(base, offset));
+    case DataType::Type::kUint16: {
+      __ movzxw(out.AsRegister<Register>(), Address(base, offset));
       break;
     }
 
-    case DataType::Type::kUint16: {
-      __ movzxw(out.AsRegister<Register>(), Address(base, offset));
+    case DataType::Type::kInt16: {
+      __ movsxw(out.AsRegister<Register>(), Address(base, offset));
       break;
     }
 
@@ -4897,8 +4866,7 @@
   locations->SetInAt(0, Location::RequiresRegister());
   bool is_volatile = field_info.IsVolatile();
   DataType::Type field_type = field_info.GetFieldType();
-  bool is_byte_type = (field_type == DataType::Type::kBool)
-    || (field_type == DataType::Type::kInt8);
+  bool is_byte_type = DataType::Size(field_type) == 1u;
 
   // The register allocator does not support multiple
   // inputs that die at entry with one in a specific register.
@@ -4957,13 +4925,14 @@
 
   switch (field_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8: {
       __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
       break;
     }
 
-    case DataType::Type::kInt16:
-    case DataType::Type::kUint16: {
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16: {
       if (value.IsConstant()) {
         __ movw(Address(base, offset),
                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
@@ -5242,7 +5211,8 @@
 
   DataType::Type type = instruction->GetType();
   switch (type) {
-    case DataType::Type::kBool: {
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8: {
       Register out = out_loc.AsRegister<Register>();
       __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
       break;
@@ -5254,12 +5224,6 @@
       break;
     }
 
-    case DataType::Type::kInt16: {
-      Register out = out_loc.AsRegister<Register>();
-      __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
-      break;
-    }
-
     case DataType::Type::kUint16: {
       Register out = out_loc.AsRegister<Register>();
       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
@@ -5284,6 +5248,12 @@
       break;
     }
 
+    case DataType::Type::kInt16: {
+      Register out = out_loc.AsRegister<Register>();
+      __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
+      break;
+    }
+
     case DataType::Type::kInt32: {
       Register out = out_loc.AsRegister<Register>();
       __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
@@ -5368,8 +5338,7 @@
           LocationSummary::kCallOnSlowPath :
           LocationSummary::kNoCall);
 
-  bool is_byte_type = (value_type == DataType::Type::kBool)
-      || (value_type == DataType::Type::kInt8);
+  bool is_byte_type = DataType::Size(value_type) == 1u;
   // We need the inputs to be different than the output in case of long operation.
   // In case of a byte operation, the register allocator does not support multiple
   // inputs that die at entry with one in a specific register.
@@ -5407,6 +5376,7 @@
 
   switch (value_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8: {
       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
@@ -5419,8 +5389,8 @@
       break;
     }
 
-    case DataType::Type::kInt16:
-    case DataType::Type::kUint16: {
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16: {
       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
       if (value.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 42704e9..e8bfa66 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1521,6 +1521,7 @@
   DataType::Type type = condition->InputAt(0)->GetType();
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -2036,9 +2037,10 @@
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
   switch (compare->InputAt(0)->GetType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kInt64: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -2070,9 +2072,10 @@
 
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32: {
       codegen_->GenerateIntCompare(left, right);
       break;
@@ -2207,12 +2210,13 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
   switch (ret->InputAt(0)->GetType()) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
     case DataType::Type::kInt32:
-    case DataType::Type::kReference:
     case DataType::Type::kInt64:
       locations->SetInAt(0, Location::RegisterLocation(RAX));
       break;
@@ -2230,12 +2234,13 @@
 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
   if (kIsDebugBuild) {
     switch (ret->InputAt(0)->GetType()) {
+      case DataType::Type::kReference:
       case DataType::Type::kBool:
+      case DataType::Type::kUint8:
       case DataType::Type::kInt8:
       case DataType::Type::kUint16:
       case DataType::Type::kInt16:
       case DataType::Type::kInt32:
-      case DataType::Type::kReference:
       case DataType::Type::kInt64:
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
         break;
@@ -2255,12 +2260,13 @@
 
 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
   switch (type) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
     case DataType::Type::kInt32:
-    case DataType::Type::kReference:
     case DataType::Type::kInt64:
       return Location::RegisterLocation(RAX);
 
@@ -2281,12 +2287,13 @@
 
 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
   switch (type) {
+    case DataType::Type::kReference:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-    case DataType::Type::kInt32:
-    case DataType::Type::kReference: {
+    case DataType::Type::kInt32: {
       uint32_t index = gp_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfRegisters()) {
@@ -2536,68 +2543,32 @@
       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   DataType::Type result_type = conversion->GetResultType();
   DataType::Type input_type = conversion->GetInputType();
-  DCHECK_NE(result_type, input_type);
-
-  // The Java language does not allow treating boolean as an integral type but
-  // our bit representation makes it safe.
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
 
   switch (result_type) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to byte is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt16:
-        case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-byte' instruction.
-          locations->SetInAt(0, Location::Any());
-          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
-      break;
-
+    case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to short is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-short' instruction.
-          locations->SetInAt(0, Location::Any());
-          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
+      DCHECK(DataType::IsIntegralType(input_type)) << input_type;
+      locations->SetInAt(0, Location::Any());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
     case DataType::Type::kInt32:
       switch (input_type) {
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-int' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-int' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-int' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
           break;
@@ -2611,12 +2582,11 @@
     case DataType::Type::kInt64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-long' instruction.
           // TODO: We would benefit from a (to-be-implemented)
           // Location::RegisterOrStackSlot requirement for this input.
           locations->SetInAt(0, Location::RequiresRegister());
@@ -2624,13 +2594,11 @@
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-long' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-long' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
           break;
@@ -2641,47 +2609,24 @@
       }
       break;
 
-    case DataType::Type::kUint16:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to char is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt16:
-        case DataType::Type::kInt32:
-          // Processing a Dex `int-to-char' instruction.
-          locations->SetInAt(0, Location::Any());
-          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
-      break;
-
     case DataType::Type::kFloat32:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-float' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-float' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-float' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
@@ -2695,24 +2640,21 @@
     case DataType::Type::kFloat64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-double' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-double' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-double' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
           break;
@@ -2735,18 +2677,40 @@
   Location in = locations->InAt(0);
   DataType::Type result_type = conversion->GetResultType();
   DataType::Type input_type = conversion->GetInputType();
-  DCHECK_NE(result_type, input_type);
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+      << input_type << " -> " << result_type;
   switch (result_type) {
-    case DataType::Type::kInt8:
+    case DataType::Type::kUint8:
       switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to byte is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
+        case DataType::Type::kInt64:
+          if (in.IsRegister()) {
+            __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
+            __ movzxb(out.AsRegister<CpuRegister>(),
+                      Address(CpuRegister(RSP), in.GetStackIndex()));
+          } else {
+            __ movl(out.AsRegister<CpuRegister>(),
+                    Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
+          }
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case DataType::Type::kInt8:
+      switch (input_type) {
+        case DataType::Type::kUint8:
         case DataType::Type::kUint16:
-          // Processing a Dex `int-to-byte' instruction.
+        case DataType::Type::kInt16:
+        case DataType::Type::kInt32:
+        case DataType::Type::kInt64:
           if (in.IsRegister()) {
             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
@@ -2764,16 +2728,34 @@
       }
       break;
 
+    case DataType::Type::kUint16:
+      switch (input_type) {
+        case DataType::Type::kInt8:
+        case DataType::Type::kInt16:
+        case DataType::Type::kInt32:
+        case DataType::Type::kInt64:
+          if (in.IsRegister()) {
+            __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
+            __ movzxw(out.AsRegister<CpuRegister>(),
+                      Address(CpuRegister(RSP), in.GetStackIndex()));
+          } else {
+            __ movl(out.AsRegister<CpuRegister>(),
+                    Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
+          }
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case DataType::Type::kInt16:
       switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to short is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt32:
         case DataType::Type::kUint16:
-          // Processing a Dex `int-to-short' instruction.
+        case DataType::Type::kInt32:
+        case DataType::Type::kInt64:
           if (in.IsRegister()) {
             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
@@ -2794,7 +2776,6 @@
     case DataType::Type::kInt32:
       switch (input_type) {
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-int' instruction.
           if (in.IsRegister()) {
             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           } else if (in.IsDoubleStackSlot()) {
@@ -2809,7 +2790,6 @@
           break;
 
         case DataType::Type::kFloat32: {
-          // Processing a Dex `float-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
           NearLabel done, nan;
@@ -2831,7 +2811,6 @@
         }
 
         case DataType::Type::kFloat64: {
-          // Processing a Dex `double-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
           NearLabel done, nan;
@@ -2862,18 +2841,16 @@
       switch (input_type) {
         DCHECK(out.IsRegister());
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-long' instruction.
           DCHECK(in.IsRegister());
           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           break;
 
         case DataType::Type::kFloat32: {
-          // Processing a Dex `float-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
           NearLabel done, nan;
@@ -2895,7 +2872,6 @@
         }
 
         case DataType::Type::kFloat64: {
-          // Processing a Dex `double-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
           NearLabel done, nan;
@@ -2922,42 +2898,14 @@
       }
       break;
 
-    case DataType::Type::kUint16:
-      switch (input_type) {
-        case DataType::Type::kInt64:
-          // Type conversion from long to char is a result of code transformations.
-        case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
-        case DataType::Type::kInt8:
-        case DataType::Type::kInt16:
-        case DataType::Type::kInt32:
-          // Processing a Dex `int-to-char' instruction.
-          if (in.IsRegister()) {
-            __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
-            __ movzxw(out.AsRegister<CpuRegister>(),
-                      Address(CpuRegister(RSP), in.GetStackIndex()));
-          } else {
-            __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
-          }
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected type conversion from " << input_type
-                     << " to " << result_type;
-      }
-      break;
-
     case DataType::Type::kFloat32:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-float' instruction.
           if (in.IsRegister()) {
             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
           } else if (in.IsConstant()) {
@@ -2971,7 +2919,6 @@
           break;
 
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-float' instruction.
           if (in.IsRegister()) {
             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
           } else if (in.IsConstant()) {
@@ -2985,7 +2932,6 @@
           break;
 
         case DataType::Type::kFloat64:
-          // Processing a Dex `double-to-float' instruction.
           if (in.IsFpuRegister()) {
             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
           } else if (in.IsConstant()) {
@@ -3007,12 +2953,11 @@
     case DataType::Type::kFloat64:
       switch (input_type) {
         case DataType::Type::kBool:
-          // Boolean input is a result of code transformations.
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
+        case DataType::Type::kUint16:
         case DataType::Type::kInt16:
         case DataType::Type::kInt32:
-        case DataType::Type::kUint16:
-          // Processing a Dex `int-to-double' instruction.
           if (in.IsRegister()) {
             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
           } else if (in.IsConstant()) {
@@ -3026,7 +2971,6 @@
           break;
 
         case DataType::Type::kInt64:
-          // Processing a Dex `long-to-double' instruction.
           if (in.IsRegister()) {
             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
           } else if (in.IsConstant()) {
@@ -3040,7 +2984,6 @@
           break;
 
         case DataType::Type::kFloat32:
-          // Processing a Dex `float-to-double' instruction.
           if (in.IsFpuRegister()) {
             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
           } else if (in.IsConstant()) {
@@ -3883,6 +3826,7 @@
 
   switch (instruction->GetType()) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -4290,7 +4234,8 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (field_type) {
-    case DataType::Type::kBool: {
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8: {
       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
     }
@@ -4300,13 +4245,13 @@
       break;
     }
 
-    case DataType::Type::kInt16: {
-      __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
+    case DataType::Type::kUint16: {
+      __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
     }
 
-    case DataType::Type::kUint16: {
-      __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
+    case DataType::Type::kInt16: {
+      __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
     }
 
@@ -4433,6 +4378,7 @@
 
   switch (field_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8: {
       if (value.IsConstant()) {
         __ movb(Address(base, offset),
@@ -4443,8 +4389,8 @@
       break;
     }
 
-    case DataType::Type::kInt16:
-    case DataType::Type::kUint16: {
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16: {
       if (value.IsConstant()) {
         __ movw(Address(base, offset),
                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
@@ -4714,7 +4660,8 @@
 
   DataType::Type type = instruction->GetType();
   switch (type) {
-    case DataType::Type::kBool: {
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8: {
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
       break;
@@ -4726,12 +4673,6 @@
       break;
     }
 
-    case DataType::Type::kInt16: {
-      CpuRegister out = out_loc.AsRegister<CpuRegister>();
-      __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
-      break;
-    }
-
     case DataType::Type::kUint16: {
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
@@ -4754,6 +4695,12 @@
       break;
     }
 
+    case DataType::Type::kInt16: {
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
+      __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
+      break;
+    }
+
     case DataType::Type::kInt32: {
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
@@ -4865,6 +4812,7 @@
 
   switch (value_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8: {
       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
@@ -4877,8 +4825,8 @@
       break;
     }
 
-    case DataType::Type::kInt16:
-    case DataType::Type::kUint16: {
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16: {
       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
       if (value.IsRegister()) {
diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h
index fbc0c12..e389bad 100644
--- a/compiler/optimizing/data_type-inl.h
+++ b/compiler/optimizing/data_type-inl.h
@@ -46,17 +46,19 @@
 
 constexpr char DataType::TypeId(DataType::Type type) {
   // Type id for visualizer.
+  // Types corresponding to Java types are given a lower-case version of their shorty character.
   switch (type) {
-    case DataType::Type::kBool: return 'z';
-    case DataType::Type::kInt8: return 'b';
-    case DataType::Type::kUint16: return 'c';
-    case DataType::Type::kInt16: return 's';
-    case DataType::Type::kInt32: return 'i';
-    case DataType::Type::kInt64: return 'j';
-    case DataType::Type::kFloat32: return 'f';
-    case DataType::Type::kFloat64: return 'd';
-    case DataType::Type::kReference: return 'l';
-    case DataType::Type::kVoid: return 'v';
+    case DataType::Type::kBool: return 'z';       // Java boolean (Z).
+    case DataType::Type::kUint8: return 'a';      // The character before Java byte's 'b'.
+    case DataType::Type::kInt8: return 'b';       // Java byte (B).
+    case DataType::Type::kUint16: return 'c';     // Java char (C).
+    case DataType::Type::kInt16: return 's';      // Java short (S).
+    case DataType::Type::kInt32: return 'i';      // Java int (I).
+    case DataType::Type::kInt64: return 'j';      // Java long (J).
+    case DataType::Type::kFloat32: return 'f';    // Java float (F).
+    case DataType::Type::kFloat64: return 'd';    // Java double (D).
+    case DataType::Type::kReference: return 'l';  // Java reference (L).
+    case DataType::Type::kVoid: return 'v';       // Java void (V).
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
diff --git a/compiler/optimizing/data_type.cc b/compiler/optimizing/data_type.cc
index 6890617..3c99a76 100644
--- a/compiler/optimizing/data_type.cc
+++ b/compiler/optimizing/data_type.cc
@@ -21,6 +21,7 @@
 static const char* kTypeNames[] = {
     "Reference",
     "Bool",
+    "Uint8",
     "Int8",
     "Uint16",
     "Int16",
diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h
index 08f9263..5a023ad 100644
--- a/compiler/optimizing/data_type.h
+++ b/compiler/optimizing/data_type.h
@@ -29,6 +29,7 @@
   enum class Type : uint8_t {
     kReference = 0,
     kBool,
+    kUint8,
     kInt8,
     kUint16,
     kInt16,
@@ -47,6 +48,7 @@
     switch (type) {
       case Type::kVoid:
       case Type::kBool:
+      case Type::kUint8:
       case Type::kInt8:
         return 0;
       case Type::kUint16:
@@ -71,6 +73,7 @@
       case Type::kVoid:
         return 0;
       case Type::kBool:
+      case Type::kUint8:
       case Type::kInt8:
         return 1;
       case Type::kUint16:
@@ -99,6 +102,7 @@
     // our bit representation makes it safe.
     switch (type) {
       case Type::kBool:
+      case Type::kUint8:
       case Type::kInt8:
       case Type::kUint16:
       case Type::kInt16:
@@ -118,10 +122,27 @@
     return type == Type::kInt64 || type == Type::kFloat64;
   }
 
+  static bool IsUnsignedType(Type type) {  // True only for kUint8 and kUint16.
+    return type == Type::kUint8 || type == Type::kUint16;
+  }
+
+  // Maps kUint8 to kInt8 and kUint16 to kInt16; any other type is returned unchanged.
+  static Type ToSignedType(Type type) {
+    if (type == Type::kUint8) {
+      return Type::kInt8;
+    }
+    if (type == Type::kUint16) {
+      return Type::kInt16;
+    }
+    DCHECK(type != Type::kVoid && type != Type::kReference);  // Callers must not pass these.
+    return type;
+  }
+
   // Return the general kind of `type`, fusing integer-like types as Type::kInt.
   static Type Kind(Type type) {
     switch (type) {
       case Type::kBool:
+      case Type::kUint8:
       case Type::kInt8:
       case Type::kInt16:
       case Type::kUint16:
@@ -136,6 +157,8 @@
     switch (type) {
       case Type::kBool:
         return std::numeric_limits<bool>::min();
+      case Type::kUint8:
+        return std::numeric_limits<uint8_t>::min();
       case Type::kInt8:
         return std::numeric_limits<int8_t>::min();
       case Type::kUint16:
@@ -156,6 +179,8 @@
     switch (type) {
       case Type::kBool:
         return std::numeric_limits<bool>::max();
+      case Type::kUint8:
+        return std::numeric_limits<uint8_t>::max();
       case Type::kInt8:
         return std::numeric_limits<int8_t>::max();
       case Type::kUint16:
@@ -172,6 +197,8 @@
     return 0;
   }
 
+  static bool IsTypeConversionImplicit(Type input_type, Type result_type);
+
   static const char* PrettyDescriptor(Type type);
 
  private:
@@ -179,6 +206,25 @@
 };
 std::ostream& operator<<(std::ostream& os, DataType::Type data_type);
 
+// Defined outside the DataType class body so that operator<< is available for DCHECK_NE().
+inline bool DataType::IsTypeConversionImplicit(Type input_type, Type result_type) {
+  DCHECK_NE(DataType::Type::kVoid, result_type);
+  DCHECK_NE(DataType::Type::kVoid, input_type);
+  // Invariant: We should never generate a conversion to a Boolean value.
+  DCHECK_NE(DataType::Type::kBool, result_type);
+
+  // A conversion to the same type is always implicit.
+  if (result_type == input_type) {
+    return true;
+  }
+  // Otherwise only integral conversions to non-Int64 types can be implicit, namely when the
+  // result value range covers the input value range (widening without trimming sign bits).
+  return result_type != Type::kInt64 &&
+         IsIntegralType(input_type) && IsIntegralType(result_type) &&
+         MinValueOfIntegralType(input_type) >= MinValueOfIntegralType(result_type) &&
+         MaxValueOfIntegralType(input_type) <= MaxValueOfIntegralType(result_type);
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_DATA_TYPE_H_
diff --git a/compiler/optimizing/data_type_test.cc b/compiler/optimizing/data_type_test.cc
index 927291a..3ce683a 100644
--- a/compiler/optimizing/data_type_test.cc
+++ b/compiler/optimizing/data_type_test.cc
@@ -18,6 +18,8 @@
 
 #include "data_type-inl.h"
 
+#include "base/array_ref.h"
+#include "base/macros.h"
 #include "primitive.h"
 
 namespace art {
@@ -57,4 +59,65 @@
 #undef CHECK_NAME
 }
 
+// Verifies DataType::IsTypeConversionImplicit() against a hand-written truth table for all
+// integral input/result type pairs, and checks that conversions involving a floating point
+// type are implicit only when the input and result types are the same.
+TEST(DataType, IsTypeConversionImplicit) {
+  static const DataType::Type kIntegralTypes[] = {
+      DataType::Type::kBool,
+      DataType::Type::kUint8,
+      DataType::Type::kInt8,
+      DataType::Type::kUint16,
+      DataType::Type::kInt16,
+      DataType::Type::kInt32,
+      DataType::Type::kInt64,
+  };
+  const ArrayRef<const DataType::Type> kIntegralInputTypes(kIntegralTypes);
+  // Note: kBool cannot be used as a result type.
+  DCHECK_EQ(kIntegralTypes[0], DataType::Type::kBool);
+  const ArrayRef<const DataType::Type> kIntegralResultTypes = kIntegralInputTypes.SubArray(1u);
+
+  // Expected results: rows follow kIntegralInputTypes, columns follow kIntegralResultTypes.
+  // The Bool column is only a comment, so column 0 is Uint8 and the last element of each row
+  // is an unused, zero-initialized padding entry.
+  static const bool kImplicitIntegralConversions[][arraysize(kIntegralTypes)] = {
+      //             Bool   Uint8   Int8 Uint16  Int16  Int32  Int64
+      { /*   Bool    N/A */  true,  true,  true,  true,  true, false },
+      { /*  Uint8    N/A */  true, false,  true,  true,  true, false },
+      { /*   Int8    N/A */ false,  true, false,  true,  true, false },
+      { /* Uint16    N/A */ false, false,  true, false,  true, false },
+      { /*  Int16    N/A */ false, false, false,  true,  true, false },
+      { /*  Int32    N/A */ false, false, false, false,  true, false },
+      { /*  Int64    N/A */ false, false, false, false, false,  true },
+  };
+  static_assert(arraysize(kIntegralTypes) == arraysize(kImplicitIntegralConversions), "size check");
+
+  for (size_t input_index = 0; input_index != kIntegralInputTypes.size(); ++input_index) {
+    DataType::Type input_type = kIntegralInputTypes[input_index];
+    // Start at column 0 so that Uint8, the first result type, is verified as well.
+    for (size_t result_index = 0u; result_index != kIntegralResultTypes.size(); ++result_index) {
+      DataType::Type result_type = kIntegralResultTypes[result_index];
+      EXPECT_EQ(kImplicitIntegralConversions[input_index][result_index],
+                DataType::IsTypeConversionImplicit(input_type, result_type))
+          << input_type << " " << result_type;
+    }
+  }
+  for (DataType::Type input_type : kIntegralInputTypes) {
+    EXPECT_FALSE(DataType::IsTypeConversionImplicit(input_type, DataType::Type::kFloat32));
+    EXPECT_FALSE(DataType::IsTypeConversionImplicit(input_type, DataType::Type::kFloat64));
+  }
+  for (DataType::Type result_type : kIntegralResultTypes) {
+    EXPECT_FALSE(DataType::IsTypeConversionImplicit(DataType::Type::kFloat32, result_type));
+    EXPECT_FALSE(DataType::IsTypeConversionImplicit(DataType::Type::kFloat64, result_type));
+  }
+  EXPECT_TRUE(
+      DataType::IsTypeConversionImplicit(DataType::Type::kFloat32, DataType::Type::kFloat32));
+  EXPECT_FALSE(
+      DataType::IsTypeConversionImplicit(DataType::Type::kFloat32, DataType::Type::kFloat64));
+  EXPECT_FALSE(
+      DataType::IsTypeConversionImplicit(DataType::Type::kFloat64, DataType::Type::kFloat32));
+  EXPECT_TRUE(
+      DataType::IsTypeConversionImplicit(DataType::Type::kFloat64, DataType::Type::kFloat64));
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 194f063..eccdccf 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -521,20 +521,28 @@
     StartAttributeStream("kind") << deoptimize->GetKind();
   }
 
+  void VisitVecOperation(HVecOperation* vec_operation) OVERRIDE {  // Adds "packed_type".
+    StartAttributeStream("packed_type") << vec_operation->GetPackedType();
+  }
+
   void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE {
+    VisitVecBinaryOperation(hadd);
     StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha;
     StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
   }
 
   void VisitVecMin(HVecMin* min) OVERRIDE {
+    VisitVecBinaryOperation(min);
     StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha;
   }
 
   void VisitVecMax(HVecMax* max) OVERRIDE {
+    VisitVecBinaryOperation(max);
     StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha;
   }
 
   void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE {
+    VisitVecOperation(instruction);
     StartAttributeStream("kind") << instruction->GetOpKind();
   }
 
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index fe286ab..eab17aa 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -59,14 +59,19 @@
 static bool IsNarrowingIntegralConversion(DataType::Type from, DataType::Type to) {
   switch (from) {
     case DataType::Type::kInt64:
-      return to == DataType::Type::kInt8 || to == DataType::Type::kInt16
-          || to == DataType::Type::kUint16 || to == DataType::Type::kInt32;
+      return to == DataType::Type::kUint8 ||
+             to == DataType::Type::kInt8 ||
+             to == DataType::Type::kUint16 ||
+             to == DataType::Type::kInt16 ||
+             to == DataType::Type::kInt32;
     case DataType::Type::kInt32:
-      return to == DataType::Type::kInt8 || to == DataType::Type::kInt16
-          || to == DataType::Type::kUint16;
+      return to == DataType::Type::kUint8 ||
+             to == DataType::Type::kInt8 ||
+             to == DataType::Type::kUint16 ||
+             to == DataType::Type::kInt16;
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
-      return to == DataType::Type::kInt8;
+      return to == DataType::Type::kUint8 || to == DataType::Type::kInt8;
     default:
       return false;
   }
@@ -77,10 +82,11 @@
  */
 static DataType::Type ImplicitConversion(DataType::Type type) {
   switch (type) {
-    case DataType::Type::kInt16:
-    case DataType::Type::kUint16:
-    case DataType::Type::kInt8:
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
+    case DataType::Type::kInt8:
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
       return DataType::Type::kInt32;
     default:
       return type;
@@ -1142,9 +1148,10 @@
 bool HInductionVarAnalysis::IsNarrowingLinear(InductionInfo* info) {
   return info != nullptr &&
       info->induction_class == kLinear &&
-      (info->type == DataType::Type::kInt8 ||
-       info->type == DataType::Type::kInt16 ||
+      (info->type == DataType::Type::kUint8 ||
+       info->type == DataType::Type::kInt8 ||
        info->type == DataType::Type::kUint16 ||
+       info->type == DataType::Type::kInt16 ||
        (info->type == DataType::Type::kInt32 && (info->op_a->type == DataType::Type::kInt64 ||
                                                  info->op_b->type == DataType::Type::kInt64)));
 }
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 92b584c..ab6fbae 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -159,9 +159,10 @@
 /** Corrects a value for type to account for arithmetic wrap-around in lower precision. */
 static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, DataType::Type type) {
   switch (type) {
-    case DataType::Type::kInt16:
+    case DataType::Type::kUint8:
+    case DataType::Type::kInt8:
     case DataType::Type::kUint16:
-    case DataType::Type::kInt8: {
+    case DataType::Type::kInt16: {
       // Constants within range only.
       // TODO: maybe some room for improvement, like allowing widening conversions
       int32_t min = DataType::MinValueOfIntegralType(type);
@@ -216,10 +217,11 @@
   // bounds check elimination, will have truncated higher precision induction
   // at their use point already).
   switch (info->type) {
-    case DataType::Type::kInt32:
-    case DataType::Type::kInt16:
-    case DataType::Type::kUint16:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
+    case DataType::Type::kInt32:
       break;
     default:
       return false;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 1a2494a..36ff2a9 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -251,7 +251,8 @@
   InstructionSet isa = codegen_->GetInstructionSet();
   switch (isa) {
     case kArm64:
-      if (!(type == DataType::Type::kInt8 ||
+      if (!(type == DataType::Type::kUint8 ||
+            type == DataType::Type::kInt8 ||
             type == DataType::Type::kUint16 ||
             type == DataType::Type::kInt16 ||
             type == DataType::Type::kInt32)) {
@@ -260,7 +261,8 @@
       break;
     case kMips:
     case kMips64:
-      if (!(type == DataType::Type::kInt8 ||
+      if (!(type == DataType::Type::kUint8 ||
+            type == DataType::Type::kInt8 ||
             type == DataType::Type::kUint16 ||
             type == DataType::Type::kInt16 ||
             type == DataType::Type::kInt32 ||
@@ -360,18 +362,36 @@
   }
 
   // Shift operations implicitly mask the shift amount according to the type width. Get rid of
-  // unnecessary explicit masking operations on the shift amount.
+  // unnecessary And/Or/Xor/Add/Sub/TypeConversion operations on the shift amount that do not
+  // affect the relevant bits.
   // Replace code looking like
-  //    AND masked_shift, shift, <superset of implicit mask>
-  //    SHL dst, value, masked_shift
+  //    AND adjusted_shift, shift, <superset of implicit mask>
+  //    [OR/XOR/ADD/SUB adjusted_shift, shift, <value not overlapping with implicit mask>]
+  //    [<conversion-from-integral-non-64-bit-type> adjusted_shift, shift]
+  //    SHL dst, value, adjusted_shift
   // with
   //    SHL dst, value, shift
-  if (shift_amount->IsAnd()) {
-    HAnd* and_insn = shift_amount->AsAnd();
-    HConstant* mask = and_insn->GetConstantRight();
-    if ((mask != nullptr) && ((Int64FromConstant(mask) & implicit_mask) == implicit_mask)) {
-      instruction->ReplaceInput(and_insn->GetLeastConstantLeft(), 1);
+  if (shift_amount->IsAnd() ||
+      shift_amount->IsOr() ||
+      shift_amount->IsXor() ||
+      shift_amount->IsAdd() ||
+      shift_amount->IsSub()) {
+    int64_t required_result = shift_amount->IsAnd() ? implicit_mask : 0;
+    HBinaryOperation* bin_op = shift_amount->AsBinaryOperation();
+    HConstant* mask = bin_op->GetConstantRight();
+    if (mask != nullptr && (Int64FromConstant(mask) & implicit_mask) == required_result) {
+      instruction->ReplaceInput(bin_op->GetLeastConstantLeft(), 1);
       RecordSimplification();
+      return;
+    }
+  } else if (shift_amount->IsTypeConversion()) {
+    DCHECK_NE(shift_amount->GetType(), DataType::Type::kBool);  // We never convert to bool.
+    DataType::Type source_type = shift_amount->InputAt(0)->GetType();
+    // Non-integral and 64-bit source types require an explicit type conversion.
+    if (DataType::IsIntegralType(source_type) && !DataType::Is64BitType(source_type)) {
+      instruction->ReplaceInput(shift_amount->AsTypeConversion()->GetInput(), 1);
+      RecordSimplification();
+      return;
     }
   }
 }
@@ -858,10 +878,11 @@
   }
   DataType::Type type1 = a->GetType();
   DataType::Type type2 = b->GetType();
-  return (type1 == DataType::Type::kInt8  && type2 == DataType::Type::kInt8) ||
-         (type1 == DataType::Type::kInt16 && type2 == DataType::Type::kInt16) ||
-         (type1 == DataType::Type::kUint16  && type2 == DataType::Type::kUint16) ||
-         (type1 == DataType::Type::kInt32   && type2 == DataType::Type::kInt32 &&
+  return (type1 == DataType::Type::kUint8  && type2 == DataType::Type::kUint8) ||
+         (type1 == DataType::Type::kInt8   && type2 == DataType::Type::kInt8) ||
+         (type1 == DataType::Type::kInt16  && type2 == DataType::Type::kInt16) ||
+         (type1 == DataType::Type::kUint16 && type2 == DataType::Type::kUint16) ||
+         (type1 == DataType::Type::kInt32  && type2 == DataType::Type::kInt32 &&
           to_type == DataType::Type::kInt64);
 }
 
@@ -1018,30 +1039,13 @@
   }
 }
 
-static bool IsTypeConversionImplicit(DataType::Type input_type, DataType::Type result_type) {
-  // Invariant: We should never generate a conversion to a Boolean value.
-  DCHECK_NE(DataType::Type::kBool, result_type);
-
-  // Besides conversion to the same type, widening integral conversions are implicit,
-  // excluding conversions to long and the byte->char conversion where we need to
-  // clear the high 16 bits of the 32-bit sign-extended representation of byte.
-  return result_type == input_type ||
-      (result_type == DataType::Type::kInt32 && (input_type == DataType::Type::kBool ||
-                                                 input_type == DataType::Type::kInt8 ||
-                                                 input_type == DataType::Type::kInt16 ||
-                                                 input_type == DataType::Type::kUint16)) ||
-      (result_type == DataType::Type::kUint16 && input_type == DataType::Type::kBool) ||
-      (result_type == DataType::Type::kInt16 && (input_type == DataType::Type::kBool ||
-                                                 input_type == DataType::Type::kInt8)) ||
-      (result_type == DataType::Type::kInt8 && input_type == DataType::Type::kBool);
-}
-
 static bool IsTypeConversionLossless(DataType::Type input_type, DataType::Type result_type) {
   // The conversion to a larger type is loss-less with the exception of two cases,
-  //   - conversion to Uint16, the only unsigned type, where we may lose some bits, and
+  //   - conversion to the unsigned type Uint16, where we may lose some bits, and
   //   - conversion from float to long, the only FP to integral conversion with smaller FP type.
   // For integral to FP conversions this holds because the FP mantissa is large enough.
-  DCHECK_NE(input_type, result_type);
+  // Note: The size check excludes Uint8 as the result type.
+  DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type));
   return DataType::Size(result_type) > DataType::Size(input_type) &&
       result_type != DataType::Type::kUint16 &&
       !(result_type == DataType::Type::kInt64 && input_type == DataType::Type::kFloat32);
@@ -1051,7 +1055,7 @@
   HInstruction* input = instruction->GetInput();
   DataType::Type input_type = input->GetType();
   DataType::Type result_type = instruction->GetResultType();
-  if (IsTypeConversionImplicit(input_type, result_type)) {
+  if (DataType::IsTypeConversionImplicit(input_type, result_type)) {
     // Remove the implicit conversion; this includes conversion to the same type.
     instruction->ReplaceWith(input);
     instruction->GetBlock()->RemoveInstruction(instruction);
@@ -1080,7 +1084,7 @@
 
     if (is_first_conversion_lossless || integral_conversions_with_non_widening_second) {
       // If the merged conversion is implicit, do the simplification unconditionally.
-      if (IsTypeConversionImplicit(original_type, result_type)) {
+      if (DataType::IsTypeConversionImplicit(original_type, result_type)) {
         instruction->ReplaceWith(original_input);
         instruction->GetBlock()->RemoveInstruction(instruction);
         if (!input_conversion->HasUses()) {
@@ -1109,7 +1113,7 @@
       if (trailing_ones >= kBitsPerByte * DataType::Size(result_type)) {
         // The `HAnd` is useless, for example in `(byte) (x & 0xff)`, get rid of it.
         HInstruction* original_input = input_and->GetLeastConstantLeft();
-        if (IsTypeConversionImplicit(original_input->GetType(), result_type)) {
+        if (DataType::IsTypeConversionImplicit(original_input->GetType(), result_type)) {
           instruction->ReplaceWith(original_input);
           instruction->GetBlock()->RemoveInstruction(instruction);
           RecordSimplification();
@@ -1226,6 +1230,37 @@
       RecordSimplification();
       return;
     }
+    if (input_other->IsTypeConversion() &&
+        input_other->GetType() == DataType::Type::kInt64 &&
+        DataType::IsIntegralType(input_other->InputAt(0)->GetType()) &&
+        IsInt<32>(value) &&
+        input_other->HasOnlyOneNonEnvironmentUse()) {
+      // The AND can be reordered before the TypeConversion. Replace
+      //   LongConstant cst, <32-bit-constant-sign-extended-to-64-bits>
+      //   TypeConversion<Int64> tmp, src
+      //   AND dst, tmp, cst
+      // with
+      //   IntConstant cst, <32-bit-constant>
+      //   AND tmp, src, cst
+      //   TypeConversion<Int64> dst, tmp
+      // This helps 32-bit targets and does not hurt 64-bit targets.
+      // This also simplifies detection of other patterns, such as Uint8 loads.
+      HInstruction* new_and_input = input_other->InputAt(0);
+      // Implicit conversion Int64->Int64 would have been removed previously.
+      DCHECK_NE(new_and_input->GetType(), DataType::Type::kInt64);
+      HConstant* new_const = GetGraph()->GetConstant(DataType::Type::kInt32, value);
+      HAnd* new_and =
+          new (GetGraph()->GetArena()) HAnd(DataType::Type::kInt32, new_and_input, new_const);
+      instruction->GetBlock()->InsertInstructionBefore(new_and, instruction);
+      HTypeConversion* new_conversion =
+          new (GetGraph()->GetArena()) HTypeConversion(DataType::Type::kInt64, new_and);
+      instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, new_conversion);
+      input_other->GetBlock()->RemoveInstruction(input_other);
+      RecordSimplification();
+      // Try to process the new And now, do not wait for the next round of simplifications.
+      instruction = new_and;
+      input_other = new_and_input;
+    }
     // Eliminate And from UShr+And if the And-mask contains all the bits that
     // can be non-zero after UShr. Transform Shr+And to UShr if the And-mask
     // precisely clears the shifted-in sign bits.
@@ -2149,8 +2184,12 @@
   HBoundsCheck* bounds_check = new (arena) HBoundsCheck(
       index, length, dex_pc, invoke->GetDexMethodIndex());
   invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke);
-  HArrayGet* array_get = new (arena) HArrayGet(
-      str, bounds_check, DataType::Type::kUint16, dex_pc, /* is_string_char_at */ true);
+  HArrayGet* array_get = new (arena) HArrayGet(str,
+                                               bounds_check,
+                                               DataType::Type::kUint16,
+                                               SideEffects::None(),  // Strings are immutable.
+                                               dex_pc,
+                                               /* is_string_char_at */ true);
   invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get);
   bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment());
   GetGraph()->SetHasBoundsChecks(true);
@@ -2312,6 +2351,21 @@
     case Intrinsics::kUnsafeFullFence:
       SimplifyMemBarrier(instruction, MemBarrierKind::kAnyAny);
       break;
+    case Intrinsics::kVarHandleFullFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kAnyAny);
+      break;
+    case Intrinsics::kVarHandleAcquireFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
+      break;
+    case Intrinsics::kVarHandleReleaseFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kAnyStore);
+      break;
+    case Intrinsics::kVarHandleLoadLoadFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
+      break;
+    case Intrinsics::kVarHandleStoreStoreFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore);
+      break;
     default:
       break;
   }
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index efd7cb4..7439893 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -30,6 +30,57 @@
 
 namespace arm {
 
+class InstructionSimplifierArmVisitor : public HGraphVisitor {
+ public:
+  InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph), stats_(stats) {}
+
+ private:
+  void RecordSimplification() {
+    if (stats_ != nullptr) {
+      stats_->RecordStat(kInstructionSimplificationsArch);
+    }
+  }
+
+  bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
+  bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge);
+  bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
+  }
+  bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
+    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
+  }
+
+  /**
+   * This simplifier uses a special-purpose BB visitor.
+   * (1) No need to visit Phi nodes.
+   * (2) Since statements can be removed in a "forward" fashion,
+   *     the visitor should test if each statement is still there.
+   */
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+    // TODO: fragile iteration, provide more robust iterators?
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (instruction->IsInBlock()) {
+        instruction->Accept(this);
+      }
+    }
+  }
+
+  void VisitAnd(HAnd* instruction) OVERRIDE;
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+  void VisitArraySet(HArraySet* instruction) OVERRIDE;
+  void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitOr(HOr* instruction) OVERRIDE;
+  void VisitShl(HShl* instruction) OVERRIDE;
+  void VisitShr(HShr* instruction) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+  void VisitUShr(HUShr* instruction) OVERRIDE;
+
+  OptimizingCompilerStats* stats_;
+};
+
 bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* use,
                                                                  HInstruction* bitfield_op,
                                                                  bool do_merge) {
@@ -234,5 +285,10 @@
   }
 }
 
+void InstructionSimplifierArm::Run() {
+  InstructionSimplifierArmVisitor visitor(graph_, stats_);
+  visitor.VisitReversePostOrder();
+}
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index e2ed257..2f65729 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -23,58 +23,6 @@
 namespace art {
 namespace arm {
 
-class InstructionSimplifierArmVisitor : public HGraphVisitor {
- public:
-  InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
-      : HGraphVisitor(graph), stats_(stats) {}
-
- private:
-  void RecordSimplification() {
-    if (stats_ != nullptr) {
-      stats_->RecordStat(kInstructionSimplificationsArch);
-    }
-  }
-
-  bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
-  bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge);
-  bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
-    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
-  }
-  bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
-    DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
-    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
-  }
-
-  /**
-   * This simplifier uses a special-purpose BB visitor.
-   * (1) No need to visit Phi nodes.
-   * (2) Since statements can be removed in a "forward" fashion,
-   *     the visitor should test if each statement is still there.
-   */
-  void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
-    // TODO: fragile iteration, provide more robust iterators?
-    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* instruction = it.Current();
-      if (instruction->IsInBlock()) {
-        instruction->Accept(this);
-      }
-    }
-  }
-
-  void VisitAnd(HAnd* instruction) OVERRIDE;
-  void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
-  void VisitArraySet(HArraySet* instruction) OVERRIDE;
-  void VisitMul(HMul* instruction) OVERRIDE;
-  void VisitOr(HOr* instruction) OVERRIDE;
-  void VisitShl(HShl* instruction) OVERRIDE;
-  void VisitShr(HShr* instruction) OVERRIDE;
-  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
-  void VisitUShr(HUShr* instruction) OVERRIDE;
-
-  OptimizingCompilerStats* stats_;
-};
-
-
 class InstructionSimplifierArm : public HOptimization {
  public:
   InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
@@ -82,10 +30,7 @@
 
   static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
 
-  void Run() OVERRIDE {
-    InstructionSimplifierArmVisitor visitor(graph_, stats_);
-    visitor.VisitReversePostOrder();
-  }
+  void Run() OVERRIDE;
 };
 
 }  // namespace arm
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 1c3b79d..c639953 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -30,6 +30,63 @@
 
 using helpers::ShifterOperandSupportsExtension;
 
+class InstructionSimplifierArm64Visitor : public HGraphVisitor {
+ public:
+  InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph), stats_(stats) {}
+
+ private:
+  void RecordSimplification() {
+    if (stats_ != nullptr) {
+      stats_->RecordStat(kInstructionSimplificationsArch);
+    }
+  }
+
+  bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
+  bool TryMergeIntoShifterOperand(HInstruction* use,
+                                  HInstruction* bitfield_op,
+                                  bool do_merge);
+  bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
+  }
+  bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
+    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
+  }
+
+  /**
+   * This simplifier uses a special-purpose BB visitor.
+   * (1) No need to visit Phi nodes.
+   * (2) Since statements can be removed in a "forward" fashion,
+   *     the visitor should test if each statement is still there.
+   */
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+    // TODO: fragile iteration, provide more robust iterators?
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (instruction->IsInBlock()) {
+        instruction->Accept(this);
+      }
+    }
+  }
+
+  // HInstruction visitors, sorted alphabetically.
+  void VisitAnd(HAnd* instruction) OVERRIDE;
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+  void VisitArraySet(HArraySet* instruction) OVERRIDE;
+  void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitOr(HOr* instruction) OVERRIDE;
+  void VisitShl(HShl* instruction) OVERRIDE;
+  void VisitShr(HShr* instruction) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+  void VisitUShr(HUShr* instruction) OVERRIDE;
+  void VisitXor(HXor* instruction) OVERRIDE;
+  void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
+  void VisitVecStore(HVecStore* instruction) OVERRIDE;
+
+  OptimizingCompilerStats* stats_;
+};
+
 bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
                                                                    HInstruction* bitfield_op,
                                                                    bool do_merge) {
@@ -223,5 +280,10 @@
   }
 }
 
+void InstructionSimplifierArm64::Run() {
+  InstructionSimplifierArm64Visitor visitor(graph_, stats_);
+  visitor.VisitReversePostOrder();
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4f16fc3..d180a8d 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -23,64 +23,6 @@
 namespace art {
 namespace arm64 {
 
-class InstructionSimplifierArm64Visitor : public HGraphVisitor {
- public:
-  InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats)
-      : HGraphVisitor(graph), stats_(stats) {}
-
- private:
-  void RecordSimplification() {
-    if (stats_ != nullptr) {
-      stats_->RecordStat(kInstructionSimplificationsArch);
-    }
-  }
-
-  bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
-  bool TryMergeIntoShifterOperand(HInstruction* use,
-                                  HInstruction* bitfield_op,
-                                  bool do_merge);
-  bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
-    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
-  }
-  bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
-    DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
-    return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
-  }
-
-  /**
-   * This simplifier uses a special-purpose BB visitor.
-   * (1) No need to visit Phi nodes.
-   * (2) Since statements can be removed in a "forward" fashion,
-   *     the visitor should test if each statement is still there.
-   */
-  void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
-    // TODO: fragile iteration, provide more robust iterators?
-    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* instruction = it.Current();
-      if (instruction->IsInBlock()) {
-        instruction->Accept(this);
-      }
-    }
-  }
-
-  // HInstruction visitors, sorted alphabetically.
-  void VisitAnd(HAnd* instruction) OVERRIDE;
-  void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
-  void VisitArraySet(HArraySet* instruction) OVERRIDE;
-  void VisitMul(HMul* instruction) OVERRIDE;
-  void VisitOr(HOr* instruction) OVERRIDE;
-  void VisitShl(HShl* instruction) OVERRIDE;
-  void VisitShr(HShr* instruction) OVERRIDE;
-  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
-  void VisitUShr(HUShr* instruction) OVERRIDE;
-  void VisitXor(HXor* instruction) OVERRIDE;
-  void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
-  void VisitVecStore(HVecStore* instruction) OVERRIDE;
-
-  OptimizingCompilerStats* stats_;
-};
-
-
 class InstructionSimplifierArm64 : public HOptimization {
  public:
   InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
@@ -88,10 +30,7 @@
 
   static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64";
 
-  void Run() OVERRIDE {
-    InstructionSimplifierArm64Visitor visitor(graph_, stats_);
-    visitor.VisitReversePostOrder();
-  }
+  void Run() OVERRIDE;
 };
 
 }  // namespace arm64
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 11725f4..0f14d27 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -30,6 +30,16 @@
 
 namespace art {
 
+// Check that intrinsic enum values fit within space set aside in ArtMethod modifier flags.
+#define CHECK_INTRINSICS_ENUM_VALUES(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
+  static_assert( \
+      static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \
+      "Intrinsics enumeration space overflow.");
+#include "intrinsics_list.h"
+  INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES)
+#undef INTRINSICS_LIST
+#undef CHECK_INTRINSICS_ENUM_VALUES
+
 // Function that returns whether an intrinsic is static/direct or virtual.
 static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) {
   switch (i) {
@@ -109,6 +119,7 @@
   // InvokeStaticOrDirect.
   InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
   InvokeType invoke_type = invoke->GetInvokeType();
+
   switch (intrinsic_type) {
     case kStatic:
       return (invoke_type == kStatic);
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 6411e82..7abfd5b 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -256,25 +256,30 @@
   LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic()      \
              << " should have been converted to HIR";                    \
 }
-#define UNREACHABLE_INTRINSICS(Arch)                \
-UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits)    \
-UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \
-UNREACHABLE_INTRINSIC(Arch, FloatIsNaN)             \
-UNREACHABLE_INTRINSIC(Arch, DoubleIsNaN)            \
-UNREACHABLE_INTRINSIC(Arch, IntegerRotateLeft)      \
-UNREACHABLE_INTRINSIC(Arch, LongRotateLeft)         \
-UNREACHABLE_INTRINSIC(Arch, IntegerRotateRight)     \
-UNREACHABLE_INTRINSIC(Arch, LongRotateRight)        \
-UNREACHABLE_INTRINSIC(Arch, IntegerCompare)         \
-UNREACHABLE_INTRINSIC(Arch, LongCompare)            \
-UNREACHABLE_INTRINSIC(Arch, IntegerSignum)          \
-UNREACHABLE_INTRINSIC(Arch, LongSignum)             \
-UNREACHABLE_INTRINSIC(Arch, StringCharAt)           \
-UNREACHABLE_INTRINSIC(Arch, StringIsEmpty)          \
-UNREACHABLE_INTRINSIC(Arch, StringLength)           \
-UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)        \
-UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence)       \
-UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)
+#define UNREACHABLE_INTRINSICS(Arch)                  \
+UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits)      \
+UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits)   \
+UNREACHABLE_INTRINSIC(Arch, FloatIsNaN)               \
+UNREACHABLE_INTRINSIC(Arch, DoubleIsNaN)              \
+UNREACHABLE_INTRINSIC(Arch, IntegerRotateLeft)        \
+UNREACHABLE_INTRINSIC(Arch, LongRotateLeft)           \
+UNREACHABLE_INTRINSIC(Arch, IntegerRotateRight)       \
+UNREACHABLE_INTRINSIC(Arch, LongRotateRight)          \
+UNREACHABLE_INTRINSIC(Arch, IntegerCompare)           \
+UNREACHABLE_INTRINSIC(Arch, LongCompare)              \
+UNREACHABLE_INTRINSIC(Arch, IntegerSignum)            \
+UNREACHABLE_INTRINSIC(Arch, LongSignum)               \
+UNREACHABLE_INTRINSIC(Arch, StringCharAt)             \
+UNREACHABLE_INTRINSIC(Arch, StringIsEmpty)            \
+UNREACHABLE_INTRINSIC(Arch, StringLength)             \
+UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)          \
+UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence)         \
+UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)          \
+UNREACHABLE_INTRINSIC(Arch, VarHandleFullFence)       \
+UNREACHABLE_INTRINSIC(Arch, VarHandleAcquireFence)    \
+UNREACHABLE_INTRINSIC(Arch, VarHandleReleaseFence)    \
+UNREACHABLE_INTRINSIC(Arch, VarHandleLoadLoadFence)   \
+UNREACHABLE_INTRINSIC(Arch, VarHandleStoreStoreFence)
 
 template <typename IntrinsicLocationsBuilder, typename Codegenerator>
 bool IsCallFreeIntrinsic(HInvoke* invoke, Codegenerator* codegen) {
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index bd14f2b..54c2d43 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -276,6 +276,7 @@
       case DataType::Type::kReference:
         return GetGraph()->GetNullConstant();
       case DataType::Type::kBool:
+      case DataType::Type::kUint8:
       case DataType::Type::kInt8:
       case DataType::Type::kUint16:
       case DataType::Type::kInt16:
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 7e37018..2090a12 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -28,6 +28,46 @@
 
 namespace art {
 
+// Encodes *is_unsigned into *type for 8/16-bit integral types and then clears the flag.
+// TODO: Detect the right packed type straight away so that this normalization is not needed.
+static inline void NormalizePackedType(/* inout */ DataType::Type* type,
+                                       /* inout */ bool* is_unsigned) {
+  switch (*type) {
+    case DataType::Type::kBool:
+      DCHECK(!*is_unsigned);
+      break;
+    case DataType::Type::kUint8:
+    case DataType::Type::kInt8:
+      if (*is_unsigned) {
+        *is_unsigned = false;
+        *type = DataType::Type::kUint8;
+      } else {
+        *type = DataType::Type::kInt8;
+      }
+      break;
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
+      if (*is_unsigned) {
+        *is_unsigned = false;
+        *type = DataType::Type::kUint16;
+      } else {
+        *type = DataType::Type::kInt16;
+      }
+      break;
+    case DataType::Type::kInt32:
+    case DataType::Type::kInt64:
+      // No kUint32/kUint64 at the moment, so *type and *is_unsigned are left untouched.
+      break;
+    case DataType::Type::kFloat32:
+    case DataType::Type::kFloat64:
+      DCHECK(!*is_unsigned);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected type " << *type;
+      UNREACHABLE();
+  }
+}
+
 // Enables vectorization (SIMDization) in the loop optimizer.
 static constexpr bool kEnableVectorization = true;
 
@@ -74,22 +114,20 @@
 // Forward declaration.
 static bool IsZeroExtensionAndGet(HInstruction* instruction,
                                   DataType::Type type,
-                                  /*out*/ HInstruction** operand,
-                                  bool to64 = false);
+                                  /*out*/ HInstruction** operand);
 
-// Detect a sign extension in instruction from the given type. The to64 parameter
-// denotes if result is long, and thus sign extension from int can be included.
+// Detect a sign extension in instruction from the given type.
 // Returns the promoted operand on success.
 static bool IsSignExtensionAndGet(HInstruction* instruction,
                                   DataType::Type type,
-                                  /*out*/ HInstruction** operand,
-                                  bool to64 = false) {
+                                  /*out*/ HInstruction** operand) {
   // Accept any already wider constant that would be handled properly by sign
   // extension when represented in the *width* of the given narrower data type
-  // (the fact that char normally zero extends does not matter here).
+  // (the fact that Uint16 normally zero extends does not matter here).
   int64_t value = 0;
   if (IsInt64AndGet(instruction, /*out*/ &value)) {
     switch (type) {
+      case DataType::Type::kUint8:
       case DataType::Type::kInt8:
         if (IsInt<8>(value)) {
           *operand = instruction;
@@ -103,43 +141,39 @@
           return true;
         }
         return false;
-      case DataType::Type::kInt32:
-        if (IsInt<32>(value)) {
-          *operand = instruction;
-          return to64;
-        }
-        return false;
       default:
         return false;
     }
   }
-  // An implicit widening conversion of a signed integer to an integral type sign-extends
-  // the two's-complement representation of the integer value to fill the wider format.
-  if (instruction->GetType() == type && (instruction->IsArrayGet() ||
-                                         instruction->IsStaticFieldGet() ||
-                                         instruction->IsInstanceFieldGet())) {
+  // An implicit widening conversion of any signed expression sign-extends.
+  if (instruction->GetType() == type) {
     switch (type) {
       case DataType::Type::kInt8:
       case DataType::Type::kInt16:
         *operand = instruction;
         return true;
-      case DataType::Type::kInt32:
-        *operand = instruction;
-        return to64;
       default:
         return false;
     }
   }
-  // Explicit type conversions.
+  // An explicit widening conversion of a signed expression sign-extends.
   if (instruction->IsTypeConversion()) {
-    DataType::Type from = instruction->InputAt(0)->GetType();
+    HInstruction* conv = instruction->InputAt(0);
+    DataType::Type from = conv->GetType();
     switch (instruction->GetType()) {
+      case DataType::Type::kInt32:
       case DataType::Type::kInt64:
-        return IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true);
+        if (type == from && (from == DataType::Type::kInt8 ||
+                             from == DataType::Type::kInt16 ||
+                             from == DataType::Type::kInt32)) {
+          *operand = conv;
+          return true;
+        }
+        return false;
       case DataType::Type::kInt16:
         return type == DataType::Type::kUint16 &&
                from == DataType::Type::kUint16 &&
-               IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, to64);
+               IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand);
       default:
         return false;
     }
@@ -147,19 +181,18 @@
   return false;
 }
 
-// Detect a zero extension in instruction from the given type. The to64 parameter
-// denotes if result is long, and thus zero extension from int can be included.
+// Detect a zero extension in instruction from the given type.
 // Returns the promoted operand on success.
 static bool IsZeroExtensionAndGet(HInstruction* instruction,
                                   DataType::Type type,
-                                  /*out*/ HInstruction** operand,
-                                  bool to64) {
+                                  /*out*/ HInstruction** operand) {
   // Accept any already wider constant that would be handled properly by zero
   // extension when represented in the *width* of the given narrower data type
-  // (the fact that byte/short/int normally sign extend does not matter here).
+  // (the fact that Int8/Int16 normally sign extend does not matter here).
   int64_t value = 0;
   if (IsInt64AndGet(instruction, /*out*/ &value)) {
     switch (type) {
+      case DataType::Type::kUint8:
       case DataType::Type::kInt8:
         if (IsUint<8>(value)) {
           *operand = instruction;
@@ -173,28 +206,26 @@
           return true;
         }
         return false;
-      case DataType::Type::kInt32:
-        if (IsUint<32>(value)) {
-          *operand = instruction;
-          return to64;
-        }
-        return false;
       default:
         return false;
     }
   }
-  // An implicit widening conversion of a char to an integral type zero-extends
-  // the representation of the char value to fill the wider format.
-  if (instruction->GetType() == type && (instruction->IsArrayGet() ||
-                                         instruction->IsStaticFieldGet() ||
-                                         instruction->IsInstanceFieldGet())) {
-    if (type == DataType::Type::kUint16) {
-      *operand = instruction;
-      return true;
+  // An implicit widening conversion of any unsigned expression zero-extends.
+  if (instruction->GetType() == type) {
+    switch (type) {
+      case DataType::Type::kUint8:
+      case DataType::Type::kUint16:
+        *operand = instruction;
+        return true;
+      default:
+        return false;
     }
   }
   // A sign (or zero) extension followed by an explicit removal of just the
   // higher sign bits is equivalent to a zero extension of the underlying operand.
+  //
+  // TODO: move this into simplifier and use new type system instead.
+  //
   if (instruction->IsAnd()) {
     int64_t mask = 0;
     HInstruction* a = instruction->InputAt(0);
@@ -205,27 +236,32 @@
         (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) ||
                                              IsZeroExtensionAndGet(a, type, /*out*/ operand)))) {
       switch ((*operand)->GetType()) {
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
           return mask == std::numeric_limits<uint8_t>::max();
         case DataType::Type::kUint16:
         case DataType::Type::kInt16:
           return mask == std::numeric_limits<uint16_t>::max();
-        case DataType::Type::kInt32:
-          return mask == std::numeric_limits<uint32_t>::max() && to64;
         default: return false;
       }
     }
   }
-  // Explicit type conversions.
+  // An explicit widening conversion of an unsigned expression zero-extends.
   if (instruction->IsTypeConversion()) {
-    DataType::Type from = instruction->InputAt(0)->GetType();
+    HInstruction* conv = instruction->InputAt(0);
+    DataType::Type from = conv->GetType();
     switch (instruction->GetType()) {
+      case DataType::Type::kInt32:
       case DataType::Type::kInt64:
-        return IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true);
+        if (type == from && from == DataType::Type::kUint16) {
+          *operand = conv;
+          return true;
+        }
+        return false;
       case DataType::Type::kUint16:
         return type == DataType::Type::kInt16 &&
                from == DataType::Type::kInt16 &&
-               IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, to64);
+               IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand);
       default:
         return false;
     }
@@ -268,51 +304,10 @@
 
 // Compute relative vector length based on type difference.
 static size_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type, size_t vl) {
-  switch (other_type) {
-    case DataType::Type::kBool:
-    case DataType::Type::kInt8:
-      switch (vector_type) {
-        case DataType::Type::kBool:
-        case DataType::Type::kInt8: return vl;
-        default: break;
-      }
-      return vl;
-    case DataType::Type::kUint16:
-    case DataType::Type::kInt16:
-      switch (vector_type) {
-        case DataType::Type::kBool:
-        case DataType::Type::kInt8: return vl >> 1;
-        case DataType::Type::kUint16:
-        case DataType::Type::kInt16: return vl;
-        default: break;
-      }
-      break;
-    case DataType::Type::kInt32:
-      switch (vector_type) {
-        case DataType::Type::kBool:
-        case DataType::Type::kInt8: return vl >> 2;
-        case DataType::Type::kUint16:
-        case DataType::Type::kInt16: return vl >> 1;
-        case DataType::Type::kInt32: return vl;
-        default: break;
-      }
-      break;
-    case DataType::Type::kInt64:
-      switch (vector_type) {
-        case DataType::Type::kBool:
-        case DataType::Type::kInt8: return vl >> 3;
-        case DataType::Type::kUint16:
-        case DataType::Type::kInt16: return vl >> 2;
-        case DataType::Type::kInt32: return vl >> 1;
-        case DataType::Type::kInt64: return vl;
-        default: break;
-      }
-      break;
-    default:
-      break;
-  }
-  LOG(FATAL) << "Unsupported idiom conversion";
-  UNREACHABLE();
+  DCHECK(DataType::IsIntegralType(other_type));
+  DCHECK(DataType::IsIntegralType(vector_type));
+  DCHECK_GE(DataType::SizeShift(other_type), DataType::SizeShift(vector_type));
+  return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type));
 }
 
 // Detect up to two instructions a and b, and an acccumulated constant c.
@@ -360,6 +355,22 @@
   return false;
 }
 
+// Detect a + c (or c + a) for constant c; on success *a is the other operand and *c the value.
+static bool IsAddConst(HInstruction* instruction,
+                       /*out*/ HInstruction** a,
+                       /*out*/ int64_t* c) {
+  if (instruction->IsAdd()) {
+    if (IsInt64AndGet(instruction->InputAt(0), c)) {
+      *a = instruction->InputAt(1);
+      return true;
+    } else if (IsInt64AndGet(instruction->InputAt(1), c)) {
+      *a = instruction->InputAt(0);
+      return true;
+    }
+  }
+  return false;
+}
+
 // Detect reductions of the following forms,
 //   x = x_phi + ..
 //   x = x_phi - ..
@@ -1100,19 +1111,19 @@
     return true;
   } else if (instruction->IsArrayGet()) {
     // Deal with vector restrictions.
-    if (instruction->AsArrayGet()->IsStringCharAt() &&
-        HasVectorRestrictions(restrictions, kNoStringCharAt)) {
+    bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt();
+    if (is_string_char_at && HasVectorRestrictions(restrictions, kNoStringCharAt)) {
       return false;
     }
     // Accept a right-hand-side array base[index] for
-    // (1) exact matching vector type,
+    // (1) matching vector type (exact match or signed/unsigned integral type of the same size),
     // (2) loop-invariant base,
     // (3) unit stride index,
     // (4) vectorizable right-hand-side value.
     HInstruction* base = instruction->InputAt(0);
     HInstruction* index = instruction->InputAt(1);
     HInstruction* offset = nullptr;
-    if (type == instruction->GetType() &&
+    if (DataType::ToSignedType(type) == DataType::ToSignedType(instruction->GetType()) &&
         node->loop_info->IsDefinedOutOfTheLoop(base) &&
         induction_range_.IsUnitStride(instruction, index, graph_, &offset)) {
       if (generate_code) {
@@ -1148,6 +1159,7 @@
       size_t size_vec = DataType::Size(type);
       size_t size_from = DataType::Size(from);
       size_t size_to = DataType::Size(to);
+      DataType::Type ctype = size_from == size_vec ? from : type;
       // Accept an integral conversion
       // (1a) narrowing into vector type, "wider" operations cannot bring in higher order bits, or
       // (1b) widening from at least vector type, and
@@ -1157,7 +1169,7 @@
            VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) ||
           (size_to >= size_from &&
            size_from >= size_vec &&
-           VectorizeUse(node, opa, generate_code, type, restrictions))) {
+           VectorizeUse(node, opa, generate_code, ctype, restrictions))) {
         if (generate_code) {
           if (vector_mode_ == kVector) {
             vector_map_->Put(instruction, vector_map_->Get(opa));  // operand pass-through
@@ -1275,6 +1287,7 @@
         }
         if (VectorizeUse(node, r, generate_code, type, restrictions)) {
           if (generate_code) {
+            NormalizePackedType(&type, &is_unsigned);
             GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type);
           }
           return true;
@@ -1334,6 +1347,7 @@
       // ARM 32-bit always supports advanced SIMD (64-bit SIMD).
       switch (type) {
         case DataType::Type::kBool:
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
           *restrictions |= kNoDiv | kNoReduction;
           return TrySetVectorLength(8);
@@ -1353,6 +1367,7 @@
       // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD).
       switch (type) {
         case DataType::Type::kBool:
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
           *restrictions |= kNoDiv;
           return TrySetVectorLength(16);
@@ -1381,6 +1396,7 @@
       if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
         switch (type) {
           case DataType::Type::kBool:
+          case DataType::Type::kUint8:
           case DataType::Type::kInt8:
             *restrictions |=
                 kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD;
@@ -1410,6 +1426,7 @@
       if (features->AsMipsInstructionSetFeatures()->HasMsa()) {
         switch (type) {
           case DataType::Type::kBool:
+          case DataType::Type::kUint8:
           case DataType::Type::kInt8:
             *restrictions |= kNoDiv | kNoReduction | kNoSAD;
             return TrySetVectorLength(16);
@@ -1438,6 +1455,7 @@
       if (features->AsMips64InstructionSetFeatures()->HasMsa()) {
         switch (type) {
           case DataType::Type::kBool:
+          case DataType::Type::kUint8:
           case DataType::Type::kInt8:
             *restrictions |= kNoDiv | kNoReduction | kNoSAD;
             return TrySetVectorLength(16);
@@ -1534,11 +1552,16 @@
     HInstruction* base = org->InputAt(0);
     if (opb != nullptr) {
       vector = new (global_allocator_) HVecStore(
-          global_allocator_, base, opa, opb, type, vector_length_);
+          global_allocator_, base, opa, opb, type, org->GetSideEffects(), vector_length_);
     } else  {
       bool is_string_char_at = org->AsArrayGet()->IsStringCharAt();
-      vector = new (global_allocator_) HVecLoad(
-          global_allocator_, base, opa, type, vector_length_, is_string_char_at);
+      vector = new (global_allocator_) HVecLoad(global_allocator_,
+                                                base,
+                                                opa,
+                                                type,
+                                                org->GetSideEffects(),
+                                                vector_length_,
+                                                is_string_char_at);
     }
     // Known dynamically enforced alignment?
     if (vector_peeling_candidate_ != nullptr &&
@@ -1550,11 +1573,12 @@
     // Scalar store or load.
     DCHECK(vector_mode_ == kSequential);
     if (opb != nullptr) {
-      vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc);
+      vector = new (global_allocator_) HArraySet(
+          org->InputAt(0), opa, opb, type, org->GetSideEffects(), kNoDexPc);
     } else  {
       bool is_string_char_at = org->AsArrayGet()->IsStringCharAt();
       vector = new (global_allocator_) HArrayGet(
-          org->InputAt(0), opa, type, kNoDexPc, is_string_char_at);
+          org->InputAt(0), opa, type, org->GetSideEffects(), kNoDexPc, is_string_char_at);
     }
   }
   vector_map_->Put(org, vector);
@@ -1731,6 +1755,7 @@
           case Intrinsics::kMathMinLongLong:
           case Intrinsics::kMathMinFloatFloat:
           case Intrinsics::kMathMinDoubleDouble: {
+            NormalizePackedType(&type, &is_unsigned);
             vector = new (global_allocator_)
                 HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
             break;
@@ -1739,6 +1764,7 @@
           case Intrinsics::kMathMaxLongLong:
           case Intrinsics::kMathMaxFloatFloat:
           case Intrinsics::kMathMaxDoubleDouble: {
+            NormalizePackedType(&type, &is_unsigned);
             vector = new (global_allocator_)
                 HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
             break;
@@ -1851,14 +1877,15 @@
           VectorizeUse(node, s, generate_code, type, restrictions)) {
         if (generate_code) {
           if (vector_mode_ == kVector) {
+            NormalizePackedType(&type, &is_unsigned);
             vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd(
                 global_allocator_,
                 vector_map_->Get(r),
                 vector_map_->Get(s),
                 type,
                 vector_length_,
-                is_unsigned,
-                is_rounded));
+                is_rounded,
+                is_unsigned));
             MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
           } else {
             GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
@@ -1896,9 +1923,14 @@
        (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt ||
         v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) {
     HInstruction* x = v->InputAt(0);
-    if (x->IsSub() && x->GetType() == reduction_type) {
-      a = x->InputAt(0);
-      b = x->InputAt(1);
+    if (x->GetType() == reduction_type) {
+      int64_t c = 0;
+      if (x->IsSub()) {
+        a = x->InputAt(0);
+        b = x->InputAt(1);
+      } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) {
+        b = graph_->GetConstant(reduction_type, -c);  // hidden SUB!
+      }
     }
   }
   if (a == nullptr || b == nullptr) {
@@ -1906,22 +1938,21 @@
   }
   // Accept same-type or consistent sign extension for narrower-type on operands a and b.
   // The same-type or narrower operands are called r (a or lower) and s (b or lower).
+  // The narrowest type among a, b, and the inputs of their type conversions (if any) is used.
   HInstruction* r = a;
   HInstruction* s = b;
   bool is_unsigned = false;
   DataType::Type sub_type = a->GetType();
-  if (a->IsTypeConversion()) {
-    HInstruction* hunt = a;
-    while (hunt->IsTypeConversion()) {
-      hunt = hunt->InputAt(0);
-    }
-    sub_type = hunt->GetType();
-  } else if (b->IsTypeConversion()) {
-    HInstruction* hunt = a;
-    while (hunt->IsTypeConversion()) {
-      hunt = hunt->InputAt(0);
-    }
-    sub_type = hunt->GetType();
+  if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) {
+    sub_type = b->GetType();
+  }
+  if (a->IsTypeConversion() &&
+      DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) {
+    sub_type = a->InputAt(0)->GetType();
+  }
+  if (b->IsTypeConversion() &&
+      DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) {
+    sub_type = b->InputAt(0)->GetType();
   }
   if (reduction_type != sub_type &&
       (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) {
@@ -1942,6 +1973,7 @@
       VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
       VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
     if (generate_code) {
+      NormalizePackedType(&reduction_type, &is_unsigned);
       if (vector_mode_ == kVector) {
         vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
             global_allocator_,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 41ea998..cae5054 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -590,6 +590,7 @@
     case DataType::Type::kBool:
       DCHECK(IsUint<1>(value));
       FALLTHROUGH_INTENDED;
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c49cee3..411af2a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1760,14 +1760,26 @@
   static constexpr uint64_t kAllReads =
       ((1ULL << (kLastBitForReads + 1 - kFieldReadOffset)) - 1) << kFieldReadOffset;
 
-  // Translates type to bit flag.
+  // Translates type to bit flag. The type must correspond to a Java type.
   static uint64_t TypeFlag(DataType::Type type, int offset) {
-    CHECK_NE(type, DataType::Type::kVoid);
-    const uint64_t one = 1;
-    const int shift = static_cast<int>(type);  // 0-based consecutive enum
+    int shift;
+    switch (type) {
+      case DataType::Type::kReference: shift = 0; break;
+      case DataType::Type::kBool:      shift = 1; break;
+      case DataType::Type::kInt8:      shift = 2; break;
+      case DataType::Type::kUint16:    shift = 3; break;
+      case DataType::Type::kInt16:     shift = 4; break;
+      case DataType::Type::kInt32:     shift = 5; break;
+      case DataType::Type::kInt64:     shift = 6; break;
+      case DataType::Type::kFloat32:   shift = 7; break;
+      case DataType::Type::kFloat64:   shift = 8; break;
+      default:
+        LOG(FATAL) << "Unexpected data type " << type;
+        UNREACHABLE();
+    }
     DCHECK_LE(kFieldWriteOffset, shift);
     DCHECK_LT(shift, kArrayWriteOffset);
-    return one << (shift + offset);
+    return UINT64_C(1) << (shift + offset);
   }
 
   // Private constructor on direct flags value.
@@ -5185,7 +5197,7 @@
 class HTypeConversion FINAL : public HExpression<1> {
  public:
   // Instantiate a type conversion of `input` to `result_type`.
-  HTypeConversion(DataType::Type result_type, HInstruction* input, uint32_t dex_pc)
+  HTypeConversion(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HExpression(result_type, SideEffects::None(), dex_pc) {
     SetRawInputAt(0, input);
     // Invariant: We should never generate a conversion to a Boolean value.
@@ -5382,9 +5394,21 @@
   HArrayGet(HInstruction* array,
             HInstruction* index,
             DataType::Type type,
+            uint32_t dex_pc)
+     : HArrayGet(array,
+                 index,
+                 type,
+                 SideEffects::ArrayReadOfType(type),
+                 dex_pc,
+                 /* is_string_char_at */ false) {}
+
+  HArrayGet(HInstruction* array,
+            HInstruction* index,
+            DataType::Type type,
+            SideEffects side_effects,
             uint32_t dex_pc,
-            bool is_string_char_at = false)
-      : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) {
+            bool is_string_char_at)
+      : HExpression(type, side_effects, dex_pc) {
     SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at);
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
@@ -5453,7 +5477,21 @@
             HInstruction* value,
             DataType::Type expected_component_type,
             uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {
+      : HArraySet(array,
+                  index,
+                  value,
+                  expected_component_type,
+                  // Make a best guess for side effects now, may be refined during SSA building.
+                  ComputeSideEffects(GetComponentType(value->GetType(), expected_component_type)),
+                  dex_pc) {}
+
+  HArraySet(HInstruction* array,
+            HInstruction* index,
+            HInstruction* value,
+            DataType::Type expected_component_type,
+            SideEffects side_effects,
+            uint32_t dex_pc)
+      : HTemplateInstruction(side_effects, dex_pc) {
     SetPackedField<ExpectedComponentTypeField>(expected_component_type);
     SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference);
     SetPackedFlag<kFlagValueCanBeNull>(true);
@@ -5461,8 +5499,6 @@
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
     SetRawInputAt(2, value);
-    // Make a best guess now, may be refined during SSA building.
-    ComputeSideEffects();
   }
 
   bool NeedsEnvironment() const OVERRIDE {
@@ -5501,24 +5537,26 @@
   HInstruction* GetValue() const { return InputAt(2); }
 
   DataType::Type GetComponentType() const {
+    return GetComponentType(GetValue()->GetType(), GetRawExpectedComponentType());
+  }
+
+  static DataType::Type GetComponentType(DataType::Type value_type,
+                                         DataType::Type expected_component_type) {
     // The Dex format does not type floating point index operations. Since the
-    // `expected_component_type_` is set during building and can therefore not
+    // `expected_component_type` comes from SSA building and can therefore not
     // be correct, we also check what is the value type. If it is a floating
     // point type, we must use that type.
-    DataType::Type value_type = GetValue()->GetType();
     return ((value_type == DataType::Type::kFloat32) || (value_type == DataType::Type::kFloat64))
         ? value_type
-        : GetRawExpectedComponentType();
+        : expected_component_type;
   }
 
   DataType::Type GetRawExpectedComponentType() const {
     return GetPackedField<ExpectedComponentTypeField>();
   }
 
-  void ComputeSideEffects() {
-    DataType::Type type = GetComponentType();
-    SetSideEffects(SideEffects::ArrayWriteOfType(type).Union(
-        SideEffectsForArchRuntimeCalls(type)));
+  static SideEffects ComputeSideEffects(DataType::Type type) {
+    return SideEffects::ArrayWriteOfType(type).Union(SideEffectsForArchRuntimeCalls(type));
   }
 
   static SideEffects SideEffectsForArchRuntimeCalls(DataType::Type value_type) {
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 0aac260..d01f8c0 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -232,8 +232,10 @@
   DataType::Type input_type = input->AsVecOperation()->GetPackedType();
   switch (input_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
       return type == DataType::Type::kBool ||
+             type == DataType::Type::kUint8 ||
              type == DataType::Type::kInt8;
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -471,10 +473,14 @@
                  HInstruction* right,
                  DataType::Type packed_type,
                  size_t vector_length,
-                 bool is_unsigned,
                  bool is_rounded,
-                 uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
+                 bool is_unsigned = false)
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, kNoDexPc) {
+    // The `is_unsigned` flag may only be set when the packed type is Int32 or Int64.
+    // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types.
+    DCHECK(!is_unsigned ||
+           packed_type == DataType::Type::kInt32 ||
+           packed_type == DataType::Type::kInt64) << packed_type;
     DCHECK(HasConsistentPackedTypes(left, packed_type));
     DCHECK(HasConsistentPackedTypes(right, packed_type));
     SetPackedFlag<kFieldHAddIsUnsigned>(is_unsigned);
@@ -584,9 +590,13 @@
           HInstruction* right,
           DataType::Type packed_type,
           size_t vector_length,
-          bool is_unsigned,
-          uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
+          bool is_unsigned = false)
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, kNoDexPc) {
+    // The `is_unsigned` flag may only be set when the packed type is Int32 or Int64.
+    // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types.
+    DCHECK(!is_unsigned ||
+           packed_type == DataType::Type::kInt32 ||
+           packed_type == DataType::Type::kInt64) << packed_type;
     DCHECK(HasConsistentPackedTypes(left, packed_type));
     DCHECK(HasConsistentPackedTypes(right, packed_type));
     SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned);
@@ -622,9 +632,13 @@
           HInstruction* right,
           DataType::Type packed_type,
           size_t vector_length,
-          bool is_unsigned,
-          uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
+          bool is_unsigned = false)
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, kNoDexPc) {
+    // The `is_unsigned` flag may only be set when the packed type is Int32 or Int64.
+    // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types.
+    DCHECK(!is_unsigned ||
+           packed_type == DataType::Type::kInt32 ||
+           packed_type == DataType::Type::kInt64) << packed_type;
     DCHECK(HasConsistentPackedTypes(left, packed_type));
     DCHECK(HasConsistentPackedTypes(right, packed_type));
     SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned);
@@ -933,12 +947,13 @@
            HInstruction* base,
            HInstruction* index,
            DataType::Type packed_type,
+           SideEffects side_effects,
            size_t vector_length,
            bool is_string_char_at,
            uint32_t dex_pc = kNoDexPc)
       : HVecMemoryOperation(arena,
                             packed_type,
-                            SideEffects::ArrayReadOfType(packed_type),
+                            side_effects,
                             /* number_of_inputs */ 2,
                             vector_length,
                             dex_pc) {
@@ -977,11 +992,12 @@
             HInstruction* index,
             HInstruction* value,
             DataType::Type packed_type,
+            SideEffects side_effects,
             size_t vector_length,
             uint32_t dex_pc = kNoDexPc)
       : HVecMemoryOperation(arena,
                             packed_type,
-                            SideEffects::ArrayWriteOfType(packed_type),
+                            side_effects,
                             /* number_of_inputs */ 3,
                             vector_length,
                             dex_pc) {
diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc
index 3acdb20..7dbfcda 100644
--- a/compiler/optimizing/nodes_vector_test.cc
+++ b/compiler/optimizing/nodes_vector_test.cc
@@ -47,6 +47,16 @@
                                                    0,
                                                    DataType::Type::kInt32);
     entry_block_->AddInstruction(parameter_);
+    int8_parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                                        dex::TypeIndex(1),
+                                                        0,
+                                                        DataType::Type::kInt8);
+    entry_block_->AddInstruction(int8_parameter_);
+    int16_parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                                         dex::TypeIndex(2),
+                                                         0,
+                                                         DataType::Type::kInt16);
+    entry_block_->AddInstruction(int16_parameter_);
   }
 
   // General building fields.
@@ -58,6 +68,8 @@
   HBasicBlock* exit_block_;
 
   HInstruction* parameter_;
+  HInstruction* int8_parameter_;
+  HInstruction* int16_parameter_;
 };
 
 //
@@ -126,8 +138,14 @@
       HVecReplicateScalar(&allocator_, parameter_, DataType::Type::kInt32, 2);
   HVecOperation* v3 = new (&allocator_)
       HVecReplicateScalar(&allocator_, parameter_, DataType::Type::kInt16, 4);
-  HVecOperation* v4 = new (&allocator_)
-      HVecStore(&allocator_, parameter_, parameter_, v0, DataType::Type::kInt32, 4);
+  HVecOperation* v4 = new (&allocator_) HVecStore(
+      &allocator_,
+      parameter_,
+      parameter_,
+      v0,
+      DataType::Type::kInt32,
+      SideEffects::ArrayWriteOfType(DataType::Type::kInt32),
+      4);
 
   EXPECT_TRUE(v0->Equals(v0));
   EXPECT_TRUE(v1->Equals(v1));
@@ -175,12 +193,27 @@
 }
 
 TEST_F(NodesVectorTest, VectorAlignmentAndStringCharAtMatterOnLoad) {
-  HVecLoad* v0 = new (&allocator_) HVecLoad(
-      &allocator_, parameter_, parameter_, DataType::Type::kInt32, 4, /*is_string_char_at*/ false);
-  HVecLoad* v1 = new (&allocator_) HVecLoad(
-      &allocator_, parameter_, parameter_, DataType::Type::kInt32, 4, /*is_string_char_at*/ false);
-  HVecLoad* v2 = new (&allocator_) HVecLoad(
-      &allocator_, parameter_, parameter_, DataType::Type::kInt32, 4, /*is_string_char_at*/ true);
+  HVecLoad* v0 = new (&allocator_) HVecLoad(&allocator_,
+                                            parameter_,
+                                            parameter_,
+                                            DataType::Type::kInt32,
+                                            SideEffects::ArrayReadOfType(DataType::Type::kInt32),
+                                            4,
+                                            /*is_string_char_at*/ false);
+  HVecLoad* v1 = new (&allocator_) HVecLoad(&allocator_,
+                                            parameter_,
+                                            parameter_,
+                                            DataType::Type::kInt32,
+                                            SideEffects::ArrayReadOfType(DataType::Type::kInt32),
+                                            4,
+                                            /*is_string_char_at*/ false);
+  HVecLoad* v2 = new (&allocator_) HVecLoad(&allocator_,
+                                            parameter_,
+                                            parameter_,
+                                            DataType::Type::kInt32,
+                                            SideEffects::ArrayReadOfType(DataType::Type::kInt32),
+                                            4,
+                                            /*is_string_char_at*/ true);
 
   EXPECT_TRUE(v0->CanBeMoved());
   EXPECT_TRUE(v1->CanBeMoved());
@@ -209,99 +242,155 @@
 }
 
 TEST_F(NodesVectorTest, VectorSignMattersOnMin) {
-  HVecOperation* v0 = new (&allocator_)
+  HVecOperation* p0 = new (&allocator_)
       HVecReplicateScalar(&allocator_, parameter_, DataType::Type::kInt32, 4);
+  HVecOperation* p1 = new (&allocator_)
+      HVecReplicateScalar(&allocator_, int8_parameter_, DataType::Type::kInt8, 4);
+  HVecOperation* p2 = new (&allocator_)
+      HVecReplicateScalar(&allocator_, int16_parameter_, DataType::Type::kInt16, 4);
 
-  HVecMin* v1 = new (&allocator_)
-      HVecMin(&allocator_, v0, v0, DataType::Type::kInt32, 4, /*is_unsigned*/ true);
-  HVecMin* v2 = new (&allocator_)
-      HVecMin(&allocator_, v0, v0, DataType::Type::kInt32, 4, /*is_unsigned*/ false);
-  HVecMin* v3 = new (&allocator_)
-      HVecMin(&allocator_, v0, v0, DataType::Type::kInt32, 2, /*is_unsigned*/ true);
+  HVecMin* v0 = new (&allocator_) HVecMin(
+      &allocator_, p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ true);
+  HVecMin* v1 = new (&allocator_) HVecMin(
+      &allocator_, p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ false);
+  HVecMin* v2 = new (&allocator_) HVecMin(
+      &allocator_, p0, p0, DataType::Type::kInt32, 2, /*is_unsigned*/ true);
+  HVecMin* v3 = new (&allocator_) HVecMin(&allocator_, p1, p1, DataType::Type::kUint8, 16);
+  HVecMin* v4 = new (&allocator_) HVecMin(&allocator_, p1, p1, DataType::Type::kInt8, 16);
+  HVecMin* v5 = new (&allocator_) HVecMin(&allocator_, p2, p2, DataType::Type::kUint16, 8);
+  HVecMin* v6 = new (&allocator_) HVecMin(&allocator_, p2, p2, DataType::Type::kInt16, 8);
+  HVecMin* min_insns[] = { v0, v1, v2, v3, v4, v5, v6 };
 
-  EXPECT_FALSE(v0->CanBeMoved());
-  EXPECT_TRUE(v1->CanBeMoved());
-  EXPECT_TRUE(v2->CanBeMoved());
-  EXPECT_TRUE(v3->CanBeMoved());
+  EXPECT_FALSE(p0->CanBeMoved());
+  EXPECT_FALSE(p1->CanBeMoved());
+  EXPECT_FALSE(p2->CanBeMoved());
 
-  EXPECT_TRUE(v1->IsUnsigned());
-  EXPECT_FALSE(v2->IsUnsigned());
-  EXPECT_TRUE(v3->IsUnsigned());
+  for (HVecMin* min_insn : min_insns) {
+    EXPECT_TRUE(min_insn->CanBeMoved());
+  }
 
-  EXPECT_TRUE(v1->Equals(v1));
-  EXPECT_TRUE(v2->Equals(v2));
-  EXPECT_TRUE(v3->Equals(v3));
+  // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64.
+  EXPECT_TRUE(v0->IsUnsigned());
+  EXPECT_FALSE(v1->IsUnsigned());
+  EXPECT_TRUE(v2->IsUnsigned());
 
-  EXPECT_FALSE(v1->Equals(v2));  // different signs
-  EXPECT_FALSE(v1->Equals(v3));  // different vector lengths
+  for (HVecMin* min_insn1 : min_insns) {
+    for (HVecMin* min_insn2 : min_insns) {
+      EXPECT_EQ(min_insn1 == min_insn2, min_insn1->Equals(min_insn2));
+    }
+  }
 }
 
 TEST_F(NodesVectorTest, VectorSignMattersOnMax) {
-  HVecOperation* v0 = new (&allocator_)
+  HVecOperation* p0 = new (&allocator_)
       HVecReplicateScalar(&allocator_, parameter_, DataType::Type::kInt32, 4);
+  HVecOperation* p1 = new (&allocator_)
+      HVecReplicateScalar(&allocator_, int8_parameter_, DataType::Type::kInt8, 4);
+  HVecOperation* p2 = new (&allocator_)
+      HVecReplicateScalar(&allocator_, int16_parameter_, DataType::Type::kInt16, 4);
 
-  HVecMax* v1 = new (&allocator_)
-      HVecMax(&allocator_, v0, v0, DataType::Type::kInt32, 4, /*is_unsigned*/ true);
-  HVecMax* v2 = new (&allocator_)
-      HVecMax(&allocator_, v0, v0, DataType::Type::kInt32, 4, /*is_unsigned*/ false);
-  HVecMax* v3 = new (&allocator_)
-      HVecMax(&allocator_, v0, v0, DataType::Type::kInt32, 2, /*is_unsigned*/ true);
+  HVecMax* v0 = new (&allocator_) HVecMax(
+      &allocator_, p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ true);
+  HVecMax* v1 = new (&allocator_) HVecMax(
+      &allocator_, p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ false);
+  HVecMax* v2 = new (&allocator_) HVecMax(
+      &allocator_, p0, p0, DataType::Type::kInt32, 2, /*is_unsigned*/ true);
+  HVecMax* v3 = new (&allocator_) HVecMax(&allocator_, p1, p1, DataType::Type::kUint8, 16);
+  HVecMax* v4 = new (&allocator_) HVecMax(&allocator_, p1, p1, DataType::Type::kInt8, 16);
+  HVecMax* v5 = new (&allocator_) HVecMax(&allocator_, p2, p2, DataType::Type::kUint16, 8);
+  HVecMax* v6 = new (&allocator_) HVecMax(&allocator_, p2, p2, DataType::Type::kInt16, 8);
+  HVecMax* max_insns[] = { v0, v1, v2, v3, v4, v5, v6 };
 
-  EXPECT_FALSE(v0->CanBeMoved());
-  EXPECT_TRUE(v1->CanBeMoved());
-  EXPECT_TRUE(v2->CanBeMoved());
-  EXPECT_TRUE(v3->CanBeMoved());
+  EXPECT_FALSE(p0->CanBeMoved());
+  EXPECT_FALSE(p1->CanBeMoved());
+  EXPECT_FALSE(p2->CanBeMoved());
 
-  EXPECT_TRUE(v1->IsUnsigned());
-  EXPECT_FALSE(v2->IsUnsigned());
-  EXPECT_TRUE(v3->IsUnsigned());
+  for (HVecMax* max_insn : max_insns) {
+    EXPECT_TRUE(max_insn->CanBeMoved());
+  }
 
-  EXPECT_TRUE(v1->Equals(v1));
-  EXPECT_TRUE(v2->Equals(v2));
-  EXPECT_TRUE(v3->Equals(v3));
+  // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64.
+  EXPECT_TRUE(v0->IsUnsigned());
+  EXPECT_FALSE(v1->IsUnsigned());
+  EXPECT_TRUE(v2->IsUnsigned());
 
-  EXPECT_FALSE(v1->Equals(v2));  // different signs
-  EXPECT_FALSE(v1->Equals(v3));  // different vector lengths
+  for (HVecMax* max_insn1 : max_insns) {
+    for (HVecMax* max_insn2 : max_insns) {
+      EXPECT_EQ(max_insn1 == max_insn2, max_insn1->Equals(max_insn2));
+    }
+  }
 }
 
 TEST_F(NodesVectorTest, VectorAttributesMatterOnHalvingAdd) {
-  HVecOperation* v0 = new (&allocator_)
+  HVecOperation* p0 = new (&allocator_)
       HVecReplicateScalar(&allocator_, parameter_, DataType::Type::kInt32, 4);
+  HVecOperation* p1 = new (&allocator_)
+      HVecReplicateScalar(&allocator_, int8_parameter_, DataType::Type::kInt8, 4);
+  HVecOperation* p2 = new (&allocator_)
+      HVecReplicateScalar(&allocator_, int16_parameter_, DataType::Type::kInt16, 4);
 
+  HVecHalvingAdd* v0 = new (&allocator_) HVecHalvingAdd(
+      &allocator_, p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ true, /*is_unsigned*/ true);
   HVecHalvingAdd* v1 = new (&allocator_) HVecHalvingAdd(
-      &allocator_, v0, v0, DataType::Type::kInt32, 4, /*is_unsigned*/ true, /*is_rounded*/ true);
+      &allocator_, p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ false, /*is_unsigned*/ true);
   HVecHalvingAdd* v2 = new (&allocator_) HVecHalvingAdd(
-      &allocator_, v0, v0, DataType::Type::kInt32, 4, /*is_unsigned*/ true, /*is_rounded*/ false);
+      &allocator_, p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ true, /*is_unsigned*/ false);
   HVecHalvingAdd* v3 = new (&allocator_) HVecHalvingAdd(
-      &allocator_, v0, v0, DataType::Type::kInt32, 4, /*is_unsigned*/ false, /*is_rounded*/ true);
+      &allocator_, p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ false, /*is_unsigned*/ false);
   HVecHalvingAdd* v4 = new (&allocator_) HVecHalvingAdd(
-      &allocator_, v0, v0, DataType::Type::kInt32, 4, /*is_unsigned*/ false, /*is_rounded*/ false);
+      &allocator_, p0, p0, DataType::Type::kInt32, 2, /*is_rounded*/ true, /*is_unsigned*/ true);
   HVecHalvingAdd* v5 = new (&allocator_) HVecHalvingAdd(
-      &allocator_, v0, v0, DataType::Type::kInt32, 2, /*is_unsigned*/ true, /*is_rounded*/ true);
+      &allocator_, p1, p1, DataType::Type::kUint8, 16, /*is_rounded*/ true);
+  HVecHalvingAdd* v6 = new (&allocator_) HVecHalvingAdd(
+      &allocator_, p1, p1, DataType::Type::kUint8, 16, /*is_rounded*/ false);
+  HVecHalvingAdd* v7 = new (&allocator_) HVecHalvingAdd(
+      &allocator_, p1, p1, DataType::Type::kInt8, 16, /*is_rounded*/ true);
+  HVecHalvingAdd* v8 = new (&allocator_) HVecHalvingAdd(
+      &allocator_, p1, p1, DataType::Type::kInt8, 16, /*is_rounded*/ false);
+  HVecHalvingAdd* v9 = new (&allocator_) HVecHalvingAdd(
+      &allocator_, p2, p2, DataType::Type::kUint16, 8, /*is_rounded*/ true);
+  HVecHalvingAdd* v10 = new (&allocator_) HVecHalvingAdd(
+      &allocator_, p2, p2, DataType::Type::kUint16, 8, /*is_rounded*/ false);
+  HVecHalvingAdd* v11 = new (&allocator_) HVecHalvingAdd(
+      &allocator_, p2, p2, DataType::Type::kInt16, 2, /*is_rounded*/ true);
+  HVecHalvingAdd* v12 = new (&allocator_) HVecHalvingAdd(
+      &allocator_, p2, p2, DataType::Type::kInt16, 2, /*is_rounded*/ false);
+  HVecHalvingAdd* hadd_insns[] = { v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12 };
 
-  EXPECT_FALSE(v0->CanBeMoved());
-  EXPECT_TRUE(v1->CanBeMoved());
-  EXPECT_TRUE(v2->CanBeMoved());
-  EXPECT_TRUE(v3->CanBeMoved());
-  EXPECT_TRUE(v4->CanBeMoved());
-  EXPECT_TRUE(v5->CanBeMoved());
+  EXPECT_FALSE(p0->CanBeMoved());
+  EXPECT_FALSE(p1->CanBeMoved());
+  EXPECT_FALSE(p2->CanBeMoved());
 
-  EXPECT_TRUE(v1->Equals(v1));
-  EXPECT_TRUE(v2->Equals(v2));
-  EXPECT_TRUE(v3->Equals(v3));
-  EXPECT_TRUE(v4->Equals(v4));
-  EXPECT_TRUE(v5->Equals(v5));
+  for (HVecHalvingAdd* hadd_insn : hadd_insns) {
+    EXPECT_TRUE(hadd_insn->CanBeMoved());
+  }
 
-  EXPECT_TRUE(v1->IsUnsigned() && v1->IsRounded());
-  EXPECT_TRUE(v2->IsUnsigned() && !v2->IsRounded());
-  EXPECT_TRUE(!v3->IsUnsigned() && v3->IsRounded());
-  EXPECT_TRUE(!v4->IsUnsigned() && !v4->IsRounded());
-  EXPECT_TRUE(v5->IsUnsigned() && v5->IsRounded());
+  // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64.
+  EXPECT_TRUE(v0->IsUnsigned());
+  EXPECT_TRUE(v1->IsUnsigned());
+  EXPECT_TRUE(!v2->IsUnsigned());
+  EXPECT_TRUE(!v3->IsUnsigned());
+  EXPECT_TRUE(v4->IsUnsigned());
 
-  EXPECT_FALSE(v1->Equals(v2));  // different attributes
-  EXPECT_FALSE(v1->Equals(v3));  // different attributes
-  EXPECT_FALSE(v1->Equals(v4));  // different attributes
-  EXPECT_FALSE(v1->Equals(v5));  // different vector lengths
+  EXPECT_TRUE(v0->IsRounded());
+  EXPECT_TRUE(!v1->IsRounded());
+  EXPECT_TRUE(v2->IsRounded());
+  EXPECT_TRUE(!v3->IsRounded());
+  EXPECT_TRUE(v4->IsRounded());
+  EXPECT_TRUE(v5->IsRounded());
+  EXPECT_TRUE(!v6->IsRounded());
+  EXPECT_TRUE(v7->IsRounded());
+  EXPECT_TRUE(!v8->IsRounded());
+  EXPECT_TRUE(v9->IsRounded());
+  EXPECT_TRUE(!v10->IsRounded());
+  EXPECT_TRUE(v11->IsRounded());
+  EXPECT_TRUE(!v12->IsRounded());
+
+  for (HVecHalvingAdd* hadd_insn1 : hadd_insns) {
+    for (HVecHalvingAdd* hadd_insn2 : hadd_insns) {
+      EXPECT_EQ(hadd_insn1 == hadd_insn2, hadd_insn1->Equals(hadd_insn2));
+    }
+  }
 }
 
 TEST_F(NodesVectorTest, VectorOperationMattersOnMultiplyAccumulate) {
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index f0057c3..1786aa7 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -112,6 +112,7 @@
         case DataType::Type::kReference:
         case DataType::Type::kInt32:
         case DataType::Type::kUint16:
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
         case DataType::Type::kBool:
         case DataType::Type::kInt16:
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 4ff7315..33df607 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1940,6 +1940,7 @@
         case DataType::Type::kReference:
         case DataType::Type::kInt32:
         case DataType::Type::kUint16:
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
         case DataType::Type::kBool:
         case DataType::Type::kInt16:
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 2012cd5..9803a7b 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -1116,6 +1116,7 @@
     case DataType::Type::kReference:
     case DataType::Type::kInt32:
     case DataType::Type::kUint16:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kBool:
     case DataType::Type::kInt16:
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 110db47..b3c8f10 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -514,9 +514,10 @@
   DataType::Type type = instr->InputAt(0)->GetType();
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
       last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
       break;
@@ -633,9 +634,10 @@
 
   switch (type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32: {
       if (maybe_compressed_char_at) {
         last_visited_internal_latency_ += kArmMemoryLoadLatency;
@@ -733,9 +735,10 @@
 
   switch (value_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32: {
       if (index->IsConstant()) {
         last_visited_latency_ = kArmMemoryStoreLatency;
@@ -916,9 +919,10 @@
 
   switch (field_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
       last_visited_latency_ = kArmMemoryLoadLatency;
       break;
@@ -977,9 +981,10 @@
 
   switch (field_type) {
     case DataType::Type::kBool:
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
       if (is_volatile) {
         last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
         last_visited_latency_ = kArmMemoryBarrierLatency;
@@ -1047,6 +1052,7 @@
   DataType::Type input_type = instr->GetInputType();
 
   switch (result_type) {
+    case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
@@ -1072,6 +1078,7 @@
     case DataType::Type::kInt64:
       switch (input_type) {
         case DataType::Type::kBool:
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
         case DataType::Type::kUint16:
         case DataType::Type::kInt16:
@@ -1095,6 +1102,7 @@
     case DataType::Type::kFloat32:
       switch (input_type) {
         case DataType::Type::kBool:
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
         case DataType::Type::kUint16:
         case DataType::Type::kInt16:
@@ -1118,6 +1126,7 @@
     case DataType::Type::kFloat64:
       switch (input_type) {
         case DataType::Type::kBool:
+        case DataType::Type::kUint8:
         case DataType::Type::kInt8:
         case DataType::Type::kUint16:
         case DataType::Type::kInt16:
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
index ac5eb15..9731712 100644
--- a/compiler/optimizing/side_effects_test.cc
+++ b/compiler/optimizing/side_effects_test.cc
@@ -21,6 +21,19 @@
 
 namespace art {
 
+// Only runtime types other than void are allowed.
+static const DataType::Type kTestTypes[] = {
+    DataType::Type::kReference,
+    DataType::Type::kBool,
+    DataType::Type::kInt8,
+    DataType::Type::kUint16,
+    DataType::Type::kInt16,
+    DataType::Type::kInt32,
+    DataType::Type::kInt64,
+    DataType::Type::kFloat32,
+    DataType::Type::kFloat64,
+};
+
 /**
  * Tests for the SideEffects class.
  */
@@ -91,9 +104,7 @@
 
 TEST(SideEffectsTest, DependencesAndNoDependences) {
   // Apply test to each individual data type.
-  for (DataType::Type type = DataType::Type::kReference;
-       type < DataType::Type::kVoid;
-       type = static_cast<DataType::Type>(static_cast<uint8_t>(type) + 1u)) {
+  for (DataType::Type type : kTestTypes) {
     // Same data type and access type: proper write/read dep.
     testWriteAndReadDependence(
         SideEffects::FieldWriteOfType(type, false),
@@ -169,9 +180,7 @@
 TEST(SideEffectsTest, AllWritesAndReads) {
   SideEffects s = SideEffects::None();
   // Keep taking the union of different writes and reads.
-  for (DataType::Type type = DataType::Type::kReference;
-       type < DataType::Type::kVoid;
-       type = static_cast<DataType::Type>(static_cast<uint8_t>(type) + 1u)) {
+  for (DataType::Type type : kTestTypes) {
     s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false));
     s = s.Union(SideEffects::ArrayWriteOfType(type));
     s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false));
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 77b7a22..2356316 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -393,7 +393,7 @@
         }
         // Refine the side effects of this floating point aset. Note that we do this even if
         // no replacement occurs, since the right-hand-side may have been corrected already.
-        aset->ComputeSideEffects();
+        aset->SetSideEffects(HArraySet::ComputeSideEffects(aset->GetComponentType()));
       } else {
         // Array elements are integral and the value assigned to it initially
         // was integral too. Nothing to do.
diff --git a/dalvikvm/Android.bp b/dalvikvm/Android.bp
index 0405fe1..cca9ac4 100644
--- a/dalvikvm/Android.bp
+++ b/dalvikvm/Android.bp
@@ -36,7 +36,7 @@
             ],
             ldflags: ["-Wl,--export-dynamic"],
         },
-        linux: {
+        linux_glibc: {
             ldflags: ["-Wl,--export-dynamic"],
         },
     },
diff --git a/dexdump/Android.bp b/dexdump/Android.bp
index 60ce363..705043b 100644
--- a/dexdump/Android.bp
+++ b/dexdump/Android.bp
@@ -22,7 +22,7 @@
         "dexdump_main.cc",
         "dexdump.cc",
     ],
-    cflags: ["-Wall"],
+    cflags: ["-Wall", "-Werror"],
     shared_libs: [
         "libart",
         "libbase",
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 0db790b..7599d23 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -906,14 +906,27 @@
       // Call site information is too large to detail in disassembly so just output the index.
       outSize = snprintf(buf.get(), bufSize, "call_site@%0*x", width, index);
       break;
-    // SOME NOT SUPPORTED:
-    // case Instruction::kIndexVaries:
-    // case Instruction::kIndexInlineMethod:
-    default:
-      outSize = snprintf(buf.get(), bufSize, "<?>");
+    case Instruction::kIndexMethodHandleRef:
+      // Method handle information is too large to detail in disassembly so just output the index.
+      outSize = snprintf(buf.get(), bufSize, "method_handle@%0*x", width, index);
+      break;
+    case Instruction::kIndexProtoRef:
+      if (index < pDexFile->GetHeader().proto_ids_size_) {
+        const DexFile::ProtoId& protoId = pDexFile->GetProtoId(index);
+        const Signature signature = pDexFile->GetProtoSignature(protoId);
+        const std::string& proto = signature.ToString();
+        outSize = snprintf(buf.get(), bufSize, "%s // proto@%0*x", proto.c_str(), width, index);
+      } else {
+        outSize = snprintf(buf.get(), bufSize, "<?> // proto@%0*x", width, index);
+      }
       break;
   }  // switch
 
+  if (outSize == 0) {
+    // The index type has not been handled in the switch above.
+    outSize = snprintf(buf.get(), bufSize, "<?>");
+  }
+
   // Determine success of string construction.
   if (outSize >= bufSize) {
     // The buffer wasn't big enough; retry with computed size. Note: snprintf()
diff --git a/dexlist/Android.bp b/dexlist/Android.bp
index 52b1ee9..03943bf 100644
--- a/dexlist/Android.bp
+++ b/dexlist/Android.bp
@@ -16,7 +16,7 @@
     name: "dexlist",
     host_supported: true,
     srcs: ["dexlist.cc"],
-    cflags: ["-Wall"],
+    cflags: ["-Wall", "-Werror"],
     shared_libs: ["libart"],
 }
 
diff --git a/openjdkjvmti/OpenjdkJvmTi.cc b/openjdkjvmti/OpenjdkJvmTi.cc
index bac57f9..b30d45a 100644
--- a/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/openjdkjvmti/OpenjdkJvmTi.cc
@@ -163,18 +163,16 @@
     return ThreadUtil::ResumeThreadList(env, request_count, request_list, results);
   }
 
-  static jvmtiError StopThread(jvmtiEnv* env,
-                               jthread thread ATTRIBUTE_UNUSED,
-                               jobject exception ATTRIBUTE_UNUSED) {
+  static jvmtiError StopThread(jvmtiEnv* env, jthread thread, jobject exception) {
     ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_signal_thread);
-    return ERR(NOT_IMPLEMENTED);
+    return ThreadUtil::StopThread(env, thread, exception);
   }
 
-  static jvmtiError InterruptThread(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
+  static jvmtiError InterruptThread(jvmtiEnv* env, jthread thread) {
     ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_signal_thread);
-    return ERR(NOT_IMPLEMENTED);
+    return ThreadUtil::InterruptThread(env, thread);
   }
 
   static jvmtiError GetThreadInfo(jvmtiEnv* env, jthread thread, jvmtiThreadInfo* info_ptr) {
diff --git a/openjdkjvmti/art_jvmti.h b/openjdkjvmti/art_jvmti.h
index 10ddfc1..ad405e8 100644
--- a/openjdkjvmti/art_jvmti.h
+++ b/openjdkjvmti/art_jvmti.h
@@ -229,7 +229,7 @@
     .can_get_monitor_info                            = 1,
     .can_pop_frame                                   = 0,
     .can_redefine_classes                            = 1,
-    .can_signal_thread                               = 0,
+    .can_signal_thread                               = 1,
     .can_get_source_file_name                        = 1,
     .can_get_line_numbers                            = 1,
     .can_get_source_debug_extension                  = 1,
diff --git a/openjdkjvmti/ti_redefine.cc b/openjdkjvmti/ti_redefine.cc
index 1b4e910..5d9bf2c 100644
--- a/openjdkjvmti/ti_redefine.cc
+++ b/openjdkjvmti/ti_redefine.cc
@@ -1396,7 +1396,7 @@
     linker->SetEntryPointsToInterpreter(&method);
     method.SetCodeItemOffset(dex_file_->FindCodeItemOffset(class_def, dex_method_idx));
     // Clear all the intrinsics related flags.
-    method.ClearAccessFlags(art::kAccIntrinsic | (~art::kAccFlagsNotUsedByIntrinsic));
+    method.SetNotIntrinsic();
     // Notify the jit that this method is redefined.
     art::jit::Jit* jit = driver_->runtime_->GetJit();
     if (jit != nullptr) {
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index 907b515..9a809df 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -949,4 +949,65 @@
   return OK;
 }
 
+jvmtiError ThreadUtil::StopThread(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                  jthread thread,
+                                  jobject exception) {
+  art::Thread* self = art::Thread::Current();
+  art::ScopedObjectAccess soa(self);
+  art::StackHandleScope<1> hs(self);
+  if (exception == nullptr) {
+    return ERR(INVALID_OBJECT);
+  }
+  art::ObjPtr<art::mirror::Object> obj(soa.Decode<art::mirror::Object>(exception));
+  if (!obj->GetClass()->IsThrowableClass()) {
+    return ERR(INVALID_OBJECT);
+  }
+  art::Handle<art::mirror::Throwable> exc(hs.NewHandle(obj->AsThrowable()));
+  art::MutexLock tll_mu(self, *art::Locks::thread_list_lock_);
+  art::Thread* target = nullptr;
+  jvmtiError err = ERR(INTERNAL);
+  if (!GetAliveNativeThread(thread, soa, &target, &err)) {
+    return err;
+  } else if (target->GetState() == art::ThreadState::kStarting || target->IsStillStarting()) {
+    return ERR(THREAD_NOT_ALIVE);
+  }
+  struct StopThreadClosure : public art::Closure {
+   public:
+    explicit StopThreadClosure(art::Handle<art::mirror::Throwable> except) : exception_(except) { }
+
+    void Run(art::Thread* me) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      // Make sure the thread is prepared to notice the exception.
+      art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(me);
+      me->SetAsyncException(exception_.Get());
+      // Wake up the thread if it is sleeping.
+      me->Notify();
+    }
+
+   private:
+    art::Handle<art::mirror::Throwable> exception_;
+  };
+  StopThreadClosure c(exc);
+  if (target->RequestSynchronousCheckpoint(&c)) {
+    return OK;
+  } else {
+    // Something went wrong, probably the thread died.
+    return ERR(THREAD_NOT_ALIVE);
+  }
+}
+
+jvmtiError ThreadUtil::InterruptThread(jvmtiEnv* env ATTRIBUTE_UNUSED, jthread thread) {
+  art::Thread* self = art::Thread::Current();
+  art::ScopedObjectAccess soa(self);
+  art::MutexLock tll_mu(self, *art::Locks::thread_list_lock_);
+  art::Thread* target = nullptr;
+  jvmtiError err = ERR(INTERNAL);
+  if (!GetAliveNativeThread(thread, soa, &target, &err)) {
+    return err;
+  } else if (target->GetState() == art::ThreadState::kStarting || target->IsStillStarting()) {
+    return ERR(THREAD_NOT_ALIVE);
+  }
+  target->Interrupt(self);
+  return OK;
+}
+
 }  // namespace openjdkjvmti
diff --git a/openjdkjvmti/ti_thread.h b/openjdkjvmti/ti_thread.h
index ceebff6..09b4cab 100644
--- a/openjdkjvmti/ti_thread.h
+++ b/openjdkjvmti/ti_thread.h
@@ -93,6 +93,9 @@
                                      const jthread* threads,
                                      jvmtiError* results);
 
+  static jvmtiError StopThread(jvmtiEnv* env, jthread thr, jobject exception);
+  static jvmtiError InterruptThread(jvmtiEnv* env, jthread thr);
+
   // Returns true if we decoded the thread and it is alive, false otherwise with an appropriate
   // error placed into 'err'. A thread is alive if it has had it's 'start' function called and has
   // (or at least could have) executed managed code and has not yet returned past it's first managed
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 7ff35ac..4181169 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -377,14 +377,14 @@
 }
 
 inline void ArtMethod::SetIntrinsic(uint32_t intrinsic) {
-  DCHECK(IsUint<8>(intrinsic));
   // Currently we only do intrinsics for static/final methods or methods of final
   // classes. We don't set kHasSingleImplementation for those methods.
   DCHECK(IsStatic() || IsFinal() || GetDeclaringClass()->IsFinal()) <<
       "Potential conflict with kAccSingleImplementation";
-  uint32_t new_value = (GetAccessFlags() & kAccFlagsNotUsedByIntrinsic) |
-      kAccIntrinsic |
-      (intrinsic << POPCOUNT(kAccFlagsNotUsedByIntrinsic));
+  static const int kAccFlagsShift = CTZ(kAccIntrinsicBits);
+  DCHECK_LE(intrinsic, kAccIntrinsicBits >> kAccFlagsShift);
+  uint32_t intrinsic_bits = intrinsic << kAccFlagsShift;
+  uint32_t new_value = (GetAccessFlags() & ~kAccIntrinsicBits) | kAccIntrinsic | intrinsic_bits;
   if (kIsDebugBuild) {
     uint32_t java_flags = (GetAccessFlags() & kAccJavaFlagsMask);
     bool is_constructor = IsConstructor();
diff --git a/runtime/art_method.h b/runtime/art_method.h
index fbdc32d..caef81c 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -117,26 +117,6 @@
     access_flags_.store(new_access_flags, std::memory_order_relaxed);
   }
 
-  // This setter guarantees atomicity.
-  void AddAccessFlags(uint32_t flag) {
-    uint32_t old_access_flags;
-    uint32_t new_access_flags;
-    do {
-      old_access_flags = access_flags_.load(std::memory_order_relaxed);
-      new_access_flags = old_access_flags | flag;
-    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
-  }
-
-  // This setter guarantees atomicity.
-  void ClearAccessFlags(uint32_t flag) {
-    uint32_t old_access_flags;
-    uint32_t new_access_flags;
-    do {
-      old_access_flags = access_flags_.load(std::memory_order_relaxed);
-      new_access_flags = old_access_flags & ~flag;
-    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
-  }
-
   static MemberOffset AccessFlagsOffset() {
     return MemberOffset(OFFSETOF_MEMBER(ArtMethod, access_flags_));
   }
@@ -196,12 +176,21 @@
   ALWAYS_INLINE void SetIntrinsic(uint32_t intrinsic) REQUIRES_SHARED(Locks::mutator_lock_);
 
   uint32_t GetIntrinsic() {
+    static const int kAccFlagsShift = CTZ(kAccIntrinsicBits);
+    static_assert(IsPowerOfTwo((kAccIntrinsicBits >> kAccFlagsShift) + 1),
+                  "kAccIntrinsicBits are not continuous");
+    static_assert((kAccIntrinsic & kAccIntrinsicBits) == 0,
+                  "kAccIntrinsic overlaps kAccIntrinsicBits");
     DCHECK(IsIntrinsic());
-    return (GetAccessFlags() >> POPCOUNT(kAccFlagsNotUsedByIntrinsic)) & kAccMaxIntrinsic;
+    return (GetAccessFlags() & kAccIntrinsicBits) >> kAccFlagsShift;
+  }
+
+  void SetNotIntrinsic() REQUIRES_SHARED(Locks::mutator_lock_) {
+    ClearAccessFlags(kAccIntrinsic | kAccIntrinsicBits);
   }
 
   bool IsCopied() {
-    static_assert((kAccCopied & kAccFlagsNotUsedByIntrinsic) == kAccCopied,
+    static_assert((kAccCopied & (kAccIntrinsic | kAccIntrinsicBits)) == 0,
                   "kAccCopied conflicts with intrinsic modifier");
     const bool copied = (GetAccessFlags() & kAccCopied) != 0;
     // (IsMiranda() || IsDefaultConflicting()) implies copied
@@ -211,7 +200,7 @@
   }
 
   bool IsMiranda() {
-    static_assert((kAccMiranda & kAccFlagsNotUsedByIntrinsic) == kAccMiranda,
+    static_assert((kAccMiranda & (kAccIntrinsic | kAccIntrinsicBits)) == 0,
                   "kAccMiranda conflicts with intrinsic modifier");
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
@@ -245,7 +234,7 @@
 
   // This is set by the class linker.
   bool IsDefault() {
-    static_assert((kAccDefault & kAccFlagsNotUsedByIntrinsic) == kAccDefault,
+    static_assert((kAccDefault & (kAccIntrinsic | kAccIntrinsicBits)) == 0,
                   "kAccDefault conflicts with intrinsic modifier");
     return (GetAccessFlags() & kAccDefault) != 0;
   }
@@ -290,6 +279,22 @@
     AddAccessFlags(kAccSkipAccessChecks);
   }
 
+  bool PreviouslyWarm() {
+    if (IsIntrinsic()) {
+      // kAccPreviouslyWarm overlaps with kAccIntrinsicBits.
+      return true;
+    }
+    return (GetAccessFlags() & kAccPreviouslyWarm) != 0;
+  }
+
+  void SetPreviouslyWarm() {
+    if (IsIntrinsic()) {
+      // kAccPreviouslyWarm overlaps with kAccIntrinsicBits.
+      return;
+    }
+    AddAccessFlags(kAccPreviouslyWarm);
+  }
+
   // Should this method be run in the interpreter and count locks (e.g., failed structured-
   // locking verification)?
   bool MustCountLocks() {
@@ -299,6 +304,10 @@
     return (GetAccessFlags() & kAccMustCountLocks) != 0;
   }
 
+  void SetMustCountLocks() {
+    AddAccessFlags(kAccMustCountLocks);
+  }
+
   // Checks to see if the method was annotated with @dalvik.annotation.optimization.FastNative
   // -- Independent of kAccFastNative access flags.
   bool IsAnnotatedWithFastNative();
@@ -782,6 +791,37 @@
 
   template <ReadBarrierOption kReadBarrierOption> void GetAccessFlagsDCheck();
 
+  static inline bool IsValidIntrinsicUpdate(uint32_t modifier) {
+    return (((modifier & kAccIntrinsic) == kAccIntrinsic) &&
+            (((modifier & ~(kAccIntrinsic | kAccIntrinsicBits)) == 0)));
+  }
+
+  static inline bool OverlapsIntrinsicBits(uint32_t modifier) {
+    return (modifier & kAccIntrinsicBits) != 0;
+  }
+
+  // This setter guarantees atomicity.
+  void AddAccessFlags(uint32_t flag) {
+    DCHECK(!IsIntrinsic() || !OverlapsIntrinsicBits(flag) || IsValidIntrinsicUpdate(flag));
+    uint32_t old_access_flags;
+    uint32_t new_access_flags;
+    do {
+      old_access_flags = access_flags_.load(std::memory_order_relaxed);
+      new_access_flags = old_access_flags | flag;
+    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
+  }
+
+  // This setter guarantees atomicity.
+  void ClearAccessFlags(uint32_t flag) {
+    DCHECK(!IsIntrinsic() || !OverlapsIntrinsicBits(flag) || IsValidIntrinsicUpdate(flag));
+    uint32_t old_access_flags;
+    uint32_t new_access_flags;
+    do {
+      old_access_flags = access_flags_.load(std::memory_order_relaxed);
+      new_access_flags = old_access_flags & ~flag;
+    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
+  }
+
   DISALLOW_COPY_AND_ASSIGN(ArtMethod);  // Need to use CopyFrom to deal with 32 vs 64 bits.
 };
 
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 6d1de00..792c581 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -163,24 +163,36 @@
     moveUp(GuardState::kClosed, nullptr);
   }
 
+#if defined(__linux__)
+  // close() always releases the fd on Linux, even when it reports failure.
+  UNUSED(result);
+#else
   if (result == -1) {
     return -errno;
-  } else {
-    fd_ = -1;
-    file_path_ = "";
-    return 0;
   }
+#endif
+
+  fd_ = -1;
+  file_path_ = "";
+  return 0;
 }
 
 int FdFile::Flush() {
   DCHECK(!read_only_mode_);
+
 #ifdef __linux__
   int rc = TEMP_FAILURE_RETRY(fdatasync(fd_));
 #else
   int rc = TEMP_FAILURE_RETRY(fsync(fd_));
 #endif
+
   moveUp(GuardState::kFlushed, "Flushing closed file.");
-  return (rc == -1) ? -errno : rc;
+  if (rc == 0) {
+    return 0;
+  }
+
+  // Don't report failure if we just tried to flush a pipe or socket.
+  return errno == EINVAL ? 0 : -errno;
 }
 
 int64_t FdFile::Read(char* buf, int64_t byte_count, int64_t offset) const {
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 91b08bc..3fb70f6 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -94,6 +94,7 @@
   int SetLength(int64_t new_length) OVERRIDE WARN_UNUSED;
   int64_t GetLength() const OVERRIDE;
   int64_t Write(const char* buf, int64_t byte_count, int64_t offset) OVERRIDE WARN_UNUSED;
+
   int Flush() OVERRIDE WARN_UNUSED;
 
   // Short for SetLength(0); Flush(); Close();
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 8b1a115..042fbc9 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -274,4 +274,15 @@
   EXPECT_EQ(reset_compare(tmp, tmp6), 0);
 }
 
+TEST_F(FdFileTest, PipeFlush) {
+  int pipefd[2];
+  ASSERT_EQ(0, pipe2(pipefd, O_CLOEXEC));
+
+  FdFile file(pipefd[1], true);
+  ASSERT_TRUE(file.WriteFully("foo", 3));
+  ASSERT_EQ(0, file.Flush());
+  ASSERT_EQ(0, file.FlushCloseOrErase());
+  close(pipefd[0]);
+}
+
 }  // namespace unix_file
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 772f042..eeb5569 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -8033,6 +8033,15 @@
   return type.Get();
 }
 
+mirror::MethodType* ClassLinker::ResolveMethodType(uint32_t proto_idx, ArtMethod* referrer) {
+  Thread* const self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  const DexFile* dex_file = referrer->GetDexFile();
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referrer->GetClassLoader()));
+  return ResolveMethodType(*dex_file, proto_idx, dex_cache, class_loader);
+}
+
 mirror::MethodHandle* ClassLinker::ResolveMethodHandleForField(
     Thread* self,
     const DexFile::MethodHandleItem& method_handle,
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index f97433c..e436b99 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -371,6 +371,9 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
+  mirror::MethodType* ResolveMethodType(uint32_t proto_idx,  ArtMethod* referrer)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Resolve a method handle with a given ID from the DexFile. The
   // result is not cached in the DexCache as the instance will only be
   // used once in most circumstances.
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index be157a3..2e776b0 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -66,8 +66,10 @@
   // Dex version 036 skipped because of an old dalvik bug on some versions of android where dex
   // files with that version number would erroneously be accepted and run.
   {'0', '3', '7', '\0'},
-  // Dex version 038: Android "O" and beyond.
-  {'0', '3', '8', '\0'}
+  // Dex version 038: Android "O".
+  {'0', '3', '8', '\0'},
+  // Dex version 039: Beyond Android "O".
+  {'0', '3', '9', '\0'},
 };
 
 uint32_t DexFile::CalculateChecksum() const {
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index ac91d52..9c5fd10 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -47,7 +47,7 @@
   static const uint32_t kClassDefinitionOrderEnforcedVersion = 37;
 
   static const uint8_t kDexMagic[];
-  static constexpr size_t kNumDexVersions = 3;
+  static constexpr size_t kNumDexVersions = 4;
   static constexpr size_t kDexVersionLen = 4;
   static const uint8_t kDexMagicVersions[kNumDexVersions][kDexVersionLen];
 
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index a7bf59e..67cd428 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -75,7 +75,7 @@
   "AAACAAAAQAEAAAEgAAACAAAAVAEAAAYgAAACAAAAiAEAAAEQAAABAAAAqAEAAAIgAAAPAAAArgEA"
   "AAMgAAACAAAAiAIAAAQgAAADAAAAlAIAAAAgAAACAAAAqwIAAAAQAAABAAAAxAIAAA==";
 
-// kRawDex38 and 39 are dex'ed versions of the following Java source :
+// kRawDex{38,39,40,41} are dex'ed versions of the following Java source :
 //
 // public class Main {
 //     public static void main(String[] foo) {
@@ -108,6 +108,30 @@
   "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
   "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
 
+static const char kRawDex40[] =
+  "ZGV4CjA0MAC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+  "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+  "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+  "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+  "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+  "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+  "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+  "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+  "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+  "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
+static const char kRawDex41[] =
+  "ZGV4CjA0MQC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI"
+  "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB"
+  "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA"
+  "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA"
+  "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB"
+  "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW"
+  "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA"
+  "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA"
+  "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC"
+  "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA==";
+
 static const char kRawDexZeroLength[] =
   "UEsDBAoAAAAAAOhxAkkAAAAAAAAAAAAAAAALABwAY2xhc3Nlcy5kZXhVVAkAA2QNoVdnDaFXdXgL"
   "AAEE5AMBAASIEwAAUEsBAh4DCgAAAAAA6HECSQAAAAAAAAAAAAAAAAsAGAAAAAAAAAAAAKCBAAAA"
@@ -323,10 +347,31 @@
   EXPECT_EQ(38u, header.GetVersion());
 }
 
-TEST_F(DexFileTest, Version39Rejected) {
+TEST_F(DexFileTest, Version39Accepted) {
+  ScratchFile tmp;
+  std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kRawDex39, tmp.GetFilename().c_str()));
+  ASSERT_TRUE(raw.get() != nullptr);
+
+  const DexFile::Header& header = raw->GetHeader();
+  EXPECT_EQ(39u, header.GetVersion());
+}
+
+TEST_F(DexFileTest, Version40Rejected) {
   ScratchFile tmp;
   const char* location = tmp.GetFilename().c_str();
-  DecodeAndWriteDexFile(kRawDex39, location);
+  DecodeAndWriteDexFile(kRawDex40, location);
+
+  ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
+  std::string error_msg;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files));
+}
+
+TEST_F(DexFileTest, Version41Rejected) {
+  ScratchFile tmp;
+  const char* location = tmp.GetFilename().c_str();
+  DecodeAndWriteDexFile(kRawDex41, location);
 
   ScopedObjectAccess soa(Thread::Current());
   static constexpr bool kVerifyChecksum = true;
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 9a17576..2f28dff 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -135,6 +135,8 @@
     kIndexVtableOffset,       // vtable offset (for static linked methods)
     kIndexMethodAndProtoRef,  // method and a proto reference index (for invoke-polymorphic)
     kIndexCallSiteRef,        // call site reference index
+    kIndexMethodHandleRef,    // constant method handle reference index
+    kIndexProtoRef,           // prototype reference index
   };
 
   enum Flags : uint8_t {
@@ -195,7 +197,9 @@
     kVerifyRuntimeOnly        = 0x0200000,
     kVerifyError              = 0x0400000,
     kVerifyRegHPrototype      = 0x0800000,
-    kVerifyRegBCallSite       = 0x1000000
+    kVerifyRegBCallSite       = 0x1000000,
+    kVerifyRegBMethodHandle   = 0x2000000,
+    kVerifyRegBPrototype      = 0x4000000,
   };
 
   // Collect the enums in a struct for better locality.
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index d0a4ae5..ef83bdc 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -273,8 +273,8 @@
   V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, 0, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kVerifyRegHPrototype) \
   V(0xFC, INVOKE_CUSTOM, "invoke-custom", k35c, kIndexCallSiteRef, kContinue | kThrow, 0, kVerifyRegBCallSite | kVerifyVarArg) \
   V(0xFD, INVOKE_CUSTOM_RANGE, "invoke-custom/range", k3rc, kIndexCallSiteRef, kContinue | kThrow, 0, kVerifyRegBCallSite | kVerifyVarArgRange) \
-  V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, 0, kVerifyError) \
-  V(0xFF, UNUSED_FF, "unused-ff", k10x, kIndexUnknown, 0, 0, kVerifyError)
+  V(0xFE, CONST_METHOD_HANDLE, "const-method-handle", k21c, kIndexMethodHandleRef, kContinue | kThrow, 0, kVerifyRegA | kVerifyRegBMethodHandle) \
+  V(0xFF, CONST_METHOD_TYPE, "const-method-type", k21c, kIndexProtoRef, kContinue | kThrow, 0, kVerifyRegA | kVerifyRegBPrototype)
 
 #define DEX_INSTRUCTION_FORMAT_LIST(V) \
   V(k10x) \
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 281dfd9..7c912d0 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -139,7 +139,8 @@
                         sizeof(void*) * kLockLevelCount);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, flip_function, method_verifier, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, method_verifier, thread_local_mark_stack, sizeof(void*));
-    EXPECT_OFFSET_DIFF(Thread, tlsPtr_.thread_local_mark_stack, Thread, wait_mutex_, sizeof(void*),
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_mark_stack, async_exception, sizeof(void*));
+    EXPECT_OFFSET_DIFF(Thread, tlsPtr_.async_exception, Thread, wait_mutex_, sizeof(void*),
                        thread_tlsptr_end);
   }
 
diff --git a/runtime/image.cc b/runtime/image.cc
index 0236f47..aae572b 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -26,7 +26,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '4', '8', '\0' };  // Map boot image tables.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '4', 'A', '\0' };  // VarHandle fence intrinsics
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index bf287b1..7daf01c 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -278,7 +278,9 @@
 
   void AssertEmpty() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void Dump(std::ostream& os) const REQUIRES_SHARED(Locks::mutator_lock_);
+  void Dump(std::ostream& os) const
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::alloc_tracker_lock_);
 
   // Return the #of entries in the entire table.  This includes holes, and
   // so may be larger than the actual number of "live" entries.
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index c345013..5938113 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -594,6 +594,10 @@
                          uint16_t inst_data,
                          JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Make sure to check for async exceptions before anything else.
+  if (UNLIKELY(self->ObserveAsyncException())) {
+    return false;
+  }
   // Invoke-polymorphic instructions always take a receiver. i.e, they are never static.
   const uint32_t vRegC = (is_range) ? inst->VRegC_4rcc() : inst->VRegC_45cc();
   const int invoke_method_idx = (is_range) ? inst->VRegB_4rcc() : inst->VRegB_45cc();
@@ -899,6 +903,10 @@
                     uint16_t inst_data,
                     JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Make sure to check for async exceptions before anything else.
+  if (UNLIKELY(self->ObserveAsyncException())) {
+    return false;
+  }
   // invoke-custom is not supported in transactions. In transactions
   // there is a limited set of types supported. invoke-custom allows
   // running arbitrary code and instantiating arbitrary types.
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 50bd7e7..1c79619 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -169,6 +169,10 @@
                             const Instruction* inst,
                             uint16_t inst_data,
                             JValue* result) {
+  // Make sure to check for async exceptions before anything else.
+  if (UNLIKELY(self->ObserveAsyncException())) {
+    return false;
+  }
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
   const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
   ObjPtr<mirror::Object> receiver = (type == kStatic) ? nullptr : shadow_frame.GetVRegReference(vregC);
@@ -202,6 +206,25 @@
   }
 }
 
+static inline mirror::MethodHandle* ResolveMethodHandle(uint32_t method_handle_index,
+                                                        ArtMethod* referrer)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  return class_linker->ResolveMethodHandle(method_handle_index, referrer);
+}
+
+static inline mirror::MethodType* ResolveMethodType(Thread* self,
+                                                    uint32_t method_type_index,
+                                                    ArtMethod* referrer)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  const DexFile* dex_file = referrer->GetDexFile();
+  StackHandleScope<2> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referrer->GetClassLoader()));
+  return class_linker->ResolveMethodType(*dex_file, method_type_index, dex_cache, class_loader);
+}
+
 // Performs a signature polymorphic invoke (invoke-polymorphic/invoke-polymorphic-range).
 template<bool is_range>
 bool DoInvokePolymorphic(Thread* self,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index 74e6cd2..26de6b4 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -323,6 +323,25 @@
   return true;
 }
 
+#define VARHANDLE_FENCE_INTRINSIC(name, std_memory_operation)   \
+static ALWAYS_INLINE bool name(ShadowFrame* /* shadow_frame */, \
+                               const Instruction* /* inst */,   \
+                               uint16_t /* inst_data */,        \
+                               JValue* /* result_register */)   \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                     \
+    std::atomic_thread_fence(std_memory_operation);             \
+    return true;                                                \
+}
+
+// The VarHandle fence methods are static (unlike sun.misc.Unsafe versions).
+// The fences for the LoadLoadFence and StoreStoreFence are stronger
+// than strictly required, but the impact should be marginal.
+VARHANDLE_FENCE_INTRINSIC(MterpVarHandleFullFence, std::memory_order_seq_cst)
+VARHANDLE_FENCE_INTRINSIC(MterpVarHandleAcquireFence, std::memory_order_acquire)
+VARHANDLE_FENCE_INTRINSIC(MterpVarHandleReleaseFence, std::memory_order_release)
+VARHANDLE_FENCE_INTRINSIC(MterpVarHandleLoadLoadFence, std::memory_order_acquire)
+VARHANDLE_FENCE_INTRINSIC(MterpVarHandleStoreStoreFence, std::memory_order_release)
+
 // Macro to help keep track of what's left to implement.
 #define UNIMPLEMENTED_CASE(name)    \
     case Intrinsics::k##name:       \
@@ -470,6 +489,11 @@
     UNIMPLEMENTED_CASE(ReferenceGetReferent /* ()Ljava/lang/Object; */)
     UNIMPLEMENTED_CASE(IntegerValueOf /* (I)Ljava/lang/Integer; */)
     UNIMPLEMENTED_CASE(ThreadInterrupted /* ()Z */)
+    INTRINSIC_CASE(VarHandleFullFence)
+    INTRINSIC_CASE(VarHandleAcquireFence)
+    INTRINSIC_CASE(VarHandleReleaseFence)
+    INTRINSIC_CASE(VarHandleLoadLoadFence)
+    INTRINSIC_CASE(VarHandleStoreStoreFence)
     case Intrinsics::kNone:
       res = false;
       break;
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 850419b..74d7901 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -102,6 +102,13 @@
     }                                                                                          \
   } while (false)
 
+#define HANDLE_ASYNC_EXCEPTION()                                                               \
+  if (UNLIKELY(self->ObserveAsyncException())) {                                               \
+    HANDLE_PENDING_EXCEPTION();                                                                \
+    break;                                                                                     \
+  }                                                                                            \
+  do {} while (false)
+
 #define HANDLE_BACKWARD_BRANCH(offset)                                                         \
   do {                                                                                         \
     if (IsBackwardBranch(offset)) {                                                            \
@@ -525,8 +532,33 @@
         }
         break;
       }
+      case Instruction::CONST_METHOD_HANDLE: {
+        PREAMBLE();
+        ObjPtr<mirror::MethodHandle> mh =
+            Runtime::Current()->GetClassLinker()->ResolveMethodHandle(inst->VRegB_21c(), method);
+        if (UNLIKELY(mh == nullptr)) {
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), mh.Ptr());
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
+      case Instruction::CONST_METHOD_TYPE: {
+        PREAMBLE();
+        ObjPtr<mirror::MethodType> mt =
+            Runtime::Current()->GetClassLinker()->ResolveMethodType(inst->VRegB_21c(), method);
+        if (UNLIKELY(mt == nullptr)) {
+          HANDLE_PENDING_EXCEPTION();
+        } else {
+          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), mt.Ptr());
+          inst = inst->Next_2xx();
+        }
+        break;
+      }
       case Instruction::MONITOR_ENTER: {
         PREAMBLE();
+        HANDLE_ASYNC_EXCEPTION();
         ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
         if (UNLIKELY(obj == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
@@ -539,6 +571,7 @@
       }
       case Instruction::MONITOR_EXIT: {
         PREAMBLE();
+        HANDLE_ASYNC_EXCEPTION();
         ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
         if (UNLIKELY(obj == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
@@ -686,6 +719,7 @@
       }
       case Instruction::THROW: {
         PREAMBLE();
+        HANDLE_ASYNC_EXCEPTION();
         ObjPtr<mirror::Object> exception =
             shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
         if (UNLIKELY(exception == nullptr)) {
@@ -704,6 +738,7 @@
       }
       case Instruction::GOTO: {
         PREAMBLE();
+        HANDLE_ASYNC_EXCEPTION();
         int8_t offset = inst->VRegA_10t(inst_data);
         BRANCH_INSTRUMENTATION(offset);
         inst = inst->RelativeAt(offset);
@@ -712,6 +747,7 @@
       }
       case Instruction::GOTO_16: {
         PREAMBLE();
+        HANDLE_ASYNC_EXCEPTION();
         int16_t offset = inst->VRegA_20t();
         BRANCH_INSTRUMENTATION(offset);
         inst = inst->RelativeAt(offset);
@@ -720,6 +756,7 @@
       }
       case Instruction::GOTO_32: {
         PREAMBLE();
+        HANDLE_ASYNC_EXCEPTION();
         int32_t offset = inst->VRegA_30t();
         BRANCH_INSTRUMENTATION(offset);
         inst = inst->RelativeAt(offset);
@@ -2435,10 +2472,8 @@
         inst = inst->Next_2xx();
         break;
       case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
+      case Instruction::UNUSED_79 ... Instruction::UNUSED_7A:
       case Instruction::UNUSED_F3 ... Instruction::UNUSED_F9:
-      case Instruction::UNUSED_FE ... Instruction::UNUSED_FF:
-      case Instruction::UNUSED_79:
-      case Instruction::UNUSED_7A:
         UnexpectedOpcode(inst, shadow_frame);
     }
   } while (!interpret_one_instruction);
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 6c24753..2318125 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -145,9 +145,14 @@
 
 extern "C" size_t MterpShouldSwitchInterpreters()
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const instrumentation::Instrumentation* const instrumentation =
-      Runtime::Current()->GetInstrumentation();
-  return instrumentation->NonJitProfilingActive() || Dbg::IsDebuggerActive();
+  const Runtime* const runtime = Runtime::Current();
+  const instrumentation::Instrumentation* const instrumentation = runtime->GetInstrumentation();
+  return instrumentation->NonJitProfilingActive() ||
+      Dbg::IsDebuggerActive() ||
+      // An async exception has been thrown. We need to go to the switch interpreter. MTerp doesn't
+      // know how to deal with these, so we could end up never handling the exception if we are in
+      // an infinite loop.
+      UNLIKELY(Thread::Current()->IsAsyncExceptionPending());
 }
 
 
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 50aabdc..b767b19 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -123,7 +123,9 @@
 
   void DumpReferenceTables(std::ostream& os)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Locks::jni_globals_lock_, !Locks::jni_weak_globals_lock_);
+      REQUIRES(!Locks::jni_globals_lock_,
+               !Locks::jni_weak_globals_lock_,
+               !Locks::alloc_tracker_lock_);
 
   bool SetCheckJniEnabled(bool enabled);
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 40a5212..ae08fe2 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -534,7 +534,7 @@
 
 static void ClearMethodCounter(ArtMethod* method, bool was_warm) {
   if (was_warm) {
-    method->AddAccessFlags(kAccPreviouslyWarm);
+    method->SetPreviouslyWarm();
   }
   // We reset the counter to 1 so that the profile knows that the method was executed at least once.
   // This is required for layout purposes.
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index af6a45f..2bf8d8b 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -349,7 +349,7 @@
           // Mark startup methods as hot if they have more than hot_method_sample_threshold
           // samples. This means they will get compiled by the compiler driver.
           if (method.GetProfilingInfo(kRuntimePointerSize) != nullptr ||
-              (method.GetAccessFlags() & kAccPreviouslyWarm) != 0 ||
+              method.PreviouslyWarm() ||
               counter >= hot_method_sample_threshold) {
             hot_methods->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
           } else if (counter != 0) {
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index af933ae..2f6c5dc 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -45,7 +45,8 @@
   ~JNIEnvExt();
 
   void DumpReferenceTables(std::ostream& os)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::alloc_tracker_lock_);
 
   void SetCheckJniEnabled(bool enabled) REQUIRES(!Locks::jni_function_table_lock_);
 
@@ -53,7 +54,9 @@
   void PopFrame() REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<typename T>
-  T AddLocalReference(ObjPtr<mirror::Object> obj) REQUIRES_SHARED(Locks::mutator_lock_);
+  T AddLocalReference(ObjPtr<mirror::Object> obj)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::alloc_tracker_lock_);
 
   static Offset SegmentStateOffset(size_t pointer_size);
   static Offset LocalRefCookieOffset(size_t pointer_size);
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index 68ab4a4..4b790a0 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -79,6 +79,11 @@
 // virtual call.
 static constexpr uint32_t kAccSingleImplementation =  0x08000000;  // method (runtime)
 
+// Not currently used, except for intrinsic methods where these bits
+// are part of the intrinsic ordinal.
+static constexpr uint32_t kAccMayBeUnusedBits =       0x70000000;
+
+// Set by the compiler driver when compiling boot classes with intrinsic methods.
 static constexpr uint32_t kAccIntrinsic  =            0x80000000;  // method (runtime)
 
 // Special runtime-only flags.
@@ -89,8 +94,10 @@
 // class/ancestor overrides finalize()
 static constexpr uint32_t kAccClassIsFinalizable        = 0x80000000;
 
-static constexpr uint32_t kAccFlagsNotUsedByIntrinsic   = 0x00FFFFFF;
-static constexpr uint32_t kAccMaxIntrinsic              = 0x7F;
+// Contiguous sequence of bits used to hold the ordinal of an intrinsic method. Flags
+// which overlap are not valid when kAccIntrinsic is set.
+static constexpr uint32_t kAccIntrinsicBits = kAccMayBeUnusedBits | kAccSingleImplementation |
+    kAccMustCountLocks | kAccCompileDontBother | kAccDefaultConflict | kAccPreviouslyWarm;
 
 // Valid (meaningful) bits for a field.
 static constexpr uint32_t kAccValidFieldFlags = kAccPublic | kAccPrivate | kAccProtected |
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index d85479a..7823413 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1143,7 +1143,7 @@
   Handle<mirror::Object> h_obj(hs.NewHandle(obj));
 
   Runtime::Current()->GetRuntimeCallbacks()->ObjectWaitStart(h_obj, ms);
-  if (UNLIKELY(self->IsExceptionPending())) {
+  if (UNLIKELY(self->ObserveAsyncException() || self->IsExceptionPending())) {
     // See b/65558434 for information on handling of exceptions here.
     return;
   }
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index e6e588e..a6df27b 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -19,6 +19,8 @@
 #include "android-base/stringprintf.h"
 
 #include "base/mutex.h"
+#include "gc/allocation_record.h"
+#include "gc/heap.h"
 #include "indirect_reference_table.h"
 #include "mirror/array-inl.h"
 #include "mirror/array.h"
@@ -206,6 +208,54 @@
       }
     }
     os << StringPrintf("    %5d: ", idx) << ref << " " << className << extras << "\n";
+    if (runtime->GetHeap()->IsAllocTrackingEnabled()) {
+      MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+
+      gc::AllocRecordObjectMap* records = runtime->GetHeap()->GetAllocationRecords();
+      DCHECK(records != nullptr);
+      // The records are a list, so lookup is linear. This code should rarely be executed.
+
+      auto print_stack = [&](ObjPtr<mirror::Object> to_print, const std::string& msg)
+          REQUIRES_SHARED(Locks::mutator_lock_)
+          REQUIRES(Locks::alloc_tracker_lock_) {
+        for (auto it = records->Begin(), end = records->End(); it != end; ++it) {
+          GcRoot<mirror::Object>& stack_for_object = it->first;
+          gc::AllocRecord& record = it->second;
+          if (stack_for_object.Read() == to_print.Ptr()) {
+            os << "          " << msg << "\n";
+            const gc::AllocRecordStackTrace* trace = record.GetStackTrace();
+            size_t depth = trace->GetDepth();
+            if (depth == 0) {
+              os << "            (No managed frames)\n";
+            } else {
+              for (size_t i = 0; i < depth; ++i) {
+                const gc::AllocRecordStackTraceElement& frame = trace->GetStackElement(i);
+                os << "            ";
+                if (frame.GetMethod() == nullptr) {
+                  os << "(missing method data)\n";
+                  continue;
+                }
+                os << frame.GetMethod()->PrettyMethod(true)
+                   << ":"
+                   << frame.ComputeLineNumber()
+                   << "\n";
+              }
+            }
+            break;
+          }
+        }
+      };
+      // Print the stack trace of the ref.
+      print_stack(ref, "Allocated at:");
+
+      // If it's a reference, see if we have data about the referent.
+      if (ref->IsReferenceInstance()) {
+        ObjPtr<mirror::Object> referent = ref->AsReference()->GetReferent();
+        if (referent != nullptr) {
+          print_stack(referent, "Referent allocated at:");
+        }
+      }
+    }
   }
 
   // Make a copy of the table and sort it, only adding non null and not cleared elements.
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 010c6f8..6af5ca5 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -47,7 +47,9 @@
 
   size_t Size() const;
 
-  void Dump(std::ostream& os) REQUIRES_SHARED(Locks::mutator_lock_);
+  void Dump(std::ostream& os)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::alloc_tracker_lock_);
 
   void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -56,7 +58,8 @@
   typedef std::vector<GcRoot<mirror::Object>,
                       TrackingAllocator<GcRoot<mirror::Object>, kAllocatorTagReferenceTable>> Table;
   static void Dump(std::ostream& os, Table& entries)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::alloc_tracker_lock_);
   friend class IndirectReferenceTable;  // For Dump.
 
   std::string name_;
diff --git a/runtime/reference_table_test.cc b/runtime/reference_table_test.cc
index d830387..1e7fc3e 100644
--- a/runtime/reference_table_test.cc
+++ b/runtime/reference_table_test.cc
@@ -16,6 +16,8 @@
 
 #include "reference_table.h"
 
+#include <regex>
+
 #include "android-base/stringprintf.h"
 
 #include "art_method-inl.h"
@@ -30,6 +32,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-current-inl.h"
+#include "well_known_classes.h"
 
 namespace art {
 
@@ -156,6 +159,7 @@
     rt.Dump(oss);
     EXPECT_NE(oss.str().find("java.lang.ref.WeakReference (referent is null)"), std::string::npos)
         << oss.str();
+    rt.Remove(empty_reference);
   }
 
   {
@@ -168,6 +172,86 @@
     EXPECT_NE(oss.str().find("java.lang.ref.WeakReference (referent is a java.lang.String)"),
               std::string::npos)
         << oss.str();
+    rt.Remove(non_empty_reference);
+  }
+
+  // Add two objects. Enable allocation tracking for the latter.
+  {
+    StackHandleScope<3> hs(soa.Self());
+    Handle<mirror::String> h_without_trace(hs.NewHandle(
+        mirror::String::AllocFromModifiedUtf8(soa.Self(), "Without")));
+
+    {
+      ScopedThreadSuspension sts(soa.Self(), ThreadState::kSuspended);
+      gc::AllocRecordObjectMap::SetAllocTrackingEnabled(true);
+    }
+
+    // To get a stack, actually make a call. Use substring, that's simple. Calling through JNI
+    // avoids having to create the low-level args array ourselves.
+    Handle<mirror::Object> h_with_trace;
+    {
+      jmethodID substr = soa.Env()->GetMethodID(WellKnownClasses::java_lang_String,
+                                                "substring",
+                                                "(II)Ljava/lang/String;");
+      ASSERT_TRUE(substr != nullptr);
+      jobject jobj = soa.Env()->AddLocalReference<jobject>(h_without_trace.Get());
+      ASSERT_TRUE(jobj != nullptr);
+      jobject result = soa.Env()->CallObjectMethod(jobj,
+                                                   substr,
+                                                   static_cast<jint>(0),
+                                                   static_cast<jint>(4));
+      ASSERT_TRUE(result != nullptr);
+      h_with_trace = hs.NewHandle(soa.Self()->DecodeJObject(result));
+    }
+
+    Handle<mirror::Object> h_ref;
+    {
+      jclass weak_ref_class = soa.Env()->FindClass("java/lang/ref/WeakReference");
+      ASSERT_TRUE(weak_ref_class != nullptr);
+      jmethodID init = soa.Env()->GetMethodID(weak_ref_class,
+                                              "<init>",
+                                              "(Ljava/lang/Object;)V");
+      ASSERT_TRUE(init != nullptr);
+      jobject referent = soa.Env()->AddLocalReference<jobject>(h_with_trace.Get());
+      jobject result = soa.Env()->NewObject(weak_ref_class, init, referent);
+      ASSERT_TRUE(result != nullptr);
+      h_ref = hs.NewHandle(soa.Self()->DecodeJObject(result));
+    }
+
+    rt.Add(h_without_trace.Get());
+    rt.Add(h_with_trace.Get());
+    rt.Add(h_ref.Get());
+
+    std::ostringstream oss;
+    rt.Dump(oss);
+
+    constexpr const char* kStackTracePattern =
+        R"(test reference table dump:\n)"
+        R"(  Last 3 entries \(of 3\):\n)"  // NOLINT
+        R"(        2: 0x[0-9a-f]* java.lang.ref.WeakReference \(referent is a java.lang.String\)\n)"  // NOLINT
+        R"(          Allocated at:\n)"
+        R"(            \(No managed frames\)\n)"  // NOLINT
+        R"(          Referent allocated at:\n)"
+        R"(            java.lang.String java.lang.String.fastSubstring\(int, int\):-2\n)"  // NOLINT
+        R"(            java.lang.String java.lang.String.substring\(int, int\):[0-9]*\n)"  // NOLINT
+        R"(        1: 0x[0-9a-f]* java.lang.String "With"\n)"
+        R"(          Allocated at:\n)"
+        R"(            java.lang.String java.lang.String.fastSubstring\(int, int\):-2\n)"  // NOLINT
+        R"(            java.lang.String java.lang.String.substring\(int, int\):[0-9]*\n)"  // NOLINT
+        R"(        0: 0x[0-9a-f]* java.lang.String "Without"\n)"
+        R"(  Summary:\n)"
+        R"(        2 of java.lang.String \(2 unique instances\)\n)"  // NOLINT
+        R"(        1 of java.lang.ref.WeakReference\n)";
+    std::regex stack_trace_regex(kStackTracePattern);
+    std::smatch stack_trace_match;
+    std::string str = oss.str();
+    bool found = std::regex_search(str, stack_trace_match, stack_trace_regex);
+    EXPECT_TRUE(found) << str;
+
+    {
+      ScopedThreadSuspension sts(soa.Self(), ThreadState::kSuspended);
+      gc::AllocRecordObjectMap::SetAllocTrackingEnabled(false);
+    }
   }
 }
 
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index f0b6ee4..b50879f 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -27,7 +27,12 @@
 
 #include <sstream>
 
-#include "android-base/stringprintf.h"
+#include <android-base/stringprintf.h>
+
+#if defined(ART_TARGET_ANDROID)
+#include <tombstoned/tombstoned.h>
+#endif
+
 #include "arch/instruction_set.h"
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
@@ -42,10 +47,6 @@
 #include "thread_list.h"
 #include "utils.h"
 
-#if defined(ART_TARGET_ANDROID)
-#include "tombstoned/tombstoned.h"
-#endif
-
 namespace art {
 
 static void DumpCmdLine(std::ostream& os) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 524e73d..2753bf7 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3475,6 +3475,10 @@
     visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&tlsPtr_.exception),
                        RootInfo(kRootNativeStack, thread_id));
   }
+  if (tlsPtr_.async_exception != nullptr) {
+    visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&tlsPtr_.async_exception),
+                       RootInfo(kRootNativeStack, thread_id));
+  }
   visitor->VisitRootIfNonNull(&tlsPtr_.monitor_enter_object, RootInfo(kRootNativeStack, thread_id));
   tlsPtr_.jni_env->locals.VisitRoots(visitor, RootInfo(kRootJNILocal, thread_id));
   tlsPtr_.jni_env->monitors.VisitRoots(visitor, RootInfo(kRootJNIMonitor, thread_id));
@@ -3694,6 +3698,34 @@
                                               method_type);
 }
 
+void Thread::SetAsyncException(ObjPtr<mirror::Throwable> new_exception) {
+  CHECK(new_exception != nullptr);
+  if (kIsDebugBuild) {
+    // Make sure we are in a checkpoint.
+    MutexLock mu(Thread::Current(), *Locks::thread_suspend_count_lock_);
+    CHECK(this == Thread::Current() || GetSuspendCount() >= 1)
+        << "It doesn't look like this was called in a checkpoint! this: "
+        << this << " count: " << GetSuspendCount();
+  }
+  tlsPtr_.async_exception = new_exception.Ptr();
+}
+
+bool Thread::ObserveAsyncException() {
+  DCHECK(this == Thread::Current());
+  if (tlsPtr_.async_exception != nullptr) {
+    if (tlsPtr_.exception != nullptr) {
+      LOG(WARNING) << "Overwriting pending exception with async exception. Pending exception is: "
+                   << tlsPtr_.exception->Dump();
+      LOG(WARNING) << "Async exception is " << tlsPtr_.async_exception->Dump();
+    }
+    tlsPtr_.exception = tlsPtr_.async_exception;
+    tlsPtr_.async_exception = nullptr;
+    return true;
+  } else {
+    return IsExceptionPending();
+  }
+}
+
 void Thread::SetException(ObjPtr<mirror::Throwable> new_exception) {
   CHECK(new_exception != nullptr);
   // TODO: DCHECK(!IsExceptionPending());
diff --git a/runtime/thread.h b/runtime/thread.h
index 2e4a3da..ab89778 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -401,6 +401,10 @@
     return tlsPtr_.exception != nullptr;
   }
 
+  bool IsAsyncExceptionPending() const {
+    return tlsPtr_.async_exception != nullptr;
+  }
+
   mirror::Throwable* GetException() const REQUIRES_SHARED(Locks::mutator_lock_) {
     return tlsPtr_.exception;
   }
@@ -412,10 +416,24 @@
 
   void SetException(ObjPtr<mirror::Throwable> new_exception) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Set an exception that is asynchronously thrown from a different thread. This will be checked
+  // periodically and might overwrite the current 'Exception'. This can only be called from a
+  // checkpoint.
+  //
+  // The caller should also make sure that the thread has been deoptimized so that the exception
+  // could be detected on back-edges.
+  void SetAsyncException(ObjPtr<mirror::Throwable> new_exception)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   void ClearException() REQUIRES_SHARED(Locks::mutator_lock_) {
     tlsPtr_.exception = nullptr;
   }
 
+  // Move the current async-exception to the main exception. This should be called when the current
+  // thread is ready to deal with any async exceptions. Returns true if an exception (async or
+  // already pending) needs to be dealt with afterwards, false otherwise.
+  bool ObserveAsyncException() REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Find catch block and perform long jump to appropriate exception handle
   NO_RETURN void QuickDeliverException() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -1518,7 +1536,8 @@
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
       thread_local_alloc_stack_end(nullptr),
-      flip_function(nullptr), method_verifier(nullptr), thread_local_mark_stack(nullptr) {
+      flip_function(nullptr), method_verifier(nullptr), thread_local_mark_stack(nullptr),
+      async_exception(nullptr) {
       std::fill(held_mutexes, held_mutexes + kLockLevelCount, nullptr);
     }
 
@@ -1675,6 +1694,9 @@
 
     // Thread-local mark stack for the concurrent copying collector.
     gc::accounting::AtomicStack<mirror::Object>* thread_local_mark_stack;
+
+    // The pending async-exception or null.
+    mirror::Throwable* async_exception;
   } tlsPtr_;
 
   // Guards the 'wait_monitor_' members.
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index cfdf20d..7246bae 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -43,6 +43,7 @@
 #include "mirror/class.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/method_handle_impl.h"
+#include "mirror/method_type.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "reg_type-inl.h"
@@ -431,7 +432,7 @@
         }
       }
       if ((verifier.encountered_failure_types_ & VerifyError::VERIFY_ERROR_LOCKING) != 0) {
-        method->AddAccessFlags(kAccMustCountLocks);
+        method->SetMustCountLocks();
       }
     }
   } else {
@@ -1169,6 +1170,15 @@
     case Instruction::kVerifyRegBWide:
       result = result && CheckWideRegisterIndex(inst->VRegB());
       break;
+    case Instruction::kVerifyRegBCallSite:
+      result = result && CheckCallSiteIndex(inst->VRegB());
+      break;
+    case Instruction::kVerifyRegBMethodHandle:
+      result = result && CheckMethodHandleIndex(inst->VRegB());
+      break;
+    case Instruction::kVerifyRegBPrototype:
+      result = result && CheckPrototypeIndex(inst->VRegB());
+      break;
   }
   switch (inst->GetVerifyTypeArgumentC()) {
     case Instruction::kVerifyRegC:
@@ -1260,6 +1270,16 @@
   return true;
 }
 
+inline bool MethodVerifier::CheckCallSiteIndex(uint32_t idx) {
+  // A call site index is valid iff it is below the dex file's call site id count.
+  const uint32_t num = dex_file_->NumCallSiteIds();
+  const bool in_range = idx < num;
+  if (UNLIKELY(!in_range)) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad call site index " << idx << " (max " << num << ")";
+  }
+  return in_range;
+}
+
 inline bool MethodVerifier::CheckFieldIndex(uint32_t idx) {
   if (UNLIKELY(idx >= dex_file_->GetHeader().field_ids_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad field index " << idx << " (max "
@@ -1278,6 +1298,16 @@
   return true;
 }
 
+inline bool MethodVerifier::CheckMethodHandleIndex(uint32_t idx) {
+  const uint32_t num = dex_file_->NumMethodHandles();  // Exclusive upper bound for idx.
+  const bool in_range = idx < num;
+  if (UNLIKELY(!in_range)) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad method handle index " << idx << " (max " << num
+                                      << ")";
+  }
+  return in_range;
+}
+
 inline bool MethodVerifier::CheckNewInstance(dex::TypeIndex idx) {
   if (UNLIKELY(idx.index_ >= dex_file_->GetHeader().type_ids_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx.index_ << " (max "
@@ -2320,6 +2350,18 @@
                                                          : reg_types_.JavaLangClass());
       break;
     }
+    case Instruction::CONST_METHOD_HANDLE:
+      work_line_->SetRegisterType<LockOp::kClear>(
+          this, inst->VRegA_21c(), reg_types_.JavaLangInvokeMethodHandle());
+      // TODO: add compiler support for const-method-{handle,type} (b/66890674)
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);
+      break;
+    case Instruction::CONST_METHOD_TYPE:
+      work_line_->SetRegisterType<LockOp::kClear>(
+          this, inst->VRegA_21c(), reg_types_.JavaLangInvokeMethodType());
+      // TODO: add compiler support for const-method-{handle,type} (b/66890674)
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);
+      break;
     case Instruction::MONITOR_ENTER:
       work_line_->PushMonitor(this, inst->VRegA_11x(), work_insn_idx_);
       // Check whether the previous instruction is a move-object with vAA as a source, creating
@@ -3454,7 +3496,6 @@
     /* These should never appear during verification. */
     case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
     case Instruction::UNUSED_F3 ... Instruction::UNUSED_F9:
-    case Instruction::UNUSED_FE ... Instruction::UNUSED_FF:
     case Instruction::UNUSED_79:
     case Instruction::UNUSED_7A:
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index da4102a..8afbe78 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -404,6 +404,10 @@
   /* Ensure that the wide register index is valid for this code item. */
   bool CheckWideRegisterIndex(uint32_t idx);
 
+  // Perform static checks on an instruction referencing a CallSite. All we do here is ensure that
+  // the call site index is in the valid range.
+  bool CheckCallSiteIndex(uint32_t idx);
+
   // Perform static checks on a field Get or set instruction. All we do here is ensure that the
   // field index is in the valid range.
   bool CheckFieldIndex(uint32_t idx);
@@ -412,6 +416,10 @@
   // method index is in the valid range.
   bool CheckMethodIndex(uint32_t idx);
 
+  // Perform static checks on an instruction referencing a constant method handle. All we do here
+  // is ensure that the method handle index is in the valid range.
+  bool CheckMethodHandleIndex(uint32_t idx);
+
   // Perform static checks on a "new-instance" instruction. Specifically, make sure the class
   // reference isn't for an array class.
   bool CheckNewInstance(dex::TypeIndex idx);
diff --git a/runtime/verifier/reg_type_cache-inl.h b/runtime/verifier/reg_type_cache-inl.h
index b57a2c8..197c976 100644
--- a/runtime/verifier/reg_type_cache-inl.h
+++ b/runtime/verifier/reg_type_cache-inl.h
@@ -19,6 +19,8 @@
 
 #include "class_linker.h"
 #include "mirror/class-inl.h"
+#include "mirror/method_handle_impl.h"
+#include "mirror/method_type.h"
 #include "mirror/string.h"
 #include "mirror/throwable.h"
 #include "reg_type.h"
@@ -131,6 +133,20 @@
   return *down_cast<const PreciseReferenceType*>(result);
 }
 
+inline const PreciseReferenceType& RegTypeCache::JavaLangInvokeMethodHandle() {
+  const RegType& type = FromClass(
+      "Ljava/lang/invoke/MethodHandle;", mirror::MethodHandle::StaticClass(), /* precise */ true);
+  DCHECK(type.IsPreciseReference());
+  return *down_cast<const PreciseReferenceType*>(&type);
+}
+
+inline const PreciseReferenceType& RegTypeCache::JavaLangInvokeMethodType() {
+  const RegType& type = FromClass(
+      "Ljava/lang/invoke/MethodType;", mirror::MethodType::StaticClass(), /* precise */ true);
+  DCHECK(type.IsPreciseReference());
+  return *down_cast<const PreciseReferenceType*>(&type);
+}
+
 inline const RegType&  RegTypeCache::JavaLangThrowable(bool precise) {
   const RegType* result = &FromClass("Ljava/lang/Throwable;",
                                      mirror::Throwable::GetJavaLangThrowable(), precise);
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 7077c55..96eca05 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -126,6 +126,8 @@
 
   const PreciseReferenceType& JavaLangClass() REQUIRES_SHARED(Locks::mutator_lock_);
   const PreciseReferenceType& JavaLangString() REQUIRES_SHARED(Locks::mutator_lock_);
+  const PreciseReferenceType& JavaLangInvokeMethodHandle() REQUIRES_SHARED(Locks::mutator_lock_);
+  const PreciseReferenceType& JavaLangInvokeMethodType() REQUIRES_SHARED(Locks::mutator_lock_);
   const RegType& JavaLangThrowable(bool precise) REQUIRES_SHARED(Locks::mutator_lock_);
   const RegType& JavaLangObject(bool precise) REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/test/1929-exception-catch-exception/build b/test/1929-exception-catch-exception/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/1929-exception-catch-exception/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Point DX at the dx under $ANDROID_HOST_OUT/bin before running default-build. See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/1934-jvmti-signal-thread/expected.txt b/test/1934-jvmti-signal-thread/expected.txt
new file mode 100644
index 0000000..69a0e9e
--- /dev/null
+++ b/test/1934-jvmti-signal-thread/expected.txt
@@ -0,0 +1,27 @@
+Interrupt before start
+interrupting other thread before starting
+Caught exception java.lang.RuntimeException: JVMTI_ERROR_THREAD_NOT_ALIVE
+Stop before start
+stopping other thread before starting
+Caught exception java.lang.RuntimeException: JVMTI_ERROR_THREAD_NOT_ALIVE
+Interrupt recur
+Interrupting other thread recurring
+Other thread Interrupted. err: java.lang.Error: Interrupted!
+Stop Recur
+stopping other thread recurring
+Other thread Stopped by: java.lang.Error: AWESOME!
+Interrupt spinning
+Interrupting other thread spinning
+Other thread Interrupted.
+Stop spinning
+stopping other thread spinning
+Other thread Stopped by: java.lang.Error: AWESOME!
+Interrupt wait
+interrupting other thread waiting
+Other thread interrupted. err: java.lang.Error: Interrupted!
+Stop wait
+stopping other thread waiting
+Other thread Stopped by: java.lang.Error: AWESOME
+Stop in native
+stopping other thread
+Other thread Stopped by: java.lang.Error: AWESOME
diff --git a/test/1934-jvmti-signal-thread/info.txt b/test/1934-jvmti-signal-thread/info.txt
new file mode 100644
index 0000000..c8c9189
--- /dev/null
+++ b/test/1934-jvmti-signal-thread/info.txt
@@ -0,0 +1,3 @@
+Tests basic functions in the jvmti plugin.
+
+Tests that the StopThread and InterruptThread functions work as expected.
diff --git a/test/1934-jvmti-signal-thread/run b/test/1934-jvmti-signal-thread/run
new file mode 100755
index 0000000..e92b873
--- /dev/null
+++ b/test/1934-jvmti-signal-thread/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-run "$@" --jvmti
diff --git a/test/1934-jvmti-signal-thread/signal_threads.cc b/test/1934-jvmti-signal-thread/signal_threads.cc
new file mode 100644
index 0000000..726a7a86
--- /dev/null
+++ b/test/1934-jvmti-signal-thread/signal_threads.cc
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pthread.h>
+
+#include <cstdio>
+#include <iostream>
+#include <vector>
+
+#include "android-base/logging.h"
+#include "jni.h"
+#include "jvmti.h"
+
+#include "scoped_local_ref.h"
+#include "scoped_primitive_array.h"
+
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
+
+namespace art {
+namespace Test1934SignalThreads {
+
+struct NativeMonitor {  // Two flag/monitor pairs used to hand off between two native threads.
+  jrawMonitorID continue_monitor;  // Held while should_continue is read or written.
+  bool should_continue;            // Set by nativeDoInterleaved after the closure has been called.
+  jrawMonitorID start_monitor;     // Held while should_start is read or written.
+  bool should_start;               // Set by nativeWaitForOtherThread when it begins.
+};
+
+// Allocates a NativeMonitor through JVMTI, creates its two raw monitors and returns the pointer
+// as an opaque jlong handle, or -1 if any JVMTI call fails. Partial state is freed on failure.
+extern "C" JNIEXPORT jlong JNICALL Java_art_Test1934_allocNativeMonitor(JNIEnv* env, jclass) {
+  NativeMonitor* mon;
+  jvmtiError err = jvmti_env->Allocate(sizeof(NativeMonitor),
+                                       reinterpret_cast<unsigned char**>(&mon));
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return -1l;
+  }
+  err = jvmti_env->CreateRawMonitor("test-1934 start", &mon->start_monitor);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(mon));
+    return -1l;
+  }
+  err = jvmti_env->CreateRawMonitor("test-1934 continue", &mon->continue_monitor);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    jvmti_env->DestroyRawMonitor(mon->start_monitor);
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(mon));
+    return -1l;
+  }
+  mon->should_continue = false;
+  mon->should_start = false;
+  return static_cast<jlong>(reinterpret_cast<intptr_t>(mon));
+}
+
+// Publishes should_start to the peer thread, then waits for the peer to set should_continue.
+extern "C" JNIEXPORT void Java_art_Test1934_nativeWaitForOtherThread(
+    JNIEnv* env, jclass, jlong id) {
+  NativeMonitor* mon = reinterpret_cast<NativeMonitor*>(static_cast<intptr_t>(id));
+  // Phase 1: set the start flag under start_monitor and wake all of its waiters.
+  jvmtiError err = jvmti_env->RawMonitorEnter(mon->start_monitor);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+  mon->should_start = true;
+  err = jvmti_env->RawMonitorNotifyAll(mon->start_monitor);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorExit(mon->start_monitor));
+    return;
+  }
+  err = jvmti_env->RawMonitorExit(mon->start_monitor);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+  // Phase 2: block on continue_monitor until should_continue has been set.
+  err = jvmti_env->RawMonitorEnter(mon->continue_monitor);
+  if (JvmtiErrorToException(env, jvmti_env, err)) {
+    return;
+  }
+  while (!mon->should_continue) {
+    err = jvmti_env->RawMonitorWait(mon->continue_monitor, -1l);
+    if (JvmtiErrorToException(env, jvmti_env, err)) {
+      JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorExit(mon->continue_monitor));
+      return;
+    }
+  }
+  JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorExit(mon->continue_monitor));
+}
+
+// Waits for the peer thread to set should_start, invokes closure.run(), then sets
+// should_continue and notifies the peer. Returns early if a JVMTI call reports an error.
+extern "C" JNIEXPORT void Java_art_Test1934_nativeDoInterleaved(
+    JNIEnv* env, jclass, jlong id, jobject closure) {
+  NativeMonitor* mon = reinterpret_cast<NativeMonitor*>(static_cast<intptr_t>(id));
+  // Wait for start. start_monitor must be exited on every path, including a failed wait.
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorEnter(mon->start_monitor))) {
+    return;
+  }
+  while (!mon->should_start) {
+    jvmtiError err = jvmti_env->RawMonitorWait(mon->start_monitor, -1l);
+    if (JvmtiErrorToException(env, jvmti_env, err)) {
+      JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorExit(mon->start_monitor));
+      return;
+    }
+  }
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorExit(mon->start_monitor))) {
+    return;
+  }
+
+  // Call closure.
+  ScopedLocalRef<jclass> runnable_klass(env, env->FindClass("java/lang/Runnable"));
+  if (env->ExceptionCheck()) {
+    return;
+  }
+  jmethodID doRun = env->GetMethodID(runnable_klass.get(), "run", "()V");
+  if (env->ExceptionCheck()) {
+    return;
+  }
+  env->CallVoidMethod(closure, doRun);
+
+  // Tell other thread to finish.
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorEnter(mon->continue_monitor))) {
+    return;
+  }
+  mon->should_continue = true;
+  if (JvmtiErrorToException(env,
+                            jvmti_env,
+                            jvmti_env->RawMonitorNotifyAll(mon->continue_monitor))) {
+    JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorExit(mon->continue_monitor));
+    return;
+  }
+  JvmtiErrorToException(env, jvmti_env, jvmti_env->RawMonitorExit(mon->continue_monitor));
+}
+
+extern "C" JNIEXPORT void Java_art_Test1934_destroyNativeMonitor(JNIEnv*, jclass, jlong id) {
+  auto* const native_mon = reinterpret_cast<NativeMonitor*>(static_cast<intptr_t>(id));
+  jvmti_env->DestroyRawMonitor(native_mon->start_monitor);     // JVMTI status is not checked.
+  jvmti_env->DestroyRawMonitor(native_mon->continue_monitor);  // JVMTI status is not checked.
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(native_mon));  // Frees the struct last.
+}
+
+}  // namespace Test1934SignalThreads
+}  // namespace art
+
diff --git a/test/1934-jvmti-signal-thread/src/Main.java b/test/1934-jvmti-signal-thread/src/Main.java
new file mode 100644
index 0000000..539763c
--- /dev/null
+++ b/test/1934-jvmti-signal-thread/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Entry point of the test program.
+// All of the test logic lives in art.Test1934.
+public class Main {
+  public static void main(String[] args) throws Exception { art.Test1934.run(); }
+}
diff --git a/test/1934-jvmti-signal-thread/src/art/Monitors.java b/test/1934-jvmti-signal-thread/src/art/Monitors.java
new file mode 100644
index 0000000..7fe2b60
--- /dev/null
+++ b/test/1934-jvmti-signal-thread/src/art/Monitors.java
@@ -0,0 +1,344 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.reflect.Method;
+import java.util.concurrent.atomic.*;
+import java.util.function.Function;
+import java.util.stream.Stream;
+import java.util.Arrays;
+import java.util.Objects;
+
+// Helpers for driving and inspecting object monitors; natives are implemented outside this file.
+public class Monitors {
+  public native static void setupMonitorEvents(
+      Class<?> method_klass,
+      Method monitor_contended_enter_event, Method monitor_contended_entered_event,
+      Method monitor_wait_event, Method monitor_waited_event,
+      Class<?> lock_klass, Thread thr);
+  public native static void stopMonitorEvents();
+
+  // Named monitor; calledNotify is bumped on every notify so waiters can detect it.
+  public static class NamedLock {
+    public final String name;
+    private volatile int calledNotify;
+    public NamedLock(String name) {
+      this.name = name;
+      calledNotify = 0;
+    }
+
+    public String toString() {
+      return String.format("NamedLock[%s]", name);
+    }
+
+    public final void DoWait() throws Exception {
+      final int v = calledNotify;
+      while (v == calledNotify) {
+        wait();
+      }
+    }
+
+    // Timed wait. A negative t means "no deadline": it is clamped to 0 because wait(0) blocks
+    // until notified, whereas Object.wait rejects negative timeouts.
+    public final void DoWait(long t) throws Exception {
+      final int v = calledNotify;
+      final long target = System.currentTimeMillis() + (t / 2);  // NOTE(review): t/2 - confirm.
+      while (v == calledNotify && (t < 0 || System.currentTimeMillis() < target)) {
+        wait(Math.max(t, 0L));
+      }
+    }
+
+    public final void DoNotifyAll() throws Exception {
+      calledNotify++;
+      notifyAll();
+    }
+
+    public final void DoNotify() throws Exception {
+      calledNotify++;
+      notify();
+    }
+  }
+
+  // Snapshot of a monitor's owner, entry count and waiting threads.
+  public static final class MonitorUsage {
+    public final Object monitor;
+    public final Thread owner;
+    public final int entryCount;
+    public final Thread[] waiters;
+    public final Thread[] notifyWaiters;
+
+    public MonitorUsage(
+        Object monitor, Thread owner, int entryCount, Thread[] waiters, Thread[] notifyWaiters) {
+      this.monitor = monitor;
+      this.entryCount = entryCount;
+      this.owner = owner;
+      this.waiters = waiters;
+      this.notifyWaiters = notifyWaiters;
+    }
+
+    private static String toNameList(Thread[] ts) {
+      return Arrays.toString(Arrays.stream(ts).map((Thread t) -> t.getName()).toArray());
+    }
+
+    public String toString() {
+      return String.format(
+          "MonitorUsage{ monitor: %s, owner: %s, entryCount: %d, waiters: %s, notify_waiters: %s }",
+          monitor,
+          (owner != null) ? owner.getName() : "<NULL>",
+          entryCount,
+          toNameList(waiters),
+          toNameList(notifyWaiters));
+    }
+  }
+
+  public static native MonitorUsage getObjectMonitorUsage(Object monitor);
+  public static native Object getCurrentContendedMonitor(Thread thr);
+
+  // Error used to report failures, including ones raised on the worker thread.
+  public static class TestException extends Error {
+    public TestException() { super(); }
+    public TestException(String s) { super(s); }
+    public TestException(String s, Throwable c) { super(s, c); }
+  }
+
+  // Runs a worker thread that holds `lock` and performs the actions requested through `action`.
+  public static class LockController {
+    private static enum Action { HOLD, RELEASE, NOTIFY, NOTIFY_ALL, WAIT, TIMED_WAIT }
+
+    public final NamedLock lock;
+    public final long timeout;
+    private final AtomicStampedReference<Action> action;
+    private volatile Thread runner = null;
+    private volatile boolean started = false;
+    private volatile boolean held = false;
+    private static final AtomicInteger cnt = new AtomicInteger(0);
+    private volatile Throwable exe;
+
+    public LockController(NamedLock lock) {
+      this(lock, 10 * 1000);
+    }
+    public LockController(NamedLock lock, long timeout) {
+      this.lock = lock;
+      this.timeout = timeout;
+      this.action = new AtomicStampedReference<>(Action.HOLD, 0);
+      this.exe = null;
+    }
+
+    public boolean IsWorkerThread(Thread thd) {
+      return Objects.equals(runner, thd);
+    }
+
+    public boolean IsLocked() {
+      checkException();
+      return held;
+    }
+
+    public void checkException() {
+      if (exe != null) {
+        throw new TestException("Exception thrown by other thread!", exe);
+      }
+    }
+
+    private void setAction(Action a) {
+      int stamp = action.getStamp();
+      // Wait for it to be HOLD before updating.
+      while (!action.compareAndSet(Action.HOLD, a, stamp, stamp + 1)) {
+        stamp = action.getStamp();
+      }
+    }
+
+    public synchronized void suspendWorker() throws Exception {
+      checkException();
+      if (runner == null) {
+        throw new TestException("We don't have any runner holding  " + lock);
+      }
+      Suspension.suspend(runner);
+    }
+
+    public Object getWorkerContendedMonitor() throws Exception {
+      checkException();
+      if (runner == null) {
+        return null;
+      }
+      return getCurrentContendedMonitor(runner);
+    }
+
+    public synchronized void DoLock() {
+      if (IsLocked()) {
+        throw new Error("lock is already acquired or being acquired.");
+      }
+      if (runner != null) {
+        throw new Error("Already have thread!");
+      }
+      runner = new Thread(() -> {
+        started = true;
+        try {
+          synchronized (lock) {
+            held = true;
+            int[] stamp_h = new int[] { -1 };
+            Action cur_action = Action.HOLD;
+            try {
+              while (true) {
+                cur_action = action.get(stamp_h);
+                int stamp = stamp_h[0];
+                if (cur_action == Action.RELEASE) {
+                  // The other thread will deal with resetting action.
+                  break;
+                }
+                try {
+                  switch (cur_action) {
+                    case HOLD:
+                      Thread.yield();
+                      break;
+                    case NOTIFY:
+                      lock.DoNotify();
+                      break;
+                    case NOTIFY_ALL:
+                      lock.DoNotifyAll();
+                      break;
+                    case TIMED_WAIT:
+                      lock.DoWait(timeout);
+                      break;
+                    case WAIT:
+                      lock.DoWait();
+                      break;
+                    default:
+                      throw new Error("Unknown action " + cur_action);
+                  }
+                } finally {
+                  // reset action back to hold if it isn't something else.
+                  action.compareAndSet(cur_action, Action.HOLD, stamp, stamp+1);
+                }
+              }
+            } catch (Exception e) {
+              throw new TestException("Got an error while performing action " + cur_action, e);
+            }
+          }
+        } finally {
+          held = false;
+          started = false;
+        }
+      }, "Locker thread " + cnt.getAndIncrement() + " for " + lock);
+      // Make sure we can get any exceptions this throws.
+      runner.setUncaughtExceptionHandler((t, e) -> { exe = e; });
+      runner.start();
+    }
+
+    public void waitForLockToBeHeld() throws Exception {
+      while (true) {
+        if (IsLocked() && Objects.equals(runner, Monitors.getObjectMonitorUsage(lock).owner)) {
+          return;
+        }
+      }
+    }
+
+    public synchronized void waitForNotifySleep() throws Exception {
+      if (runner == null) {
+        throw new Error("No thread trying to lock!");
+      }
+      do {
+        checkException();
+      } while (!started ||
+          !Arrays.asList(Monitors.getObjectMonitorUsage(lock).notifyWaiters).contains(runner));
+    }
+
+    public synchronized void waitForContendedSleep() throws Exception {
+      if (runner == null) {
+        throw new Error("No thread trying to lock!");
+      }
+      do {
+        checkException();
+      } while (!started ||
+          runner.getState() != Thread.State.BLOCKED ||
+          !Arrays.asList(Monitors.getObjectMonitorUsage(lock).waiters).contains(runner));
+    }
+
+    public synchronized void DoNotify() {
+      if (!IsLocked()) {
+        throw new Error("Not locked");
+      }
+      setAction(Action.NOTIFY);
+    }
+
+    public synchronized void DoNotifyAll() {
+      if (!IsLocked()) {
+        throw new Error("Not locked");
+      }
+      setAction(Action.NOTIFY_ALL);
+    }
+
+    public synchronized void DoTimedWait() throws Exception {
+      if (!IsLocked()) {
+        throw new Error("Not locked");
+      }
+      setAction(Action.TIMED_WAIT);
+    }
+
+    public synchronized void DoWait() throws Exception {
+      if (!IsLocked()) {
+        throw new Error("Not locked");
+      }
+      setAction(Action.WAIT);
+    }
+
+    public synchronized void interruptWorker() throws Exception {
+      if (!IsLocked()) {
+        throw new Error("Not locked");
+      }
+      runner.interrupt();
+    }
+
+    public synchronized void waitForActionToFinish() throws Exception {
+      checkException();
+      while (action.getReference() != Action.HOLD) { checkException(); }
+    }
+
+    public synchronized void DoUnlock() throws Exception {
+      Error throwing = null;
+      if (!IsLocked()) {
+        // We might just be racing some exception that was thrown by the worker thread. Cache the
+        // exception, we will throw one from the worker before this one.
+        throwing = new Error("Not locked!");
+      }
+      setAction(Action.RELEASE);
+      Thread run = runner;
+      runner = null;
+      while (held) {}
+      run.join();
+      action.set(Action.HOLD, 0);
+      // Make sure to throw any exception that occurred since it might not have unlocked due to our
+      // request.
+      checkException();
+      DoCleanup();
+      if (throwing != null) {
+        throw throwing;
+      }
+    }
+
+    public synchronized void DoCleanup() throws Exception {
+      if (runner != null) {
+        Thread run = runner;
+        runner = null;
+        while (held) {}
+        run.join();
+      }
+      action.set(Action.HOLD, 0);
+      exe = null;
+    }
+  }
+}
+
diff --git a/test/1934-jvmti-signal-thread/src/art/Suspension.java b/test/1934-jvmti-signal-thread/src/art/Suspension.java
new file mode 100644
index 0000000..16e62cc
--- /dev/null
+++ b/test/1934-jvmti-signal-thread/src/art/Suspension.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+public class Suspension {
+  // Suspends a thread using jvmti.
+  public native static void suspend(Thread thr);
+
+  // Resumes a thread using jvmti.
+  public native static void resume(Thread thr);
+
+  // Presumably reports whether thr is currently suspended; native implementation not shown here.
+  public native static boolean isSuspended(Thread thr);
+  // NOTE(review): the meaning of the returned int[] is defined by the native code - confirm.
+  public native static int[] suspendList(Thread... threads);
+  public native static int[] resumeList(Thread... threads);
+}
diff --git a/test/1934-jvmti-signal-thread/src/art/Test1934.java b/test/1934-jvmti-signal-thread/src/art/Test1934.java
new file mode 100644
index 0000000..552570a
--- /dev/null
+++ b/test/1934-jvmti-signal-thread/src/art/Test1934.java
@@ -0,0 +1,260 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.util.concurrent.Semaphore;
+import java.util.Objects;
+
+public class Test1934 {
+  public static final boolean PRINT_STACK_TRACE = false;
+
+  public static void run() throws Exception {  // Runs all scenarios, each after its banner.
+    System.out.println("Interrupt before start");
+    testInterruptBeforeStart();
+
+    System.out.println("Stop before start");
+    testStopBeforeStart();
+
+    System.out.println("Interrupt recur");
+    testInterruptRecur();
+
+    System.out.println("Stop Recur");
+    testStopRecur();
+
+    System.out.println("Interrupt spinning");
+    testInterruptSpinning();
+
+    System.out.println("Stop spinning");
+    testStopSpinning();
+
+    System.out.println("Interrupt wait");
+    testInterruptWait();
+
+    System.out.println("Stop wait");
+    testStopWait();
+
+    System.out.println("Stop in native");
+    testStopInNative();
+  }
+
+  public static void testStopBeforeStart() throws Exception {
+    final Throwable[] out_err = new Throwable[] { null, };  // Written by the handler below.
+    // The stop is requested before 'target' starts; start()/join() run only if that call returns.
+    Thread target = new Thread(() -> { while (true) { } }, "waiting thread!");
+    target.setUncaughtExceptionHandler((t, e) -> { out_err[0] = e; });
+    System.out.println("stopping other thread before starting");
+    try {
+      Threads.stopThread(target, new Error("AWESOME"));
+      target.start();
+      target.join();
+      System.out.println("Other thread Stopped by: " + out_err[0]);
+      if (PRINT_STACK_TRACE && out_err[0] != null) {
+        out_err[0].printStackTrace();
+      }
+    } catch (Exception e) {  // Any exception from the block above is reported, not rethrown.
+      System.out.println("Caught exception " + e);
+    }
+  }
+
+  public static void testInterruptBeforeStart() throws Exception {
+    final Throwable[] out_err = new Throwable[] { null, };  // Written by the handler below.
+    // The interrupt is requested before 'target' starts; start()/join() run only if it returns.
+    Thread target = new Thread(() -> { while (true) { } }, "waiting thread!");
+    target.setUncaughtExceptionHandler((t, e) -> { out_err[0] = e; });
+    System.out.println("interrupting other thread before starting");
+    try {
+      Threads.interruptThread(target);
+      target.start();
+      target.join();
+      System.out.println("Other thread interrupted. err: " + out_err[0]);
+      if (PRINT_STACK_TRACE && out_err[0] != null) {
+        out_err[0].printStackTrace();
+      }
+    } catch (Exception e) {  // Any exception from the block above is reported, not rethrown.
+      System.out.println("Caught exception " + e);
+    }
+  }
+
+  public static void testStopWait() throws Exception {
+    final Throwable[] out_err = new Throwable[] { null, };
+    final Object tst = new Object();
+    final Semaphore sem = new Semaphore(0);
+    Thread target = new Thread(() -> {
+      sem.release();
+      while (true) {
+        try {
+          synchronized (tst) {
+            tst.wait();
+          }
+        } catch (InterruptedException e) { throw new Error("Interrupted!", e); }
+      }
+    }, "waiting thread!");
+    target.setUncaughtExceptionHandler((t, e) -> { out_err[0] = e; });
+    target.start();
+    sem.acquire();
+    while (!Objects.equals(Monitors.getCurrentContendedMonitor(target), tst)) {}
+    System.out.println("stopping other thread waiting");
+    Threads.stopThread(target, new Error("AWESOME"));
+    target.join();
+    System.out.println("Other thread Stopped by: " + out_err[0]);
+    if (PRINT_STACK_TRACE && out_err[0] != null) {
+      out_err[0].printStackTrace();
+    }
+  }
+
+  public static void testInterruptWait() throws Exception {
+    final Throwable[] out_err = new Throwable[] { null, };
+    final Object tst = new Object();
+    final Semaphore sem = new Semaphore(0);
+    Thread target = new Thread(() -> {
+      sem.release();
+      while (true) {
+        try {
+          synchronized (tst) {
+            tst.wait();
+          }
+        } catch (InterruptedException e) { throw new Error("Interrupted!", e); }
+      }
+    }, "waiting thread!");
+    target.setUncaughtExceptionHandler((t, e) -> { out_err[0] = e; });
+    target.start();
+    sem.acquire();
+    while (!Objects.equals(Monitors.getCurrentContendedMonitor(target), tst)) {}
+    System.out.println("interrupting other thread waiting");
+    Threads.interruptThread(target);
+    target.join();
+    System.out.println("Other thread interrupted. err: " + out_err[0]);
+    if (PRINT_STACK_TRACE && out_err[0] != null) {
+      out_err[0].printStackTrace();
+    }
+  }
+
+  public static void doNothing() {}  // Intentionally empty; see its use in testStopInNative().
+  public static native long allocNativeMonitor();  // Returns the id taken by the methods below.
+  public static native void nativeWaitForOtherThread(long id);  // Presumably blocks; confirm.
+  public static native void nativeDoInterleaved(long id, Runnable op);  // Likely runs op; confirm.
+  public static native void destroyNativeMonitor(long id);  // Presumably frees id; confirm.
+  public static void testStopInNative() throws Exception {
+    final Throwable[] out_err = new Throwable[] { null, };
+    final long native_monitor_id = allocNativeMonitor();
+    final Semaphore sem = new Semaphore(0);
+    Thread target = new Thread(() -> {
+      sem.release();
+      nativeWaitForOtherThread(native_monitor_id);
+      // We need to make sure we do something that can get the exception to be actually noticed.
+      doNothing();
+    }, "native waiting thread!");
+    target.setUncaughtExceptionHandler((t, e) -> { out_err[0] = e; });
+    target.start();
+    sem.acquire();
+    System.out.println("stopping other thread");
+    nativeDoInterleaved(
+        native_monitor_id,
+        () -> { Threads.stopThread(target, new Error("AWESOME")); });
+    target.join();
+    System.out.println("Other thread Stopped by: " + out_err[0]);
+    if (PRINT_STACK_TRACE && out_err[0] != null) {
+      out_err[0].printStackTrace();
+    }
+    destroyNativeMonitor(native_monitor_id);
+  }
+
+  public static void doRecur(Runnable r) {  // Runs r if non-null, then recurses.
+    if (r != null) {
+      r.run();
+    }
+    doRecur(r);  // No base case: ends only by throwing, e.g. StackOverflowError.
+  }
+
+  public static void testStopRecur() throws Exception {
+    final Throwable[] out_err = new Throwable[] { null, };
+    final Semaphore sem = new Semaphore(0);
+    Thread target = new Thread(() -> {
+      sem.release();
+      while (true) {
+        try {
+          doRecur(null);
+        } catch (StackOverflowError e) {}
+      }
+    }, "recuring thread!");
+    target.setUncaughtExceptionHandler((t, e) -> { out_err[0] = e; });
+    target.start();
+    sem.acquire();
+    System.out.println("stopping other thread recurring");
+    Threads.stopThread(target, new Error("AWESOME!"));
+    target.join();
+    System.out.println("Other thread Stopped by: " + out_err[0]);
+    if (PRINT_STACK_TRACE && out_err[0] != null) {
+      out_err[0].printStackTrace();
+    }
+  }
+
+  public static void testInterruptRecur() throws Exception {
+    final Throwable[] out_err = new Throwable[] { null, };
+    final Semaphore sem = new Semaphore(0);
+    Thread target = new Thread(() -> {
+      sem.release();
+      while (true) {
+        try {
+          doRecur(() -> {
+            if (Thread.currentThread().isInterrupted()) { throw new Error("Interrupted!"); }
+          });
+        } catch (StackOverflowError e) { }
+      }
+    }, "recuring thread!");
+    target.setUncaughtExceptionHandler((t, e) -> { out_err[0] = e; });
+    target.start();
+    sem.acquire();
+    System.out.println("Interrupting other thread recurring");
+    Threads.interruptThread(target);
+    target.join();
+    System.out.println("Other thread Interrupted. err: " + out_err[0]);
+    if (PRINT_STACK_TRACE && out_err[0] != null) {
+      out_err[0].printStackTrace();
+    }
+  }
+
+  public static void testStopSpinning() throws Exception {
+    final Throwable[] out_err = new Throwable[] { null, };
+    final Semaphore sem = new Semaphore(0);
+    Thread target = new Thread(() -> { sem.release(); while (true) {} }, "Spinning thread!");
+    target.setUncaughtExceptionHandler((t, e) -> { out_err[0] = e; });
+    target.start();
+    sem.acquire();
+    System.out.println("stopping other thread spinning");
+    Threads.stopThread(target, new Error("AWESOME!"));
+    target.join();
+    System.out.println("Other thread Stopped by: " + out_err[0]);
+    if (PRINT_STACK_TRACE && out_err[0] != null) {
+      out_err[0].printStackTrace();
+    }
+  }
+
+  public static void testInterruptSpinning() throws Exception {
+    final Semaphore started = new Semaphore(0);
+    Thread spinner = new Thread(() -> {
+      started.release();
+      while (!Thread.currentThread().isInterrupted()) { }  // Exit as soon as the flag is set.
+    }, "Spinning thread!");
+    spinner.start();
+    started.acquire();  // Proceed once the spinning thread has begun running.
+    System.out.println("Interrupting other thread spinning");
+    Threads.interruptThread(spinner);
+    spinner.join();
+    System.out.println("Other thread Interrupted.");
+  }
+}
diff --git a/test/1934-jvmti-signal-thread/src/art/Threads.java b/test/1934-jvmti-signal-thread/src/art/Threads.java
new file mode 100644
index 0000000..266813b
--- /dev/null
+++ b/test/1934-jvmti-signal-thread/src/art/Threads.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+public class Threads {  // Native thread-signalling hooks called by Test1934.
+  public static native void interruptThread(Thread t);  // Likely jvmti InterruptThread; confirm.
+  public static native void stopThread(Thread t, Throwable thr);  // Likely jvmti StopThread.
+}
diff --git a/test/442-checker-constant-folding/build b/test/442-checker-constant-folding/build
index 49292c9..947ec9a 100755
--- a/test/442-checker-constant-folding/build
+++ b/test/442-checker-constant-folding/build
@@ -20,4 +20,7 @@
 # Also disable desugar because it is missing in jack platform builds.
 export DESUGAR=false
 
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
 ./default-build "$@"
diff --git a/test/450-checker-types/build b/test/450-checker-types/build
index 49292c9..947ec9a 100755
--- a/test/450-checker-types/build
+++ b/test/450-checker-types/build
@@ -20,4 +20,7 @@
 # Also disable desugar because it is missing in jack platform builds.
 export DESUGAR=false
 
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
 ./default-build "$@"
diff --git a/test/458-checker-instruct-simplification/build b/test/458-checker-instruct-simplification/build
index 49292c9..947ec9a 100755
--- a/test/458-checker-instruct-simplification/build
+++ b/test/458-checker-instruct-simplification/build
@@ -20,4 +20,7 @@
 # Also disable desugar because it is missing in jack platform builds.
 export DESUGAR=false
 
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
 ./default-build "$@"
diff --git a/test/458-checker-instruct-simplification/smali/SmaliTests.smali b/test/458-checker-instruct-simplification/smali/SmaliTests.smali
index a8d7d94..d987398 100644
--- a/test/458-checker-instruct-simplification/smali/SmaliTests.smali
+++ b/test/458-checker-instruct-simplification/smali/SmaliTests.smali
@@ -331,70 +331,54 @@
 # Test simplification of the `~~var` pattern.
 # The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNot`.
 
-## CHECK-START: long SmaliTests.NotNot1(long) instruction_simplifier (before)
+## CHECK-START: long SmaliTests.$noinline$NotNot1(long) instruction_simplifier (before)
 ## CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
 ## CHECK-DAG:     <<Not1:j\d+>>     Not [<<Arg>>]
 ## CHECK-DAG:     <<Not2:j\d+>>     Not [<<Not1>>]
 ## CHECK-DAG:                       Return [<<Not2>>]
 
-## CHECK-START: long SmaliTests.NotNot1(long) instruction_simplifier (after)
+## CHECK-START: long SmaliTests.$noinline$NotNot1(long) instruction_simplifier (after)
 ## CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
 ## CHECK-DAG:                       Return [<<Arg>>]
 
-## CHECK-START: long SmaliTests.NotNot1(long) instruction_simplifier (after)
+## CHECK-START: long SmaliTests.$noinline$NotNot1(long) instruction_simplifier (after)
 ## CHECK-NOT:                       Not
 
-.method public static NotNot1(J)J
+.method public static $noinline$NotNot1(J)J
     .registers 4
     .param p0, "arg"    # J
 
     .prologue
-    sget-boolean v0, LMain;->doThrow:Z
 
-    # if (doThrow) throw new Error();
-    if-eqz v0, :cond_a
-    new-instance v0, Ljava/lang/Error;
-    invoke-direct {v0}, Ljava/lang/Error;-><init>()V
-    throw v0
-
-  :cond_a
     # return ~~arg
     not-long v0, p0
     not-long v0, v0
     return-wide v0
 .end method
 
-## CHECK-START: int SmaliTests.NotNot2(int) instruction_simplifier (before)
+## CHECK-START: int SmaliTests.$noinline$NotNot2(int) instruction_simplifier (before)
 ## CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Not1:i\d+>>     Not [<<Arg>>]
 ## CHECK-DAG:     <<Not2:i\d+>>     Not [<<Not1>>]
 ## CHECK-DAG:     <<Add:i\d+>>      Add [<<Not2>>,<<Not1>>]
 ## CHECK-DAG:                       Return [<<Add>>]
 
-## CHECK-START: int SmaliTests.NotNot2(int) instruction_simplifier (after)
+## CHECK-START: int SmaliTests.$noinline$NotNot2(int) instruction_simplifier (after)
 ## CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
 ## CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Not>>]
 ## CHECK-DAG:                       Return [<<Add>>]
 
-## CHECK-START: int SmaliTests.NotNot2(int) instruction_simplifier (after)
+## CHECK-START: int SmaliTests.$noinline$NotNot2(int) instruction_simplifier (after)
 ## CHECK:                           Not
 ## CHECK-NOT:                       Not
 
-.method public static NotNot2(I)I
+.method public static $noinline$NotNot2(I)I
     .registers 3
     .param p0, "arg"    # I
 
     .prologue
-    sget-boolean v1, LMain;->doThrow:Z
 
-    # if (doThrow) throw new Error();
-    if-eqz v1, :cond_a
-    new-instance v1, Ljava/lang/Error;
-    invoke-direct {v1}, Ljava/lang/Error;-><init>()V
-    throw v1
-
-  :cond_a
     # temp = ~arg; return temp + ~temp;
     not-int v0, p0
     not-int v1, v0
@@ -407,31 +391,31 @@
 # both negations can be removed but we only expect the simplifier to
 # remove the second.
 
-## CHECK-START: boolean SmaliTests.NotNotBool(boolean) instruction_simplifier (before)
+## CHECK-START: boolean SmaliTests.$noinline$NotNotBool(boolean) instruction_simplifier (before)
 ## CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
 ## CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
 ## CHECK-DAG:     <<Result:z\d+>>    InvokeStaticOrDirect
 ## CHECK-DAG:     <<NotResult:i\d+>> Xor [<<Result>>,<<Const1>>]
 ## CHECK-DAG:                        Return [<<NotResult>>]
 
-## CHECK-START: boolean SmaliTests.NotNotBool(boolean) instruction_simplifier (after)
+## CHECK-START: boolean SmaliTests.$noinline$NotNotBool(boolean) instruction_simplifier (after)
 ## CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
 ## CHECK-DAG:     <<Result:z\d+>>    InvokeStaticOrDirect
 ## CHECK-DAG:     <<NotResult:z\d+>> BooleanNot [<<Result>>]
 ## CHECK-DAG:                        Return [<<NotResult>>]
 
-## CHECK-START: boolean SmaliTests.NotNotBool(boolean) instruction_simplifier$after_inlining (before)
+## CHECK-START: boolean SmaliTests.$noinline$NotNotBool(boolean) instruction_simplifier$after_inlining (before)
 ## CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
 ## CHECK-DAG:     <<NotArg:z\d+>>    BooleanNot [<<Arg>>]
 ## CHECK-DAG:     <<NotNotArg:z\d+>> BooleanNot [<<NotArg>>]
 ## CHECK-DAG:                        Return [<<NotNotArg>>]
 
-## CHECK-START: boolean SmaliTests.NotNotBool(boolean) instruction_simplifier$after_inlining (after)
+## CHECK-START: boolean SmaliTests.$noinline$NotNotBool(boolean) instruction_simplifier$after_inlining (after)
 ## CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
 ## CHECK-DAG:     <<NotArg:z\d+>>    BooleanNot [<<Arg>>]
 ## CHECK-DAG:                        Return [<<Arg>>]
 
-## CHECK-START: boolean SmaliTests.NotNotBool(boolean) dead_code_elimination$final (after)
+## CHECK-START: boolean SmaliTests.$noinline$NotNotBool(boolean) dead_code_elimination$final (after)
 ## CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
 ## CHECK-DAG:                        Return [<<Arg>>]
 
@@ -447,20 +431,12 @@
 .end method
 
 
-.method public static NotNotBool(Z)Z
+.method public static $noinline$NotNotBool(Z)Z
     .registers 2
     .param p0, "arg"    # Z
 
     .prologue
-    sget-boolean v0, LMain;->doThrow:Z
 
-    # if (doThrow) throw new Error();
-    if-eqz v0, :cond_a
-    new-instance v0, Ljava/lang/Error;
-    invoke-direct {v0}, Ljava/lang/Error;-><init>()V
-    throw v0
-
-  :cond_a
     # return !Negate(arg)
     invoke-static {p0}, LSmaliTests;->NegateValue(Z)Z
     move-result v0
diff --git a/test/458-checker-instruct-simplification/src/Main.java b/test/458-checker-instruct-simplification/src/Main.java
index 5c36ce9..20858f5 100644
--- a/test/458-checker-instruct-simplification/src/Main.java
+++ b/test/458-checker-instruct-simplification/src/Main.java
@@ -18,8 +18,6 @@
 
 public class Main {
 
-  static boolean doThrow = false;
-
   public static void assertBooleanEquals(boolean expected, boolean result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -74,7 +72,6 @@
   /// CHECK-NOT:                        Add
 
   public static long $noinline$Add0(long arg) {
-    if (doThrow) { throw new Error(); }
     return 0 + arg;
   }
 
@@ -97,7 +94,6 @@
   /// CHECK-DAG:                        Return [<<Add>>]
 
   public static int $noinline$AddAddSubAddConst(int arg) {
-    if (doThrow) { throw new Error(); }
     return arg + 1 + 2 - 3 + 4;
   }
 
@@ -115,7 +111,6 @@
   /// CHECK-NOT:                      And
 
   public static int $noinline$AndAllOnes(int arg) {
-    if (doThrow) { throw new Error(); }
     return arg & -1;
   }
 
@@ -137,7 +132,6 @@
   /// CHECK-NOT:                       And
 
   public static int $noinline$UShr28And15(int arg) {
-    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 15;
   }
 
@@ -159,7 +153,6 @@
   /// CHECK-NOT:                       And
 
   public static long $noinline$UShr60And15(long arg) {
-    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 15;
   }
 
@@ -180,7 +173,6 @@
   /// CHECK-DAG:                       Return [<<And>>]
 
   public static int $noinline$UShr28And7(int arg) {
-    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 7;
   }
 
@@ -201,7 +193,6 @@
   /// CHECK-DAG:                       Return [<<And>>]
 
   public static long $noinline$UShr60And7(long arg) {
-    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 7;
   }
 
@@ -224,7 +215,6 @@
   /// CHECK-NOT:                       And
 
   public static int $noinline$Shr24And255(int arg) {
-    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 255;
   }
 
@@ -247,7 +237,6 @@
   /// CHECK-NOT:                       And
 
   public static long $noinline$Shr56And255(long arg) {
-    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 255;
   }
 
@@ -268,7 +257,6 @@
   /// CHECK-DAG:                       Return [<<And>>]
 
   public static int $noinline$Shr24And127(int arg) {
-    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 127;
   }
 
@@ -289,7 +277,6 @@
   /// CHECK-DAG:                       Return [<<And>>]
 
   public static long $noinline$Shr56And127(long arg) {
-    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 127;
   }
 
@@ -307,7 +294,6 @@
   /// CHECK-NOT:                      Div
 
   public static long $noinline$Div1(long arg) {
-    if (doThrow) { throw new Error(); }
     return arg / 1;
   }
 
@@ -326,7 +312,6 @@
   /// CHECK-NOT:                       Div
 
   public static int $noinline$DivN1(int arg) {
-    if (doThrow) { throw new Error(); }
     return arg / -1;
   }
 
@@ -344,7 +329,6 @@
   /// CHECK-NOT:                       Mul
 
   public static long $noinline$Mul1(long arg) {
-    if (doThrow) { throw new Error(); }
     return arg * 1;
   }
 
@@ -363,7 +347,6 @@
   /// CHECK-NOT:                       Mul
 
   public static int $noinline$MulN1(int arg) {
-    if (doThrow) { throw new Error(); }
     return arg * -1;
   }
 
@@ -383,7 +366,6 @@
   /// CHECK-NOT:                        Mul
 
   public static long $noinline$MulPowerOfTwo128(long arg) {
-    if (doThrow) { throw new Error(); }
     return arg * 128;
   }
 
@@ -404,7 +386,6 @@
   /// CHECK-DAG:                         Return [<<Mul>>]
 
   public static long $noinline$MulMulMulConst(long arg) {
-    if (doThrow) { throw new Error(); }
     return 10 * arg * 11 * 12;
   }
 
@@ -422,7 +403,6 @@
   /// CHECK-NOT:                       Or
 
   public static int $noinline$Or0(int arg) {
-    if (doThrow) { throw new Error(); }
     return arg | 0;
   }
 
@@ -439,7 +419,6 @@
   /// CHECK-NOT:                        Or
 
   public static long $noinline$OrSame(long arg) {
-    if (doThrow) { throw new Error(); }
     return arg | arg;
   }
 
@@ -457,7 +436,6 @@
   /// CHECK-NOT:                       Shl
 
   public static int $noinline$Shl0(int arg) {
-    if (doThrow) { throw new Error(); }
     return arg << 0;
   }
 
@@ -475,7 +453,6 @@
   /// CHECK-NOT:                       Shr
 
   public static long $noinline$Shr0(long arg) {
-    if (doThrow) { throw new Error(); }
     return arg >> 0;
   }
 
@@ -493,7 +470,6 @@
   /// CHECK-NOT:                       Shr
 
   public static long $noinline$Shr64(long arg) {
-    if (doThrow) { throw new Error(); }
     return arg >> 64;
   }
 
@@ -511,7 +487,6 @@
   /// CHECK-NOT:                       Sub
 
   public static long $noinline$Sub0(long arg) {
-    if (doThrow) { throw new Error(); }
     return arg - 0;
   }
 
@@ -530,7 +505,6 @@
   /// CHECK-NOT:                       Sub
 
   public static int $noinline$SubAliasNeg(int arg) {
-    if (doThrow) { throw new Error(); }
     return 0 - arg;
   }
 
@@ -549,7 +523,6 @@
   /// CHECK-DAG:                        Return [<<Sub>>]
 
   public static int $noinline$SubAddConst1(int arg) {
-    if (doThrow) { throw new Error(); }
     return 5 - arg + 6;
   }
 
@@ -568,7 +541,6 @@
   /// CHECK-DAG:                        Return [<<Sub>>]
 
   public static int $noinline$SubAddConst2(int arg) {
-    if (doThrow) { throw new Error(); }
     return 14 - (arg + 13);
   }
 
@@ -587,7 +559,6 @@
   /// CHECK-DAG:                        Return [<<Add>>]
 
   public static long $noinline$SubSubConst(long arg) {
-    if (doThrow) { throw new Error(); }
     return 17 - (18 - arg);
   }
 
@@ -605,7 +576,6 @@
   /// CHECK-NOT:                       UShr
 
   public static long $noinline$UShr0(long arg) {
-    if (doThrow) { throw new Error(); }
     return arg >>> 0;
   }
 
@@ -623,7 +593,6 @@
   /// CHECK-NOT:                       Xor
 
   public static int $noinline$Xor0(int arg) {
-    if (doThrow) { throw new Error(); }
     return arg ^ 0;
   }
 
@@ -642,7 +611,6 @@
   /// CHECK-NOT:                       Xor
 
   public static int $noinline$XorAllOnes(int arg) {
-    if (doThrow) { throw new Error(); }
     return arg ^ -1;
   }
 
@@ -670,7 +638,6 @@
   /// CHECK-DAG:                       Return [<<Neg>>]
 
   public static int $noinline$AddNegs1(int arg1, int arg2) {
-    if (doThrow) { throw new Error(); }
     return -arg1 + -arg2;
   }
 
@@ -716,7 +683,6 @@
   /// CHECK-DAG:                       Return [<<Or>>]
 
   public static int $noinline$AddNegs2(int arg1, int arg2) {
-    if (doThrow) { throw new Error(); }
     int temp1 = -arg1;
     int temp2 = -arg2;
     return (temp1 + temp2) | (temp1 + temp2);
@@ -756,7 +722,6 @@
   /// CHECK:                           Goto
 
   public static long $noinline$AddNegs3(long arg1, long arg2) {
-    if (doThrow) { throw new Error(); }
     long res = 0;
     long n_arg1 = -arg1;
     long n_arg2 = -arg2;
@@ -790,7 +755,6 @@
   /// CHECK-NOT:                       Add
 
   public static long $noinline$AddNeg1(long arg1, long arg2) {
-    if (doThrow) { throw new Error(); }
     return -arg1 + arg2;
   }
 
@@ -825,7 +789,6 @@
   /// CHECK-NOT:                       Sub
 
   public static long $noinline$AddNeg2(long arg1, long arg2) {
-    if (doThrow) { throw new Error(); }
     long temp = -arg2;
     return (arg1 + temp) | (arg1 + temp);
   }
@@ -849,7 +812,6 @@
   /// CHECK-NOT:                       Neg
 
   public static long $noinline$NegNeg1(long arg) {
-    if (doThrow) { throw new Error(); }
     return -(-arg);
   }
 
@@ -883,7 +845,6 @@
   /// CHECK:                           Return [<<Const0>>]
 
   public static int $noinline$NegNeg2(int arg) {
-    if (doThrow) { throw new Error(); }
     int temp = -arg;
     return temp + -temp;
   }
@@ -911,7 +872,6 @@
   /// CHECK-NOT:                       Sub
 
   public static long $noinline$NegNeg3(long arg) {
-    if (doThrow) { throw new Error(); }
     return 0 - -arg;
   }
 
@@ -938,7 +898,6 @@
   /// CHECK-NOT:                       Neg
 
   public static int $noinline$NegSub1(int arg1, int arg2) {
-    if (doThrow) { throw new Error(); }
     return -(arg1 - arg2);
   }
 
@@ -971,7 +930,6 @@
   /// CHECK-DAG:                       Return [<<Or>>]
 
   public static int $noinline$NegSub2(int arg1, int arg2) {
-    if (doThrow) { throw new Error(); }
     int temp = arg1 - arg2;
     return -temp | -temp;
   }
@@ -996,7 +954,6 @@
   /// CHECK-NOT:                       Xor
 
   public static long $noinline$NotNot1(long arg) {
-    if (doThrow) { throw new Error(); }
     return ~~arg;
   }
 
@@ -1022,7 +979,6 @@
   /// CHECK-NOT:                       Xor
 
   public static int $noinline$NotNot2(int arg) {
-    if (doThrow) { throw new Error(); }
     int temp = ~arg;
     return temp + ~temp;
   }
@@ -1050,7 +1006,6 @@
   /// CHECK-NOT:                       Sub
 
   public static int $noinline$SubNeg1(int arg1, int arg2) {
-    if (doThrow) { throw new Error(); }
     return -arg1 - arg2;
   }
 
@@ -1086,7 +1041,6 @@
   /// CHECK-NOT:                       Add
 
   public static int $noinline$SubNeg2(int arg1, int arg2) {
-    if (doThrow) { throw new Error(); }
     int temp = -arg1;
     return (temp - arg2) | (temp - arg2);
   }
@@ -1122,7 +1076,6 @@
   /// CHECK:                           Goto
 
   public static long $noinline$SubNeg3(long arg1, long arg2) {
-    if (doThrow) { throw new Error(); }
     long res = 0;
     long temp = -arg1;
     for (long i = 0; i < 1; i++) {
@@ -1146,7 +1099,6 @@
   /// CHECK-DAG:                       Return [<<True>>]
 
   public static boolean $noinline$EqualBoolVsIntConst(boolean arg) {
-    if (doThrow) { throw new Error(); }
     // Make calls that will be inlined to make sure the instruction simplifier
     // sees the simplification (dead code elimination will also try to simplify it).
     return (arg ? $inline$ReturnArg(0) : $inline$ReturnArg(1)) != 2;
@@ -1171,7 +1123,6 @@
   /// CHECK-DAG:                       Return [<<False>>]
 
   public static boolean $noinline$NotEqualBoolVsIntConst(boolean arg) {
-    if (doThrow) { throw new Error(); }
     // Make calls that will be inlined to make sure the instruction simplifier
     // sees the simplification (dead code elimination will also try to simplify it).
     return (arg ? $inline$ReturnArg(0) : $inline$ReturnArg(1)) == 2;
@@ -1232,7 +1183,6 @@
   }
 
   public static boolean $noinline$NotNotBool(boolean arg) {
-    if (doThrow) { throw new Error(); }
     return !(NegateValue(arg));
   }
 
@@ -1252,7 +1202,6 @@
   /// CHECK-NOT:                        Div
 
   public static float $noinline$Div2(float arg) {
-    if (doThrow) { throw new Error(); }
     return arg / 2.0f;
   }
 
@@ -1271,7 +1220,6 @@
   /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
   public static double $noinline$Div2(double arg) {
-    if (doThrow) { throw new Error(); }
     return arg / 2.0;
   }
 
@@ -1291,7 +1239,6 @@
   /// CHECK-NOT:                        Div
 
   public static float $noinline$DivMP25(float arg) {
-    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
 
@@ -1310,7 +1257,6 @@
   /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
   public static double $noinline$DivMP25(double arg) {
-    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
 
@@ -1330,7 +1276,6 @@
   /// CHECK-NEXT:                       Add [<<Arg>>,<<Shift>>]
 
   public static int $noinline$mulPow2Plus1(int arg) {
-    if (doThrow) { throw new Error(); }
     return arg * 9;
   }
 
@@ -1350,7 +1295,6 @@
   /// CHECK-NEXT:                       Sub [<<Shift>>,<<Arg>>]
 
   public static long $noinline$mulPow2Minus1(long arg) {
-    if (doThrow) { throw new Error(); }
     return arg * 31;
   }
 
@@ -1358,14 +1302,12 @@
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
-  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<Field>>,<<Const1>>]
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
   /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_inlining (after)
-  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1373,7 +1315,6 @@
   /// CHECK-DAG:                        Return [<<Select>>]
 
   public static int $noinline$booleanFieldNotEqualOne() {
-    if (doThrow) { throw new Error(); }
     return (booleanField == $inline$true()) ? 13 : 54;
   }
 
@@ -1381,14 +1322,12 @@
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
-  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<NE:z\d+>>       Equal [<<Field>>,<<Const0>>]
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
   /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_inlining (after)
-  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1396,7 +1335,6 @@
   /// CHECK-DAG:                        Return [<<Select>>]
 
   public static int $noinline$booleanFieldEqualZero() {
-    if (doThrow) { throw new Error(); }
     return (booleanField != $inline$false()) ? 13 : 54;
   }
 
@@ -1425,7 +1363,6 @@
   // LessThanOrEqual instructions.
 
   public static int $noinline$intConditionNotEqualOne(int i) {
-    if (doThrow) { throw new Error(); }
     return ((i > 42) == $inline$true()) ? 13 : 54;
   }
 
@@ -1454,7 +1391,6 @@
   // LessThanOrEqual instructions.
 
   public static int $noinline$intConditionEqualZero(int i) {
-    if (doThrow) { throw new Error(); }
     return ((i > 42) != $inline$false()) ? 13 : 54;
   }
 
@@ -1473,7 +1409,6 @@
   /// CHECK-DAG:                        Return [<<Select>>]
 
   public static int $noinline$floatConditionNotEqualOne(float f) {
-    if (doThrow) { throw new Error(); }
     return ((f > 42.0f) == true) ? 13 : 54;
   }
 
@@ -1490,7 +1425,6 @@
   /// CHECK-DAG:                        Return [<<Select>>]
 
   public static int $noinline$doubleConditionEqualZero(double d) {
-    if (doThrow) { throw new Error(); }
     return ((d > 42.0) != false) ? 13 : 54;
   }
 
@@ -1508,7 +1442,6 @@
   /// CHECK-NOT:                        TypeConversion
 
   public static int $noinline$intToDoubleToInt(int value) {
-    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by a conversion back.
     return (int) (double) value;
   }
@@ -1527,7 +1460,6 @@
   /// CHECK-NOT:                        TypeConversion
 
   public static String $noinline$intToDoubleToIntPrint(int value) {
-    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by a conversion back
     // with another use of the intermediate result.
     double d = (double) value;
@@ -1549,7 +1481,6 @@
   /// CHECK-NOT:                        TypeConversion
 
   public static int $noinline$byteToDoubleToInt(byte value) {
-    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion, use implicit conversion.
     return (int) (double) value;
   }
@@ -1570,7 +1501,6 @@
   /// CHECK-NOT:                        TypeConversion
 
   public static int $noinline$floatToDoubleToInt(float value) {
-    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion.
     return (int) (double) value;
   }
@@ -1586,7 +1516,6 @@
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
   public static String $noinline$floatToDoubleToIntPrint(float value) {
-    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion with
     // an extra use of the intermediate result.
     double d = (double) value;
@@ -1609,7 +1538,6 @@
   /// CHECK-NOT:                        TypeConversion
 
   public static short $noinline$byteToDoubleToShort(byte value) {
-    if (doThrow) { throw new Error(); }
     // Originally, this is byte->double->int->short. The first conversion is lossless,
     // so we merge this with the second one to byte->int which we omit as it's an implicit
     // conversion. Then we eliminate the resulting byte->short as an implicit conversion.
@@ -1633,7 +1561,6 @@
   /// CHECK-NOT:                        TypeConversion
 
   public static short $noinline$charToDoubleToShort(char value) {
-    if (doThrow) { throw new Error(); }
     // Originally, this is char->double->int->short. The first conversion is lossless,
     // so we merge this with the second one to char->int which we omit as it's an implicit
     // conversion. Then we are left with the resulting char->short conversion.
@@ -1653,7 +1580,6 @@
   /// CHECK-DAG:                        Return [<<Short>>]
 
   public static short $noinline$floatToIntToShort(float value) {
-    if (doThrow) { throw new Error(); }
     // Lossy FP to integral conversion followed by another conversion: no simplification.
     return (short) value;
   }
@@ -1671,7 +1597,6 @@
   /// CHECK-DAG:                        Return [<<Int>>]
 
   public static int $noinline$intToFloatToInt(int value) {
-    if (doThrow) { throw new Error(); }
     // Lossy integral to FP conversion followed another conversion: no simplification.
     return (int) (float) value;
   }
@@ -1689,7 +1614,6 @@
   /// CHECK-DAG:                        Return [<<Double>>]
 
   public static double $noinline$longToIntToDouble(long value) {
-    if (doThrow) { throw new Error(); }
     // Lossy long-to-int conversion followed an integral to FP conversion: no simplification.
     return (double) (int) value;
   }
@@ -1707,7 +1631,6 @@
   /// CHECK-DAG:                        Return [<<Long>>]
 
   public static long $noinline$longToIntToLong(long value) {
-    if (doThrow) { throw new Error(); }
     // Lossy long-to-int conversion followed an int-to-long conversion: no simplification.
     return (long) (int) value;
   }
@@ -1723,7 +1646,6 @@
   /// CHECK-DAG:                        Return [<<Arg>>]
 
   public static short $noinline$shortToCharToShort(short value) {
-    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion to original type.
     return (short) (char) value;
   }
@@ -1739,7 +1661,6 @@
   /// CHECK-DAG:                        Return [<<Arg>>]
 
   public static int $noinline$shortToLongToInt(short value) {
-    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion, use implicit conversion.
     return (int) (long) value;
   }
@@ -1756,7 +1677,6 @@
   /// CHECK-DAG:                        Return [<<Byte>>]
 
   public static byte $noinline$shortToCharToByte(short value) {
-    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion losing bits
     // from the original type. Simplify to use only one conversion.
     return (byte) (char) value;
@@ -1773,7 +1693,6 @@
   /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
 
   public static String $noinline$shortToCharToBytePrint(short value) {
-    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion losing bits
     // from the original type with an extra use of the intermediate result.
     char c = (char) value;
@@ -1781,6 +1700,62 @@
     return "c=" + ((int) c) + ", b=" + ((int) b);  // implicit conversions.
   }
 
+  /// CHECK-START: long Main.$noinline$intAndSmallLongConstant(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant -12345678
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<And:j\d+>>      And [<<Long>>,<<Mask>>]
+  /// CHECK-DAG:                        Return [<<And>>]
+
+  /// CHECK-START: long Main.$noinline$intAndSmallLongConstant(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant -12345678
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Arg>>,<<Mask>>]
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<And>>]
+  /// CHECK-DAG:                        Return [<<Long>>]
+
+  public static long $noinline$intAndSmallLongConstant(int value) {
+    return value & -12345678L;  // Shall be simplified (constant is 32-bit).
+  }
+
+  /// CHECK-START: long Main.$noinline$intAndLargeLongConstant(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant 9876543210
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<And:j\d+>>      And [<<Long>>,<<Mask>>]
+  /// CHECK-DAG:                        Return [<<And>>]
+
+  /// CHECK-START: long Main.$noinline$intAndLargeLongConstant(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant 9876543210
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<And:j\d+>>      And [<<Long>>,<<Mask>>]
+  /// CHECK-DAG:                        Return [<<And>>]
+
+  public static long $noinline$intAndLargeLongConstant(int value) {
+    return value & 9876543210L;  // Shall not be simplified (constant is not 32-bit).
+  }
+
+  /// CHECK-START: long Main.$noinline$intShr28And15L(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Shift:i\d+>>    IntConstant 28
+  /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant 15
+  /// CHECK-DAG:      <<Shifted:i\d+>>  Shr [<<Arg>>,<<Shift>>]
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Shifted>>]
+  /// CHECK-DAG:      <<And:j\d+>>      And [<<Long>>,<<Mask>>]
+  /// CHECK-DAG:                        Return [<<And>>]
+
+  /// CHECK-START: long Main.$noinline$intShr28And15L(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Shift:i\d+>>    IntConstant 28
+  /// CHECK-DAG:      <<Shifted:i\d+>>  UShr [<<Arg>>,<<Shift>>]
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Shifted>>]
+  /// CHECK-DAG:                        Return [<<Long>>]
+
+  public static long $noinline$intShr28And15L(int value) {
+    return (value >> 28) & 15L;
+  }
+
   /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant 255
@@ -1798,7 +1773,6 @@
   /// CHECK-NOT:                        And
 
   public static byte $noinline$longAnd0xffToByte(long value) {
-    if (doThrow) { throw new Error(); }
     return (byte) (value & 0xff);
   }
 
@@ -1818,7 +1792,6 @@
   /// CHECK-NOT:                        And
 
   public static char $noinline$intAnd0x1ffffToChar(int value) {
-    if (doThrow) { throw new Error(); }
     // Keeping all significant bits and one more.
     return (char) (value & 0x1ffff);
   }
@@ -1838,7 +1811,6 @@
   /// CHECK-DAG:                        Return [<<Short>>]
 
   public static short $noinline$intAnd0x17fffToShort(int value) {
-    if (doThrow) { throw new Error(); }
     // No simplification: clearing a significant bit.
     return (short) (value & 0x17fff);
   }
@@ -1857,7 +1829,6 @@
   /// CHECK-DAG:                        Return [<<Double>>]
 
   public static double $noinline$shortAnd0xffffToShortToDouble(short value) {
-    if (doThrow) { throw new Error(); }
     short same = (short) (value & 0xffff);
     return (double) same;
   }
@@ -1873,7 +1844,6 @@
   /// CHECK-DAG:      <<GE:z\d+>>       GreaterThanOrEqual [<<Arg>>,<<Const42>>]
 
   public static int $noinline$intReverseCondition(int i) {
-    if (doThrow) { throw new Error(); }
     return (42 > i) ? 13 : 54;
   }
 
@@ -1888,12 +1858,10 @@
   /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Result>>,<<Const42>>]
 
   public static int $noinline$intReverseConditionNaN(int i) {
-    if (doThrow) { throw new Error(); }
     return (42 != Math.sqrt(i)) ? 13 : 54;
   }
 
   public static int $noinline$runSmaliTest(String name, boolean input) {
-    if (doThrow) { throw new Error(); }
     try {
       Class<?> c = Class.forName("SmaliTests");
       Method m = c.getMethod(name, boolean.class);
@@ -1904,7 +1872,6 @@
   }
 
   public static boolean $noinline$runSmaliTestBoolean(String name, boolean input) {
-    if (doThrow) { throw new Error(); }
     try {
       Class<?> c = Class.forName("SmaliTests");
       Method m = c.getMethod(name, boolean.class);
@@ -1915,7 +1882,6 @@
   }
 
   public static int $noinline$runSmaliTestInt(String name, int arg) {
-    if (doThrow) { throw new Error(); }
     try {
       Class<?> c = Class.forName("SmaliTests");
       Method m = c.getMethod(name, int.class);
@@ -1926,7 +1892,6 @@
   }
 
   public static long $noinline$runSmaliTestLong(String name, long arg) {
-    if (doThrow) { throw new Error(); }
     try {
       Class<?> c = Class.forName("SmaliTests");
       Method m = c.getMethod(name, long.class);
@@ -1951,7 +1916,6 @@
   /// CHECK-DAG:                        Return [<<Shl>>]
 
   public static int $noinline$intUnnecessaryShiftMasking(int value, int shift) {
-    if (doThrow) { throw new Error(); }
     return value << (shift & 31);
   }
 
@@ -1970,7 +1934,6 @@
   /// CHECK-DAG:                        Return [<<Shr>>]
 
   public static long $noinline$longUnnecessaryShiftMasking(long value, int shift) {
-    if (doThrow) { throw new Error(); }
     return value >> (shift & 63);
   }
 
@@ -1989,7 +1952,6 @@
   /// CHECK-DAG:                        Return [<<UShr>>]
 
   public static int $noinline$intUnnecessaryWiderShiftMasking(int value, int shift) {
-    if (doThrow) { throw new Error(); }
     return value >>> (shift & 0xff);
   }
 
@@ -2010,7 +1972,6 @@
   /// CHECK-DAG:                        Return [<<Shl>>]
 
   public static long $noinline$longSmallerShiftMasking(long value, int shift) {
-    if (doThrow) { throw new Error(); }
     return value << (shift & 3);
   }
 
@@ -2033,11 +1994,86 @@
   /// CHECK-DAG:                        Return [<<Add>>]
 
   public static int $noinline$otherUseOfUnnecessaryShiftMasking(int value, int shift) {
-    if (doThrow) { throw new Error(); }
     int temp = shift & 31;
     return (value >> temp) + temp;
   }
 
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftModifications(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const32:i\d+>>  IntConstant 32
+  /// CHECK-DAG:      <<Const64:i\d+>>  IntConstant 64
+  /// CHECK-DAG:      <<Const96:i\d+>>  IntConstant 96
+  /// CHECK-DAG:      <<Const128:i\d+>> IntConstant 128
+  /// CHECK-DAG:      <<Or:i\d+>>       Or [<<Shift>>,<<Const32>>]
+  /// CHECK-DAG:      <<Xor:i\d+>>      Xor [<<Shift>>,<<Const64>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shift>>,<<Const96>>]
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<Shift>>,<<Const128>>]
+  /// CHECK-DAG:      <<Conv:b\d+>>     TypeConversion [<<Shift>>]
+  /// CHECK-DAG:                        Shl [<<Value>>,<<Or>>]
+  /// CHECK-DAG:                        Shr [<<Value>>,<<Xor>>]
+  /// CHECK-DAG:                        UShr [<<Value>>,<<Add>>]
+  /// CHECK-DAG:                        Shl [<<Value>>,<<Sub>>]
+  /// CHECK-DAG:                        Shr [<<Value>>,<<Conv>>]
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftModifications(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:                        Shl [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Shr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        UShr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Shl [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Shr [<<Value>>,<<Shift>>]
+
+  public static int $noinline$intUnnecessaryShiftModifications(int value, int shift) {
+    int c128 = 128;
+    return (value << (shift | 32)) +
+           (value >> (shift ^ 64)) +
+           (value >>> (shift + 96)) +
+           (value << (shift - c128)) +  // Needs a named constant to generate Sub.
+           (value >> ((byte) shift));
+  }
+
+  /// CHECK-START: int Main.$noinline$intNecessaryShiftModifications(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const33:i\d+>>  IntConstant 33
+  /// CHECK-DAG:      <<Const65:i\d+>>  IntConstant 65
+  /// CHECK-DAG:      <<Const97:i\d+>>  IntConstant 97
+  /// CHECK-DAG:      <<Const129:i\d+>> IntConstant 129
+  /// CHECK-DAG:      <<Or:i\d+>>       Or [<<Shift>>,<<Const33>>]
+  /// CHECK-DAG:      <<Xor:i\d+>>      Xor [<<Shift>>,<<Const65>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shift>>,<<Const97>>]
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<Shift>>,<<Const129>>]
+  /// CHECK-DAG:                        Shl [<<Value>>,<<Or>>]
+  /// CHECK-DAG:                        Shr [<<Value>>,<<Xor>>]
+  /// CHECK-DAG:                        UShr [<<Value>>,<<Add>>]
+  /// CHECK-DAG:                        Shl [<<Value>>,<<Sub>>]
+
+  /// CHECK-START: int Main.$noinline$intNecessaryShiftModifications(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const33:i\d+>>  IntConstant 33
+  /// CHECK-DAG:      <<Const65:i\d+>>  IntConstant 65
+  /// CHECK-DAG:      <<Const97:i\d+>>  IntConstant 97
+  /// CHECK-DAG:      <<Const129:i\d+>> IntConstant 129
+  /// CHECK-DAG:      <<Or:i\d+>>       Or [<<Shift>>,<<Const33>>]
+  /// CHECK-DAG:      <<Xor:i\d+>>      Xor [<<Shift>>,<<Const65>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shift>>,<<Const97>>]
+  /// CHECK-DAG:      <<Sub:i\d+>>      Sub [<<Shift>>,<<Const129>>]
+  /// CHECK-DAG:                        Shl [<<Value>>,<<Or>>]
+  /// CHECK-DAG:                        Shr [<<Value>>,<<Xor>>]
+  /// CHECK-DAG:                        UShr [<<Value>>,<<Add>>]
+  /// CHECK-DAG:                        Shl [<<Value>>,<<Sub>>]
+
+  public static int $noinline$intNecessaryShiftModifications(int value, int shift) {
+    int c129 = 129;
+    return (value << (shift | 33)) +
+           (value >> (shift ^ 65)) +
+           (value >>> (shift + 97)) +
+           (value << (shift - c129));  // Needs a named constant to generate Sub.
+  }
+
   /// CHECK-START: int Main.$noinline$intAddSubSimplifyArg1(int, int) instruction_simplifier (before)
   /// CHECK:          <<X:i\d+>>        ParameterValue
   /// CHECK:          <<Y:i\d+>>        ParameterValue
@@ -2052,7 +2088,6 @@
   /// CHECK-DAG:                        Return [<<Y>>]
 
   public static int $noinline$intAddSubSimplifyArg1(int x, int y) {
-    if (doThrow) { throw new Error(); }
     int sum = x + y;
     return sum - x;
   }
@@ -2071,7 +2106,6 @@
   /// CHECK-DAG:                        Return [<<X>>]
 
   public static int $noinline$intAddSubSimplifyArg2(int x, int y) {
-    if (doThrow) { throw new Error(); }
     int sum = x + y;
     return sum - y;
   }
@@ -2090,7 +2124,6 @@
   /// CHECK-DAG:                        Return [<<X>>]
 
   public static int $noinline$intSubAddSimplifyLeft(int x, int y) {
-    if (doThrow) { throw new Error(); }
     int sub = x - y;
     return sub + y;
   }
@@ -2109,7 +2142,6 @@
   /// CHECK-DAG:                        Return [<<X>>]
 
   public static int $noinline$intSubAddSimplifyRight(int x, int y) {
-    if (doThrow) { throw new Error(); }
     int sub = x - y;
     return y + sub;
   }
@@ -2129,7 +2161,6 @@
   /// CHECK-DAG:                        Return [<<Res>>]
 
   public static float $noinline$floatAddSubSimplifyArg1(float x, float y) {
-    if (doThrow) { throw new Error(); }
     float sum = x + y;
     return sum - x;
   }
@@ -2149,7 +2180,6 @@
   /// CHECK-DAG:                        Return [<<Res>>]
 
   public static float $noinline$floatAddSubSimplifyArg2(float x, float y) {
-    if (doThrow) { throw new Error(); }
     float sum = x + y;
     return sum - y;
   }
@@ -2169,7 +2199,6 @@
   /// CHECK-DAG:                        Return [<<Res>>]
 
   public static float $noinline$floatSubAddSimplifyLeft(float x, float y) {
-    if (doThrow) { throw new Error(); }
     float sub = x - y;
     return sub + y;
   }
@@ -2189,7 +2218,6 @@
   /// CHECK-DAG:                        Return [<<Res>>]
 
   public static float $noinline$floatSubAddSimplifyRight(float x, float y) {
-    if (doThrow) { throw new Error(); }
     float sub = x - y;
     return y + sub;
   }
@@ -2231,9 +2259,9 @@
     assertIntEquals(1, $noinline$NegSub1(arg, arg + 1));
     assertIntEquals(1, $noinline$NegSub2(arg, arg + 1));
     assertLongEquals(arg, $noinline$NotNot1(arg));
-    assertLongEquals(arg, $noinline$runSmaliTestLong("NotNot1", arg));
+    assertLongEquals(arg, $noinline$runSmaliTestLong("$noinline$NotNot1", arg));
     assertIntEquals(-1, $noinline$NotNot2(arg));
-    assertIntEquals(-1, $noinline$runSmaliTestInt("NotNot2", arg));
+    assertIntEquals(-1, $noinline$runSmaliTestInt("$noinline$NotNot2", arg));
     assertIntEquals(-(arg + arg + 1), $noinline$SubNeg1(arg, arg + 1));
     assertIntEquals(-(arg + arg + 1), $noinline$SubNeg2(arg, arg + 1));
     assertLongEquals(-(2 * arg + 1), $noinline$SubNeg3(arg, arg + 1));
@@ -2242,9 +2270,9 @@
     assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(false));
     assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(false));
     assertBooleanEquals(true, $noinline$NotNotBool(true));
-    assertBooleanEquals(true, $noinline$runSmaliTestBoolean("NotNotBool", true));
+    assertBooleanEquals(true, $noinline$runSmaliTestBoolean("$noinline$NotNotBool", true));
     assertBooleanEquals(false, $noinline$NotNotBool(false));
-    assertBooleanEquals(false, $noinline$runSmaliTestBoolean("NotNotBool", false));
+    assertBooleanEquals(false, $noinline$runSmaliTestBoolean("$noinline$NotNotBool", false));
     assertFloatEquals(50.0f, $noinline$Div2(100.0f));
     assertDoubleEquals(75.0, $noinline$Div2(150.0));
     assertFloatEquals(-400.0f, $noinline$DivMP25(100.0f));
@@ -2323,6 +2351,12 @@
     assertStringEquals("c=1023, b=-1", $noinline$shortToCharToBytePrint((short) 1023));
     assertStringEquals("c=65535, b=-1", $noinline$shortToCharToBytePrint((short) -1));
 
+    assertLongEquals(0x55411410L, $noinline$intAndSmallLongConstant(0x55555555));
+    assertLongEquals(0xffffffffaa028aa2L, $noinline$intAndSmallLongConstant(0xaaaaaaaa));
+    assertLongEquals(0x44101440L, $noinline$intAndLargeLongConstant(0x55555555));
+    assertLongEquals(0x208a002aaL, $noinline$intAndLargeLongConstant(0xaaaaaaaa));
+    assertLongEquals(7L, $noinline$intShr28And15L(0x76543210));
+
     assertIntEquals(0x21, $noinline$longAnd0xffToByte(0x1234432112344321L));
     assertIntEquals(0, $noinline$longAnd0xffToByte(Long.MIN_VALUE));
     assertIntEquals(-1, $noinline$longAnd0xffToByte(Long.MAX_VALUE));
@@ -2363,14 +2397,22 @@
     assertIntEquals(26, $noinline$runSmaliTestInt("SubSubConst3", 5));
     assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3));
     assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3 + 32));
-    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50));
-    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50 + 64));
+    assertLongEquals(0xffffffffffffeaf3L,
+                     $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50));
+    assertLongEquals(0xffffffffffffeaf3L,
+                     $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50 + 64));
     assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10));
     assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10 + 128));
-    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2));
-    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2 + 256));
+    assertLongEquals(0xaf37bc048d159e24L,
+                     $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2));
+    assertLongEquals(0xaf37bc048d159e24L,
+                     $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2 + 256));
     assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13));
     assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13 + 512));
+    assertIntEquals(0x5f49eb48, $noinline$intUnnecessaryShiftModifications(0xabcdef01, 2));
+    assertIntEquals(0xbd4c29b0, $noinline$intUnnecessaryShiftModifications(0xabcdef01, 3));
+    assertIntEquals(0xc0fed1ca, $noinline$intNecessaryShiftModifications(0xabcdef01, 2));
+    assertIntEquals(0x03578ebc, $noinline$intNecessaryShiftModifications(0xabcdef01, 3));
 
     assertIntEquals(654321, $noinline$intAddSubSimplifyArg1(arg, 654321));
     assertIntEquals(arg, $noinline$intAddSubSimplifyArg2(arg, 654321));
diff --git a/test/463-checker-boolean-simplifier/build b/test/463-checker-boolean-simplifier/build
index 49292c9..947ec9a 100755
--- a/test/463-checker-boolean-simplifier/build
+++ b/test/463-checker-boolean-simplifier/build
@@ -20,4 +20,7 @@
 # Also disable desugar because it is missing in jack platform builds.
 export DESUGAR=false
 
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
 ./default-build "$@"
diff --git a/test/476-checker-ctor-fence-redun-elim/build b/test/476-checker-ctor-fence-redun-elim/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/476-checker-ctor-fence-redun-elim/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/482-checker-loop-back-edge-use/build b/test/482-checker-loop-back-edge-use/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/484-checker-register-hints/build b/test/484-checker-register-hints/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/484-checker-register-hints/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/530-checker-lse/build b/test/530-checker-lse/build
new file mode 100755
index 0000000..42b99ad
--- /dev/null
+++ b/test/530-checker-lse/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/536-checker-intrinsic-optimization/build b/test/536-checker-intrinsic-optimization/build
index 49292c9..947ec9a 100755
--- a/test/536-checker-intrinsic-optimization/build
+++ b/test/536-checker-intrinsic-optimization/build
@@ -20,4 +20,7 @@
 # Also disable desugar because it is missing in jack platform builds.
 export DESUGAR=false
 
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
 ./default-build "$@"
diff --git a/test/549-checker-types-merge/build b/test/549-checker-types-merge/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/549-checker-types-merge/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/565-checker-doublenegbitwise/build b/test/565-checker-doublenegbitwise/build
index 49292c9..947ec9a 100755
--- a/test/565-checker-doublenegbitwise/build
+++ b/test/565-checker-doublenegbitwise/build
@@ -20,4 +20,7 @@
 # Also disable desugar because it is missing in jack platform builds.
 export DESUGAR=false
 
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
 ./default-build "$@"
diff --git a/test/565-checker-rotate/build b/test/565-checker-rotate/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/565-checker-rotate/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/566-checker-signum/build b/test/566-checker-signum/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/566-checker-signum/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/567-checker-compare/build b/test/567-checker-compare/build
new file mode 100644
index 0000000..1d269dc
--- /dev/null
+++ b/test/567-checker-compare/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx 
+
+./default-build "$@"
diff --git a/test/570-checker-osr/build b/test/570-checker-osr/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/570-checker-osr/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/586-checker-null-array-get/build b/test/586-checker-null-array-get/build
index 49292c9..947ec9a 100755
--- a/test/586-checker-null-array-get/build
+++ b/test/586-checker-null-array-get/build
@@ -20,4 +20,7 @@
 # Also disable desugar because it is missing in jack platform builds.
 export DESUGAR=false
 
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
 ./default-build "$@"
diff --git a/test/593-checker-boolean-2-integral-conv/build b/test/593-checker-boolean-2-integral-conv/build
index 49292c9..947ec9a 100755
--- a/test/593-checker-boolean-2-integral-conv/build
+++ b/test/593-checker-boolean-2-integral-conv/build
@@ -20,4 +20,7 @@
 # Also disable desugar because it is missing in jack platform builds.
 export DESUGAR=false
 
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
 ./default-build "$@"
diff --git a/test/611-checker-simplify-if/build b/test/611-checker-simplify-if/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/611-checker-simplify-if/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/618-checker-induction/build b/test/618-checker-induction/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/618-checker-induction/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 418be30..f6d3bba 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -497,6 +497,13 @@
     }
   }
 
+  // Mix of 16-bit and 8-bit array references.
+  static void castAndNarrow(byte[] x, char[] y) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (byte) ((short) y[i] +  1);
+    }
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -650,6 +657,15 @@
       expectEquals(2805, f[i]);
     }
 
+    char[] cx = new char[259];
+    for (int i = 0; i < 259; i++) {
+      cx[i] = (char) (i - 100);
+    }
+    castAndNarrow(b1, cx);
+    for (int i = 0; i < 259; i++) {
+      expectEquals((byte)((short) cx[i] + 1), b1[i]);
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/624-checker-stringops/build b/test/624-checker-stringops/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/624-checker-stringops/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/645-checker-abs-simd/src/Main.java b/test/645-checker-abs-simd/src/Main.java
index c49d85d..57c51a6 100644
--- a/test/645-checker-abs-simd/src/Main.java
+++ b/test/645-checker-abs-simd/src/Main.java
@@ -131,6 +131,28 @@
     }
   }
 
+  /// CHECK-START: void Main.doitCastedChar(char[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitCastedChar(char[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  private static void doitCastedChar(char[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (char) Math.abs((short) x[i]);
+    }
+  }
+
   /// CHECK-START: void Main.doitInt(int[]) loop_optimization (before)
   /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
@@ -298,7 +320,7 @@
       xc[i] = (char) i;
     }
     doitChar(xc);
-    for (int i = 0; i < 1024 *64; i++) {
+    for (int i = 0; i < 1024 * 64; i++) {
       expectEquals32((char) Math.abs((char) i), xc[i]);
     }
     short[] xs = new short[1024 * 64];
@@ -309,6 +331,13 @@
     for (int i = 0; i < 1024 * 64; i++) {
       expectEquals32((short) Math.abs((short) i), xs[i]);
     }
+    for (int i = 0; i < 1024 * 64; i++) {
+      xc[i] = (char) i;
+    }
+    doitCastedChar(xc);
+    for (int i = 0; i < 1024 * 64; i++) {
+      expectEquals32((char) Math.abs((short) i), xc[i]);
+    }
     // Set up minint32, maxint32 and some others.
     int[] xi = new int[8];
     xi[0] = 0x80000000;
diff --git a/test/646-checker-hadd-alt-byte/src/Main.java b/test/646-checker-hadd-alt-byte/src/Main.java
index 7be3151..87f7688 100644
--- a/test/646-checker-hadd-alt-byte/src/Main.java
+++ b/test/646-checker-hadd-alt-byte/src/Main.java
@@ -43,21 +43,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -83,21 +83,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -121,21 +121,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -162,21 +162,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -200,7 +200,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
@@ -208,7 +208,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
@@ -216,7 +216,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
@@ -241,7 +241,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
@@ -249,7 +249,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
@@ -257,7 +257,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
diff --git a/test/646-checker-hadd-alt-char/src/Main.java b/test/646-checker-hadd-alt-char/src/Main.java
index 2799ea7..292ea1b 100644
--- a/test/646-checker-hadd-alt-char/src/Main.java
+++ b/test/646-checker-hadd-alt-char/src/Main.java
@@ -43,21 +43,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -83,21 +83,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   // Note: HAnd has no impact (already a zero extension).
@@ -124,21 +124,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -165,21 +165,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   // Note: HAnd has no impact (already a zero extension).
@@ -206,7 +206,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
@@ -214,7 +214,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
@@ -222,7 +222,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(char[] b1, char[] bo) {
     int min_length = Math.min(bo.length, b1.length);
@@ -247,7 +247,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
@@ -255,7 +255,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   // Note: HAnd has no impact (already a zero extension).
diff --git a/test/646-checker-hadd-alt-short/src/Main.java b/test/646-checker-hadd-alt-short/src/Main.java
index 6cd102f..da94829 100644
--- a/test/646-checker-hadd-alt-short/src/Main.java
+++ b/test/646-checker-hadd-alt-short/src/Main.java
@@ -43,21 +43,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -83,21 +83,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -121,21 +121,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -162,21 +162,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -200,7 +200,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
@@ -208,7 +208,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
@@ -216,7 +216,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
@@ -241,7 +241,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
@@ -249,7 +249,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
@@ -257,7 +257,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
diff --git a/test/646-checker-hadd-byte/src/Main.java b/test/646-checker-hadd-byte/src/Main.java
index a9e844c..f0adca3 100644
--- a/test/646-checker-hadd-byte/src/Main.java
+++ b/test/646-checker-hadd-byte/src/Main.java
@@ -40,21 +40,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -80,21 +80,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -118,21 +118,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -159,21 +159,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -197,7 +197,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
@@ -205,7 +205,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
@@ -213,7 +213,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
@@ -238,7 +238,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
@@ -246,7 +246,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
@@ -254,7 +254,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
diff --git a/test/646-checker-hadd-char/src/Main.java b/test/646-checker-hadd-char/src/Main.java
index 22eb7cb..94030cc 100644
--- a/test/646-checker-hadd-char/src/Main.java
+++ b/test/646-checker-hadd-char/src/Main.java
@@ -40,21 +40,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -80,21 +80,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   // Note: HAnd has no impact (already a zero extension).
@@ -121,21 +121,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -162,21 +162,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   // Note: HAnd has no impact (already a zero extension).
@@ -203,7 +203,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
@@ -211,7 +211,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
@@ -219,7 +219,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(char[] b1, char[] bo) {
     int min_length = Math.min(bo.length, b1.length);
@@ -244,7 +244,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
@@ -252,7 +252,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
@@ -260,7 +260,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   // Note: HAnd has no impact (already a zero extension).
diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java
index 756f8a8..4ed2356 100644
--- a/test/646-checker-hadd-short/src/Main.java
+++ b/test/646-checker-hadd-short/src/Main.java
@@ -40,21 +40,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -81,21 +81,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -122,21 +122,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -160,21 +160,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -198,21 +198,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -240,21 +240,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed_alt2(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -282,21 +282,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -323,21 +323,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned_alt(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
@@ -362,7 +362,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
@@ -370,7 +370,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
@@ -378,7 +378,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
@@ -403,7 +403,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
@@ -411,7 +411,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
@@ -419,7 +419,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index 9643b90..44472a8 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -31,21 +31,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -69,21 +69,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMinUnsigned(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -104,21 +104,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -142,21 +142,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMaxUnsigned(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -178,7 +178,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>]       loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get>>,<<Repl>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get>>,<<Repl>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin100(byte[] x, byte[] y) {
     int min = Math.min(x.length, y.length);
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
index 8a0262c..93f21f8 100644
--- a/test/651-checker-char-simd-minmax/src/Main.java
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -31,21 +31,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -66,21 +66,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -102,7 +102,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>]       loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get>>,<<Repl>>] unsigned:true loop:<<Loop>>  outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get>>,<<Repl>>] packed_type:Uint16 loop:<<Loop>>  outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin100(char[] x, char[] y) {
     int min = Math.min(x.length, y.length);
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
index ffbf73b..00569e4 100644
--- a/test/651-checker-short-simd-minmax/src/Main.java
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -31,21 +31,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -69,21 +69,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMinUnsigned(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -104,21 +104,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -142,21 +142,21 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-MIPS64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMaxUnsigned(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -178,7 +178,7 @@
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>]       loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get>>,<<Repl>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get>>,<<Repl>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin100(short[] x, short[] y) {
     int min = Math.min(x.length, y.length);
diff --git a/test/660-checker-simd-sad-short3/expected.txt b/test/660-checker-simd-sad-short3/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-short3/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-short3/info.txt b/test/660-checker-simd-sad-short3/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-short3/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-short3/src/Main.java b/test/660-checker-simd-sad-short3/src/Main.java
new file mode 100644
index 0000000..c8850b4
--- /dev/null
+++ b/test/660-checker-simd-sad-short3/src/Main.java
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ *
+ * Some special cases: parameters, constants, invariants, casted computations.
+ */
+public class Main {
+
+  /// CHECK-START: int Main.sadShort2IntParamRight(short[], short) loop_optimization (before)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons1:i\d+>>  IntConstant 1                  loop:none
+  /// CHECK-DAG: <<Param:s\d+>>  ParameterValue                 loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Sub:i\d+>>    Sub [<<Get>>,<<Param>>]        loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: int Main.sadShort2IntParamRight(short[], short) loop_optimization (after)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
+  /// CHECK-DAG: <<Param:s\d+>>  ParameterValue                 loop:none
+  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<Param>>] loop:none
+  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
+  private static int sadShort2IntParamRight(short[] s, short param) {
+    int sad = 0;
+    for (int i = 0; i < s.length; i++) {
+      sad += Math.abs(s[i] - param);
+    }
+    return sad;
+  }
+
+  /// CHECK-START: int Main.sadShort2IntParamLeft(short[], short) loop_optimization (before)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons1:i\d+>>  IntConstant 1                  loop:none
+  /// CHECK-DAG: <<Param:s\d+>>  ParameterValue                 loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Sub:i\d+>>    Sub [<<Param>>,<<Get>>]        loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: int Main.sadShort2IntParamLeft(short[], short) loop_optimization (after)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
+  /// CHECK-DAG: <<Param:s\d+>>  ParameterValue                 loop:none
+  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<Param>>] loop:none
+  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
+  private static int sadShort2IntParamLeft(short[] s, short param) {
+    int sad = 0;
+    for (int i = 0; i < s.length; i++) {
+      sad += Math.abs(param - s[i]);
+    }
+    return sad;
+  }
+
+  /// CHECK-START: int Main.sadShort2IntConstRight(short[]) loop_optimization (before)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons1:i\d+>>  IntConstant 1                  loop:none
+  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant -32767             loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>    Add [<<Get>>,<<ConsI>>]        loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Add>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: int Main.sadShort2IntConstRight(short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
+  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 32767              loop:none
+  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<ConsI>>] loop:none
+  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
+  private static int sadShort2IntConstRight(short[] s) {
+    int sad = 0;
+    for (int i = 0; i < s.length; i++) {
+      sad += Math.abs(s[i] - 32767);
+    }
+    return sad;
+  }
+
+  /// CHECK-START: int Main.sadShort2IntConstLeft(short[]) loop_optimization (before)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons1:i\d+>>  IntConstant 1                  loop:none
+  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 32767              loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Sub:i\d+>>    Sub [<<ConsI>>,<<Get>>]        loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: int Main.sadShort2IntConstLeft(short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
+  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 32767              loop:none
+  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<ConsI>>] loop:none
+  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
+  private static int sadShort2IntConstLeft(short[] s) {
+    int sad = 0;
+    for (int i = 0; i < s.length; i++) {
+      sad += Math.abs(32767 - s[i]);
+    }
+    return sad;
+  }
+
+  /// CHECK-START: int Main.sadShort2IntInvariantRight(short[], int) loop_optimization (before)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons1:i\d+>>  IntConstant 1                  loop:none
+  /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [{{i\d+}}]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Sub:i\d+>>    Sub [<<Get>>,<<Conv>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: int Main.sadShort2IntInvariantRight(short[], int) loop_optimization (after)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
+  /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [{{i\d+}}]      loop:none
+  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<Conv>>]  loop:none
+  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
+  private static int sadShort2IntInvariantRight(short[] s, int val) {
+    int sad = 0;
+    short x = (short) (val + 1);
+    for (int i = 0; i < s.length; i++) {
+      sad += Math.abs(s[i] - x);
+    }
+    return sad;
+  }
+
+  /// CHECK-START: int Main.sadShort2IntInvariantLeft(short[], int) loop_optimization (before)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons1:i\d+>>  IntConstant 1                  loop:none
+  /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [{{i\d+}}]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Sub:i\d+>>    Sub [<<Conv>>,<<Get>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: int Main.sadShort2IntInvariantLeft(short[], int) loop_optimization (after)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
+  /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [{{i\d+}}]      loop:none
+  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<Conv>>]  loop:none
+  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+  private static int sadShort2IntInvariantLeft(short[] s, int val) {
+    int sad = 0;
+    short x = (short) (val + 1);
+    for (int i = 0; i < s.length; i++) {
+      sad += Math.abs(x - s[i]);
+    }
+    return sad;
+  }
+
+  /// CHECK-START: int Main.sadShort2IntCastedExprRight(short[]) loop_optimization (before)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons1:i\d+>>  IntConstant 1                  loop:none
+  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 110                loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>    Add [<<Get>>,<<ConsI>>]        loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [<<Add>>]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Sub:i\d+>>    Sub [<<Get>>,<<Conv>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: int Main.sadShort2IntCastedExprRight(short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
+  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 110                loop:none
+  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<ConsI>>] loop:none
+  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>    VecAdd [<<Load>>,<<Rep>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load>>,<<Add>>] loop:<<Loop>> outer_loop:none
+  private static int sadShort2IntCastedExprRight(short[] s) {
+    int sad = 0;
+    for (int i = 0; i < s.length; i++) {
+      short x = (short) (s[i] + 110);  // narrower part sign extends
+      sad += Math.abs(s[i] - x);
+    }
+    return sad;
+  }
+
+  /// CHECK-START: int Main.sadShort2IntCastedExprLeft(short[]) loop_optimization (before)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons1:i\d+>>  IntConstant 1                  loop:none
+  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 110                loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>    Add [<<Get>>,<<ConsI>>]        loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [<<Add>>]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Sub:i\d+>>    Sub [<<Conv>>,<<Get>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: int Main.sadShort2IntCastedExprLeft(short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
+  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
+  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 110                loop:none
+  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<ConsI>>] loop:none
+  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:d\d+>>    VecAdd [<<Load>>,<<Rep>>]      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Add>>,<<Load>>] loop:<<Loop>> outer_loop:none
+  private static int sadShort2IntCastedExprLeft(short[] s) {
+    int sad = 0;
+    for (int i = 0; i < s.length; i++) {
+      short x = (short) (s[i] + 110);  // narrower part sign extends
+      sad += Math.abs(x - s[i]);
+    }
+    return sad;
+  }
+
+  public static void main(String[] args) {
+    short[] interesting = {
+      (short) 0x0000,
+      (short) 0x0001,
+      (short) 0x0002,
+      (short) 0x0003,
+      (short) 0x0004,
+      (short) 0x1234,
+      (short) 0x8000,
+      (short) 0x8001,
+      (short) 0x8002,
+      (short) 0x8003,
+      (short) 0x8004,
+      (short) 0x8004,
+      (short) 0x7000,
+      (short) 0x7fff,
+      (short) 0xf000,
+      (short) 0xffff
+    };
+    short[] s = new short[64];
+    for (int i = 0; i < 64; i++) {
+      s[i] = interesting[i % interesting.length];
+    }
+
+    expectEquals(1067200, sadShort2IntParamRight(s, (short)-1));
+    expectEquals(1067200, sadShort2IntParamRight(s, (short) 0));
+    expectEquals(1067208, sadShort2IntParamRight(s, (short) 1));
+    expectEquals(1067224, sadShort2IntParamRight(s, (short) 2));
+    expectEquals(2635416, sadShort2IntParamRight(s, (short) 0x7fff));
+    expectEquals(1558824, sadShort2IntParamRight(s, (short) 0x8000));
+
+    expectEquals(1067200, sadShort2IntParamLeft(s, (short)-1));
+    expectEquals(1067200, sadShort2IntParamLeft(s, (short) 0));
+    expectEquals(1067208, sadShort2IntParamLeft(s, (short) 1));
+    expectEquals(1067224, sadShort2IntParamLeft(s, (short) 2));
+    expectEquals(2635416, sadShort2IntParamLeft(s, (short) 0x7fff));
+    expectEquals(1558824, sadShort2IntParamLeft(s, (short) 0x8000));
+
+    expectEquals(2635416, sadShort2IntConstRight(s));
+    expectEquals(2635416, sadShort2IntConstLeft(s));
+
+    expectEquals(1067200, sadShort2IntInvariantRight(s, -2));
+    expectEquals(1067200, sadShort2IntInvariantRight(s, -1));
+    expectEquals(1067208, sadShort2IntInvariantRight(s, 0));
+    expectEquals(1067224, sadShort2IntInvariantRight(s, 1));
+    expectEquals(2635416, sadShort2IntInvariantRight(s, 0x7ffe));
+    expectEquals(1558824, sadShort2IntInvariantRight(s, 0x7fff));
+
+    expectEquals(1067200, sadShort2IntInvariantLeft(s, -2));
+    expectEquals(1067200, sadShort2IntInvariantLeft(s, -1));
+    expectEquals(1067208, sadShort2IntInvariantLeft(s, 0));
+    expectEquals(1067224, sadShort2IntInvariantLeft(s, 1));
+    expectEquals(2635416, sadShort2IntInvariantLeft(s, 0x7ffe));
+    expectEquals(1558824, sadShort2IntInvariantLeft(s, 0x7fff));
+
+    expectEquals(268304, sadShort2IntCastedExprLeft(s));
+    expectEquals(268304, sadShort2IntCastedExprRight(s));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/709-checker-varhandles/build b/test/709-checker-varhandles/build
new file mode 100755
index 0000000..2b0b2c1
--- /dev/null
+++ b/test/709-checker-varhandles/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-build "$@" --experimental method-handles
diff --git a/test/709-checker-varhandles/expected.txt b/test/709-checker-varhandles/expected.txt
new file mode 100644
index 0000000..651da72
--- /dev/null
+++ b/test/709-checker-varhandles/expected.txt
@@ -0,0 +1,2 @@
+starting
+passed
diff --git a/test/709-checker-varhandles/info.txt b/test/709-checker-varhandles/info.txt
new file mode 100644
index 0000000..2221240
--- /dev/null
+++ b/test/709-checker-varhandles/info.txt
@@ -0,0 +1 @@
+Test support for intrinsics in Java 9 java.lang.invoke.VarHandle.
diff --git a/test/709-checker-varhandles/src-art/Main.java b/test/709-checker-varhandles/src-art/Main.java
new file mode 100644
index 0000000..46aaa38
--- /dev/null
+++ b/test/709-checker-varhandles/src-art/Main.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.invoke.VarHandle;
+
+/**
+ * Checker test on the Java 9 VarHandle fence operations. Note, this is by no
+ * means an exhaustive unit test for these memory fence operations. Instead,
+ * this test ensures the methods are recognized as intrinsic and behave as
+ * expected.
+ */
+public class Main {
+
+  //
+  // Fences (native).
+  //
+
+  /// CHECK-START: void Main.fullFence() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:VarHandleFullFence
+  //
+  /// CHECK-START: void Main.fullFence() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect intrinsic:VarHandleFullFence
+  //
+  /// CHECK-START: void Main.fullFence() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:AnyAny
+  private static void fullFence() {
+      VarHandle.fullFence();
+  }
+
+  /// CHECK-START: void Main.acquireFence() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:VarHandleAcquireFence
+  //
+  /// CHECK-START: void Main.acquireFence() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect intrinsic:VarHandleAcquireFence
+  //
+  /// CHECK-START: void Main.acquireFence() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:LoadAny
+  private static void acquireFence() {
+      VarHandle.acquireFence();
+  }
+
+  /// CHECK-START: void Main.releaseFence() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:VarHandleReleaseFence
+  //
+  /// CHECK-START: void Main.releaseFence() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect intrinsic:VarHandleReleaseFence
+  //
+  /// CHECK-START: void Main.releaseFence() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:AnyStore
+  private static void releaseFence() {
+      VarHandle.releaseFence();
+  }
+
+  /// CHECK-START: void Main.loadLoadFence() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:VarHandleLoadLoadFence
+  //
+  /// CHECK-START: void Main.loadLoadFence() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect intrinsic:VarHandleLoadLoadFence
+  //
+  /// CHECK-START: void Main.loadLoadFence() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:LoadAny
+  private static void loadLoadFence() {
+      VarHandle.loadLoadFence();
+  }
+
+  /// CHECK-START: void Main.storeStoreFence() intrinsics_recognition (after)
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:VarHandleStoreStoreFence
+  //
+  /// CHECK-START: void Main.storeStoreFence() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeStaticOrDirect intrinsic:VarHandleStoreStoreFence
+  //
+  /// CHECK-START: void Main.storeStoreFence() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:StoreStore
+  private static void storeStoreFence() {
+      VarHandle.storeStoreFence();
+  }
+
+  //
+  // Driver.
+  //
+
+  public static void main(String[] args) {
+    System.out.println("starting");
+    acquireFence();
+    releaseFence();
+    loadLoadFence();
+    storeStoreFence();
+    fullFence();
+    System.out.println("passed");
+  }
+}
diff --git a/test/910-methods/build b/test/910-methods/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/910-methods/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/910-methods/check b/test/910-methods/check
index 8358500..f9552ad 100644
--- a/test/910-methods/check
+++ b/test/910-methods/check
@@ -19,4 +19,8 @@
   patch -p0 expected.txt < expected_jack.diff
 fi
 
+if [[ "$DX" == 'd8' ]]; then
+  patch -p0 expected.txt < expected_d8.diff
+fi
+
 ./default-check "$@"
diff --git a/test/910-methods/expected_d8.diff b/test/910-methods/expected_d8.diff
new file mode 100644
index 0000000..2c5d085
--- /dev/null
+++ b/test/910-methods/expected_d8.diff
@@ -0,0 +1,4 @@
+7c7
+< Location end: 39
+---
+> Location end: 36
diff --git a/test/911-get-stack-trace/build b/test/911-get-stack-trace/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/911-get-stack-trace/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/913-heaps/build b/test/913-heaps/build
new file mode 100644
index 0000000..42b99ad
--- /dev/null
+++ b/test/913-heaps/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See b/65168732
+export DX=$ANDROID_HOST_OUT/bin/dx
+
+./default-build "$@"
diff --git a/test/979-const-method-handle/build b/test/979-const-method-handle/build
new file mode 100644
index 0000000..966ecf4
--- /dev/null
+++ b/test/979-const-method-handle/build
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+${DX} --dex --min-sdk-version=27 --output=classes.dex classes
+
+zip $TEST_NAME.jar classes.dex
diff --git a/test/979-const-method-handle/classes/Main.class b/test/979-const-method-handle/classes/Main.class
new file mode 100644
index 0000000..8d6b7d8
--- /dev/null
+++ b/test/979-const-method-handle/classes/Main.class
Binary files differ
diff --git a/test/979-const-method-handle/classes/constmethodhandle/ConstTest.class b/test/979-const-method-handle/classes/constmethodhandle/ConstTest.class
new file mode 100644
index 0000000..a21b0a3
--- /dev/null
+++ b/test/979-const-method-handle/classes/constmethodhandle/ConstTest.class
Binary files differ
diff --git a/test/979-const-method-handle/expected.txt b/test/979-const-method-handle/expected.txt
new file mode 100644
index 0000000..573b80d
--- /dev/null
+++ b/test/979-const-method-handle/expected.txt
@@ -0,0 +1,2 @@
+MethodHandle MethodHandle(Object)Class => class java.lang.Float
+MethodType (char,short,int,long,float,double,Object)boolean
diff --git a/test/979-const-method-handle/info.txt b/test/979-const-method-handle/info.txt
new file mode 100644
index 0000000..e8514ce
--- /dev/null
+++ b/test/979-const-method-handle/info.txt
@@ -0,0 +1,7 @@
+This test checks const-method-handle and const-method-type bytecodes.
+
+The class files in this test come from:
+
+  dalvik/dx/tests/142-const-method-handle
+
+and are built using the ASM bytecode manipulation library.
diff --git a/test/988-method-trace/gen_srcs.py b/test/988-method-trace/gen_srcs.py
index c1ce35c..8f1082f 100755
--- a/test/988-method-trace/gen_srcs.py
+++ b/test/988-method-trace/gen_srcs.py
@@ -38,7 +38,8 @@
 IDX_CLASS_NAME = -3
 
 # Exclude all hidden API.
-KLASS_BLACK_LIST = ['sun.misc.Unsafe', 'libcore.io.Memory', 'java.lang.StringFactory']
+KLASS_BLACK_LIST = ['sun.misc.Unsafe', 'libcore.io.Memory', 'java.lang.StringFactory',
+                    'java.lang.invoke.VarHandle' ]  # TODO(b/65872996): Enable when VarHandle is visible.
 METHOD_BLACK_LIST = [('java.lang.ref.Reference', 'getReferent'),
                      ('java.lang.String', 'getCharsNoCheck'),
                      ('java.lang.System', 'arraycopy')]  # arraycopy has a manual test.
diff --git a/test/Android.bp b/test/Android.bp
index 2af03e3..31474d5 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -74,7 +74,7 @@
     ],
 
     target: {
-        linux: {
+        linux_glibc: {
             ldflags: [
                 // Allow jni_compiler_test to find Java_MyClassNatives_bar
                 // within itself using dlopen(NULL, ...).
@@ -143,7 +143,7 @@
                 "-Wno-missing-noreturn",
             ],
         },
-        linux: {
+        linux_glibc: {
             cflags: [
                 // gtest issue
                 "-Wno-used-but-marked-unused",
@@ -226,6 +226,7 @@
         "ti-agent/redefinition_helper.cc",
         "ti-agent/suspension_helper.cc",
         "ti-agent/stack_trace_helper.cc",
+        "ti-agent/threads_helper.cc",
         "ti-agent/trace_helper.cc",
         "ti-agent/exceptions_helper.cc",
         // This is the list of non-special OnLoad things and excludes BCI and anything that depends
@@ -275,6 +276,7 @@
         "1927-exception-event/exception_event.cc",
         "1930-monitor-info/monitor.cc",
         "1932-monitor-events-misc/monitor_misc.cc",
+        "1934-jvmti-signal-thread/signal_threads.cc",
     ],
     shared_libs: [
         "libbase",
diff --git a/test/dexdump/const-method-handle.dex b/test/dexdump/const-method-handle.dex
new file mode 100644
index 0000000..1fe28e5
--- /dev/null
+++ b/test/dexdump/const-method-handle.dex
Binary files differ
diff --git a/test/dexdump/const-method-handle.lst b/test/dexdump/const-method-handle.lst
new file mode 100644
index 0000000..961d427
--- /dev/null
+++ b/test/dexdump/const-method-handle.lst
@@ -0,0 +1,9 @@
+#const-method-handle.dex
+0x000003c0 8 Main <init> ()V Main.java 22
+0x000003d8 50 Main main ([Ljava/lang/String;)V Main.java 26
+0x0000041c 8 constmethodhandle.ConstTest <init> ()V ConstTest.java 22
+0x00000434 94 constmethodhandle.ConstTest displayMethodHandle (Ljava/lang/invoke/MethodHandle;)V ConstTest.java 24
+0x000004a4 50 constmethodhandle.ConstTest displayMethodType (Ljava/lang/invoke/MethodType;)V ConstTest.java 29
+0x000004e8 30 constmethodhandle.ConstTest main ([Ljava/lang/String;)V ConstTest.java -1
+0x00000518 6 constmethodhandle.ConstTest test1 ()Ljava/lang/invoke/MethodHandle; ConstTest.java -1
+0x00000530 6 constmethodhandle.ConstTest test2 ()Ljava/lang/invoke/MethodType; ConstTest.java -1
diff --git a/test/dexdump/const-method-handle.txt b/test/dexdump/const-method-handle.txt
new file mode 100644
index 0000000..6b33502
--- /dev/null
+++ b/test/dexdump/const-method-handle.txt
@@ -0,0 +1,275 @@
+Processing 'const-method-handle.dex'...
+Opened 'const-method-handle.dex', DEX version '039'
+DEX file header:
+magic               : 'dex\n039\0'
+checksum            : 16656a27
+signature           : 1953...5aa5
+file_size           : 2524
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 57
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 26
+type_ids_off        : 340 (0x000154)
+proto_ids_size      : 18
+proto_ids_off       : 444 (0x0001bc)
+field_ids_size      : 2
+field_ids_off       : 660 (0x000294)
+method_ids_size     : 23
+method_ids_off      : 676 (0x0002a4)
+class_defs_size     : 2
+class_defs_off      : 860 (0x00035c)
+data_size           : 1588
+data_off            : 936 (0x0003a8)
+
+Class #0 header:
+class_idx           : 5
+access_flags        : 1 (0x0001)
+superclass_idx      : 11
+interfaces_off      : 0 (0x000000)
+source_file_idx     : 29
+annotations_off     : 1336 (0x000538)
+class_data_off      : 2270 (0x0008de)
+static_fields_size  : 0
+instance_fields_size: 0
+direct_methods_size : 2
+virtual_methods_size: 0
+
+Class #0 annotations:
+Annotations on method #1 'main'
+  VISIBILITY_SYSTEM Ldalvik/annotation/Throws; value={ Ljava/lang/Throwable; }
+
+Class #0            -
+  Class descriptor  : 'LMain;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+  Instance fields   -
+  Direct methods    -
+    #0              : (in LMain;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+0003b0:                                        |[0003b0] Main.<init>:()V
+0003c0: 7010 0a00 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@000a
+0003c6: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=22
+      locals        : 
+        0x0000 - 0x0004 reg=0 this LMain; 
+
+    #1              : (in LMain;)
+      name          : 'main'
+      type          : '([Ljava/lang/String;)V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 6
+      ins           : 1
+      outs          : 4
+      insns size    : 25 16-bit code units
+0003c8:                                        |[0003c8] Main.main:([Ljava/lang/String;)V
+0003d8: 7100 1500 0000                         |0000: invoke-static {}, Ljava/lang/invoke/MethodHandles;.lookup:()Ljava/lang/invoke/MethodHandles$Lookup; // method@0015
+0003de: 0c00                                   |0003: move-result-object v0
+0003e0: 1c01 0600                              |0004: const-class v1, Lconstmethodhandle/ConstTest; // type@0006
+0003e4: 1a02 3000                              |0006: const-string v2, "main" // string@0030
+0003e8: 6203 0100                              |0008: sget-object v3, Ljava/lang/Void;.TYPE:Ljava/lang/Class; // field@0001
+0003ec: 1c04 1900                              |000a: const-class v4, [Ljava/lang/String; // type@0019
+0003f0: 7120 1600 4300                         |000c: invoke-static {v3, v4}, Ljava/lang/invoke/MethodType;.methodType:(Ljava/lang/Class;Ljava/lang/Class;)Ljava/lang/invoke/MethodType; // method@0016
+0003f6: 0c03                                   |000f: move-result-object v3
+0003f8: 6e40 1400 1032                         |0010: invoke-virtual {v0, v1, v2, v3}, Ljava/lang/invoke/MethodHandles$Lookup;.findStatic:(Ljava/lang/Class;Ljava/lang/String;Ljava/lang/invoke/MethodType;)Ljava/lang/invoke/MethodHandle; // method@0014
+0003fe: 0c00                                   |0013: move-result-object v0
+000400: fa20 1200 5000 1000                    |0014: invoke-polymorphic {v0, v5}, Ljava/lang/invoke/MethodHandle;.invokeExact:([Ljava/lang/Object;)Ljava/lang/Object;, ([Ljava/lang/String;)V // method@0012, proto@0010
+000408: 0e00                                   |0018: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=26
+        0x000c line=27
+        0x0014 line=28
+        0x0018 line=29
+      locals        : 
+        0x0000 - 0x0019 reg=5 (null) [Ljava/lang/String; 
+
+  Virtual methods   -
+  source_file_idx   : 29 (Main.java)
+
+Class #1 header:
+class_idx           : 6
+access_flags        : 1 (0x0001)
+superclass_idx      : 11
+interfaces_off      : 0 (0x000000)
+source_file_idx     : 3
+annotations_off     : 1360 (0x000550)
+class_data_off      : 2284 (0x0008ec)
+static_fields_size  : 0
+instance_fields_size: 0
+direct_methods_size : 6
+virtual_methods_size: 0
+
+Class #1 annotations:
+Annotations on method #3 'displayMethodHandle'
+  VISIBILITY_SYSTEM Ldalvik/annotation/Throws; value={ Ljava/lang/Throwable; }
+
+Class #1            -
+  Class descriptor  : 'Lconstmethodhandle/ConstTest;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+  Instance fields   -
+  Direct methods    -
+    #0              : (in Lconstmethodhandle/ConstTest;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+00040c:                                        |[00040c] constmethodhandle.ConstTest.<init>:()V
+00041c: 7010 0a00 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@000a
+000422: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=22
+      locals        : 
+        0x0000 - 0x0004 reg=0 this Lconstmethodhandle/ConstTest; 
+
+    #1              : (in Lconstmethodhandle/ConstTest;)
+      name          : 'displayMethodHandle'
+      type          : '(Ljava/lang/invoke/MethodHandle;)V'
+      access        : 0x000a (PRIVATE STATIC)
+      code          -
+      registers     : 4
+      ins           : 1
+      outs          : 2
+      insns size    : 47 16-bit code units
+000424:                                        |[000424] constmethodhandle.ConstTest.displayMethodHandle:(Ljava/lang/invoke/MethodHandle;)V
+000434: 6200 0000                              |0000: sget-object v0, Ljava/lang/System;.out:Ljava/io/PrintStream; // field@0000
+000438: 2201 0d00                              |0002: new-instance v1, Ljava/lang/StringBuilder; // type@000d
+00043c: 7010 0c00 0100                         |0004: invoke-direct {v1}, Ljava/lang/StringBuilder;.<init>:()V // method@000c
+000442: 1a02 1e00                              |0007: const-string v2, "MethodHandle " // string@001e
+000446: 6e20 0e00 2100                         |0009: invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;.append:(Ljava/lang/String;)Ljava/lang/StringBuilder; // method@000e
+00044c: 0c01                                   |000c: move-result-object v1
+00044e: 6e20 0d00 3100                         |000d: invoke-virtual {v1, v3}, Ljava/lang/StringBuilder;.append:(Ljava/lang/Object;)Ljava/lang/StringBuilder; // method@000d
+000454: 0c01                                   |0010: move-result-object v1
+000456: 1a02 0000                              |0011: const-string v2, " => " // string@0000
+00045a: 6e20 0e00 2100                         |0013: invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;.append:(Ljava/lang/String;)Ljava/lang/StringBuilder; // method@000e
+000460: 0c01                                   |0016: move-result-object v1
+000462: 1402 0030 4046                         |0017: const v2, #float 12300 // #46403000
+000468: 7110 0900 0200                         |001a: invoke-static {v2}, Ljava/lang/Float;.valueOf:(F)Ljava/lang/Float; // method@0009
+00046e: 0c02                                   |001d: move-result-object v2
+000470: fa20 1100 2300 0100                    |001e: invoke-polymorphic {v3, v2}, Ljava/lang/invoke/MethodHandle;.invoke:([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Object;)Ljava/lang/Class; // method@0011, proto@0001
+000478: 0c02                                   |0022: move-result-object v2
+00047a: 6e20 0d00 2100                         |0023: invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;.append:(Ljava/lang/Object;)Ljava/lang/StringBuilder; // method@000d
+000480: 0c01                                   |0026: move-result-object v1
+000482: 6e10 0f00 0100                         |0027: invoke-virtual {v1}, Ljava/lang/StringBuilder;.toString:()Ljava/lang/String; // method@000f
+000488: 0c01                                   |002a: move-result-object v1
+00048a: 6e20 0800 1000                         |002b: invoke-virtual {v0, v1}, Ljava/io/PrintStream;.println:(Ljava/lang/String;)V // method@0008
+000490: 0e00                                   |002e: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=24
+        0x001a line=25
+        0x002b line=24
+        0x002e line=26
+      locals        : 
+        0x0000 - 0x002f reg=3 (null) Ljava/lang/invoke/MethodHandle; 
+
+    #2              : (in Lconstmethodhandle/ConstTest;)
+      name          : 'displayMethodType'
+      type          : '(Ljava/lang/invoke/MethodType;)V'
+      access        : 0x000a (PRIVATE STATIC)
+      code          -
+      registers     : 4
+      ins           : 1
+      outs          : 2
+      insns size    : 25 16-bit code units
+000494:                                        |[000494] constmethodhandle.ConstTest.displayMethodType:(Ljava/lang/invoke/MethodType;)V
+0004a4: 6200 0000                              |0000: sget-object v0, Ljava/lang/System;.out:Ljava/io/PrintStream; // field@0000
+0004a8: 2201 0d00                              |0002: new-instance v1, Ljava/lang/StringBuilder; // type@000d
+0004ac: 7010 0c00 0100                         |0004: invoke-direct {v1}, Ljava/lang/StringBuilder;.<init>:()V // method@000c
+0004b2: 1a02 1f00                              |0007: const-string v2, "MethodType " // string@001f
+0004b6: 6e20 0e00 2100                         |0009: invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;.append:(Ljava/lang/String;)Ljava/lang/StringBuilder; // method@000e
+0004bc: 0c01                                   |000c: move-result-object v1
+0004be: 6e20 0d00 3100                         |000d: invoke-virtual {v1, v3}, Ljava/lang/StringBuilder;.append:(Ljava/lang/Object;)Ljava/lang/StringBuilder; // method@000d
+0004c4: 0c01                                   |0010: move-result-object v1
+0004c6: 6e10 0f00 0100                         |0011: invoke-virtual {v1}, Ljava/lang/StringBuilder;.toString:()Ljava/lang/String; // method@000f
+0004cc: 0c01                                   |0014: move-result-object v1
+0004ce: 6e20 0800 1000                         |0015: invoke-virtual {v0, v1}, Ljava/io/PrintStream;.println:(Ljava/lang/String;)V // method@0008
+0004d4: 0e00                                   |0018: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=29
+        0x0018 line=30
+      locals        : 
+        0x0000 - 0x0019 reg=3 (null) Ljava/lang/invoke/MethodType; 
+
+    #3              : (in Lconstmethodhandle/ConstTest;)
+      name          : 'main'
+      type          : '([Ljava/lang/String;)V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 1
+      outs          : 1
+      insns size    : 15 16-bit code units
+0004d8:                                        |[0004d8] constmethodhandle.ConstTest.main:([Ljava/lang/String;)V
+0004e8: 7100 0600 0000                         |0000: invoke-static {}, Lconstmethodhandle/ConstTest;.test1:()Ljava/lang/invoke/MethodHandle; // method@0006
+0004ee: 0c00                                   |0003: move-result-object v0
+0004f0: 7110 0300 0000                         |0004: invoke-static {v0}, Lconstmethodhandle/ConstTest;.displayMethodHandle:(Ljava/lang/invoke/MethodHandle;)V // method@0003
+0004f6: 7100 0700 0000                         |0007: invoke-static {}, Lconstmethodhandle/ConstTest;.test2:()Ljava/lang/invoke/MethodType; // method@0007
+0004fc: 0c00                                   |000a: move-result-object v0
+0004fe: 7110 0400 0000                         |000b: invoke-static {v0}, Lconstmethodhandle/ConstTest;.displayMethodType:(Ljava/lang/invoke/MethodType;)V // method@0004
+000504: 0e00                                   |000e: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #4              : (in Lconstmethodhandle/ConstTest;)
+      name          : 'test1'
+      type          : '()Ljava/lang/invoke/MethodHandle;'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 3 16-bit code units
+000508:                                        |[000508] constmethodhandle.ConstTest.test1:()Ljava/lang/invoke/MethodHandle;
+000518: fe00 0000                              |0000: const-method-handle v0, method_handle@0000
+00051c: 1100                                   |0002: return-object v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #5              : (in Lconstmethodhandle/ConstTest;)
+      name          : 'test2'
+      type          : '()Ljava/lang/invoke/MethodType;'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 3 16-bit code units
+000520:                                        |[000520] constmethodhandle.ConstTest.test2:()Ljava/lang/invoke/MethodType;
+000530: ff00 1100                              |0000: const-method-type v0, (CSIJFDLjava/lang/Object;)Z // proto@0011
+000534: 1100                                   |0002: return-object v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+  Virtual methods   -
+  source_file_idx   : 3 (ConstTest.java)
+
+Method handle #0:
+  type        : invoke-instance
+  target      : Ljava/lang/Object; getClass
+  target_type : (Ljava/lang/Object;)Ljava/lang/Class;
diff --git a/test/dexdump/const-method-handle.xml b/test/dexdump/const-method-handle.xml
new file mode 100644
index 0000000..f1cf9f8
--- /dev/null
+++ b/test/dexdump/const-method-handle.xml
@@ -0,0 +1,91 @@
+<api>
+<package name=""
+>
+<class name="Main"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<constructor name="Main"
+ type="Main"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+<method name="main"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+<parameter name="arg0" type="java.lang.String[]">
+</parameter>
+</method>
+</class>
+</package>
+<package name="constmethodhandle"
+>
+<class name="ConstTest"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<constructor name="ConstTest"
+ type="constmethodhandle.ConstTest"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+<method name="main"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+<parameter name="arg0" type="java.lang.String[]">
+</parameter>
+</method>
+<method name="test1"
+ return="java.lang.invoke.MethodHandle"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="test2"
+ return="java.lang.invoke.MethodType"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+</class>
+<method_handle index="0"
+ type="invoke-instance"
+ target_class="Ljava/lang/Object;"
+ target_member="getClass"
+ target_member_type="(Ljava/lang/Object;)Ljava/lang/Class;"
+>
+</method_handle>
+</package>
+</api>
diff --git a/test/knownfailures.json b/test/knownfailures.json
index df24c7d..229d618 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -1,5 +1,11 @@
 [
     {
+        "tests": "1934-jvmti-signal-thread",
+        "description": ["Disables 1934-jvmti-signal-thread in tracing configurations"],
+        "variant": "trace | stream",
+        "bug": "http://b/67384421"
+    },
+    {
         "tests": "153-reference-stress",
         "description": ["Disable 153-reference-stress temporarily until a fix",
                         "arrives."],
diff --git a/test/ti-agent/jvmti_helper.cc b/test/ti-agent/jvmti_helper.cc
index 4ca2d5d..bceaa6b 100644
--- a/test/ti-agent/jvmti_helper.cc
+++ b/test/ti-agent/jvmti_helper.cc
@@ -53,7 +53,7 @@
     .can_get_monitor_info                            = 1,
     .can_pop_frame                                   = 0,
     .can_redefine_classes                            = 1,
-    .can_signal_thread                               = 0,
+    .can_signal_thread                               = 1,
     .can_get_source_file_name                        = 1,
     .can_get_line_numbers                            = 1,
     .can_get_source_debug_extension                  = 1,
diff --git a/test/ti-agent/threads_helper.cc b/test/ti-agent/threads_helper.cc
new file mode 100644
index 0000000..f8aafc3
--- /dev/null
+++ b/test/ti-agent/threads_helper.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_helper.h"
+
+#include "jni.h"
+#include "jvmti.h"
+
+#include "jvmti_helper.h"
+#include "scoped_local_ref.h"
+#include "test_env.h"
+
+namespace art {
+namespace common_threads {
+// JNI natives for art.Threads: each passes its JVMTI call's result to JvmtiErrorToException.
+extern "C" JNIEXPORT void Java_art_Threads_interruptThread(JNIEnv* env, jclass, jthread thr) {
+  JvmtiErrorToException(env, jvmti_env, jvmti_env->InterruptThread(thr));
+}
+
+extern "C" JNIEXPORT void Java_art_Threads_stopThread(JNIEnv* env,
+                                                      jclass,
+                                                      jthread thr,
+                                                      jobject exception) {
+  JvmtiErrorToException(env, jvmti_env, jvmti_env->StopThread(thr, exception));
+}
+
+}  // namespace common_threads
+}  // namespace art
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index ed40cb7..a765b17 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -48,15 +48,22 @@
    time.
  * That we don't show the 'extra' column in the DominatedList if we are
    showing all the instances.
- * That Instance.asString properly takes into account "offset" and
-   "count" fields, if they are present.
  * Instance.getDexCacheLocation
 
 Reported Issues:
  * Request to be able to sort tables by size.
 
 Release History:
- 1.4 Pending
+ 1.5 Pending
+
+ 1.4 October 03, 2017
+   Give better error messages on failure to launch ahat.
+   Properly mark thread and non-default root objects as roots.
+   Improve startup performance, in some cases significantly.
+   Other miscellaneous bug fixes.
+
+ 1.3.1 August 22, 2017
+   Don't include weak references in sample paths.
 
  1.3 July 25, 2017
    Improve diffing of static and instance fields.
diff --git a/tools/ahat/src/manifest.txt b/tools/ahat/src/manifest.txt
index d893c5e..1753406 100644
--- a/tools/ahat/src/manifest.txt
+++ b/tools/ahat/src/manifest.txt
@@ -1,4 +1,4 @@
 Name: ahat/
 Implementation-Title: ahat
-Implementation-Version: 1.3
+Implementation-Version: 1.4
 Main-Class: com.android.ahat.Main
diff --git a/tools/dmtracedump/Android.bp b/tools/dmtracedump/Android.bp
index 4f942bd..9f207e9 100644
--- a/tools/dmtracedump/Android.bp
+++ b/tools/dmtracedump/Android.bp
@@ -23,6 +23,7 @@
         "-O0",
         "-g",
         "-Wall",
+        "-Werror",
     ],
     target: {
         windows: {
@@ -40,5 +41,6 @@
         "-O0",
         "-g",
         "-Wall",
+        "-Werror",
     ],
 }
diff --git a/tools/run-prebuilt-libjdwp-tests.sh b/tools/run-prebuilt-libjdwp-tests.sh
new file mode 100755
index 0000000..46c2a15
--- /dev/null
+++ b/tools/run-prebuilt-libjdwp-tests.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [[ ! -d libcore ]];  then
+  echo "Script needs to be run at the root of the android tree"
+  exit 1
+fi
+
+source build/envsetup.sh >&/dev/null # for get_build_var, setpaths
+setpaths # include platform prebuilt java, javac, etc in $PATH.
+
+if [[ `uname` != 'Linux' ]];  then
+  echo "Script cannot be run on $(uname). It is Linux only."
+  exit 2
+fi
+
+jdwp_path=${ANDROID_JAVA_HOME}/jre/lib/amd64/libjdwp.so
+if [[ ! -f $jdwp_path ]];  then
+  echo "Unable to find prebuilts libjdwp.so! Did the version change from jdk8?"
+  exit 3
+fi
+
+args=("$@")
+debug="no"
+has_variant="no"
+has_mode="no"
+
+# Bound the scan by the argument count so that an empty-string argument
+# cannot end it early and hide later --debug/--mode/--variant flags.
+while [[ $# -gt 0 ]]; do
+  if [[ $1 == "--debug" ]]; then
+    debug="yes"
+    shift
+  elif [[ "$1" == --mode=* ]]; then
+    has_mode="yes"
+    if [[ $1 != "--mode=host" ]];  then
+      # Just print out an actually helpful error message.
+      echo "Only host tests can be run against prebuilt libjdwp"
+      exit 4
+    fi
+    shift
+  elif [[ $1 == --variant=* ]]; then
+    has_variant="yes"
+    if [[ $1 != "--variant=x64" ]] && [[ $1 != "--variant=X64" ]];  then
+      # Just print out an actually helpful error message.
+      echo "Only 64bit runs can be tested against the prebuilt libjdwp!"
+      exit 5
+    fi
+    shift
+  else
+    shift
+  fi
+done
+
+if [[ "$has_mode" = "no" ]];  then
+  args+=(--mode=host)
+fi
+
+if [[ "$has_variant" = "no" ]];  then
+  args+=(--variant=X64)
+fi
+
+wrapper_name=""
+plugin=""
+if [[ "$debug" = "yes" ]];  then
+  wrapper_name=libwrapagentpropertiesd
+  plugin="$ANDROID_HOST_OUT/lib64/libopenjdkjvmtid.so"
+else
+  wrapper_name=libwrapagentproperties
+  plugin="$ANDROID_HOST_OUT/lib64/libopenjdkjvmti.so"
+fi
+wrapper=$ANDROID_HOST_OUT/lib64/${wrapper_name}.so
+
+if [[ ! -f $wrapper ]];  then
+  echo "need to build $wrapper to run prebuilt-libjdwp-tests!"
+  echo "m -j40 ${wrapper_name}"  # Suggest the module name, not the output path.
+  exit 6
+fi
+
+if [[ ! -f $plugin ]];  then
+  echo "jvmti plugin not built!"
+  exit 7
+fi
+
+props_path=$PWD/art/tools/libjdwp-compat.props
+expect_path=$PWD/art/tools/libjdwp_art_failures.txt
+
+function verbose_run() {
+  echo "$@"
+  env "$@"
+}
+
+# Append ':$LD_LIBRARY_PATH' only if set; a trailing ':' makes ld.so search the cwd.
+verbose_run LD_LIBRARY_PATH="$(dirname "$jdwp_path")${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" \
+            ./art/tools/run-jdwp-tests.sh                            \
+            "${args[@]}" "-Xplugin:$plugin"                          \
+            --agent-wrapper "${wrapper}"="${props_path}"             \
+            --jdwp-path "$jdwp_path"                                 \
+            --expectations "$expect_path"