ART: Introduce Uint8 loads in compiled code.

Some vectorization patterns are no longer recognized after this change.
This will be fixed in a follow-up change.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: testrunner.py --target --optimizing on Nexus 5X
Test: Nexus 5X boots.
Bug: 23964345
Bug: 67935418
Change-Id: I587a328d4799529949c86fa8045c6df21e3a8617
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index e6e6984..bee1c08 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2305,11 +2305,12 @@
   Location base_loc = locations->InAt(0);
   Location out = locations->Out();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
-  DataType::Type field_type = field_info.GetFieldType();
+  DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
+  DataType::Type load_type = instruction->GetType();
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
 
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
-      field_type == DataType::Type::kReference) {
+      load_type == DataType::Type::kReference) {
     // Object FieldGet with Baker's read barrier case.
     // /* HeapReference<Object> */ out = *(base + offset)
     Register base = RegisterFrom(base_loc, DataType::Type::kReference);
@@ -2336,10 +2337,10 @@
     } else {
       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-      codegen_->Load(field_type, OutputCPURegister(instruction), field);
+      codegen_->Load(load_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
     }
-    if (field_type == DataType::Type::kReference) {
+    if (load_type == DataType::Type::kReference) {
       // If read barriers are enabled, emit read barriers other than
       // Baker's using a slow path (and also unpoison the loaded
       // reference, if heap poisoning is enabled).
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 251f390..ec50ae2 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -5741,17 +5741,18 @@
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
-  DataType::Type field_type = field_info.GetFieldType();
+  DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
+  DataType::Type load_type = instruction->GetType();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
-  switch (field_type) {
+  switch (load_type) {
     case DataType::Type::kBool:
     case DataType::Type::kUint8:
     case DataType::Type::kInt8:
     case DataType::Type::kUint16:
     case DataType::Type::kInt16:
     case DataType::Type::kInt32: {
-      LoadOperandType operand_type = GetLoadOperandType(field_type);
+      LoadOperandType operand_type = GetLoadOperandType(load_type);
       GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
       break;
     }
@@ -5811,11 +5812,11 @@
     }
 
     case DataType::Type::kVoid:
-      LOG(FATAL) << "Unreachable type " << field_type;
+      LOG(FATAL) << "Unreachable type " << load_type;
       UNREACHABLE();
   }
 
-  if (field_type == DataType::Type::kReference || field_type == DataType::Type::kFloat64) {
+  if (load_type == DataType::Type::kReference || load_type == DataType::Type::kFloat64) {
     // Potential implicit null checks, in the case of reference or
     // double fields, are handled in the previous switch statement.
   } else {
@@ -5829,7 +5830,7 @@
   }
 
   if (is_volatile) {
-    if (field_type == DataType::Type::kReference) {
+    if (load_type == DataType::Type::kReference) {
       // Memory barriers, in the case of references, are also handled
       // in the previous switch statement.
     } else {
@@ -5977,13 +5978,13 @@
   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
 
   switch (type) {
+    case DataType::Type::kBool:
     case DataType::Type::kUint8:
+      __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
+      break;
     case DataType::Type::kInt8:
       __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
       break;
-    case DataType::Type::kBool:
-      __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
-      break;
     case DataType::Type::kUint16:
       __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
       break;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e58f43e..6a347c7 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -6205,7 +6205,8 @@
 void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
                                                   const FieldInfo& field_info,
                                                   uint32_t dex_pc) {
-  DataType::Type type = field_info.GetFieldType();
+  DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
+  DataType::Type type = instruction->GetType();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 11120cf..18986c7 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -4674,7 +4674,8 @@
 
 void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
                                                     const FieldInfo& field_info) {
-  DataType::Type type = field_info.GetFieldType();
+  DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
+  DataType::Type type = instruction->GetType();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 10aced0..152a59c 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -1041,7 +1041,8 @@
   LocationSummary* locations = instruction->GetLocations();
   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
-  DCHECK_EQ(a->GetPackedType(), b->GetPackedType());
+  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+            HVecOperation::ToSignedType(b->GetPackedType()));
   switch (a->GetPackedType()) {
     case DataType::Type::kUint8:
     case DataType::Type::kInt8:
@@ -1087,7 +1088,8 @@
   // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
-  DCHECK_EQ(a->GetPackedType(), b->GetPackedType());
+  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+            HVecOperation::ToSignedType(b->GetPackedType()));
   switch (a->GetPackedType()) {
     case DataType::Type::kUint8:
     case DataType::Type::kInt8:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 39a07b8..561be1b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4756,10 +4756,11 @@
   Register base = base_loc.AsRegister<Register>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
-  DataType::Type field_type = field_info.GetFieldType();
+  DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
+  DataType::Type load_type = instruction->GetType();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
-  switch (field_type) {
+  switch (load_type) {
     case DataType::Type::kBool:
     case DataType::Type::kUint8: {
       __ movzxb(out.AsRegister<Register>(), Address(base, offset));
@@ -4837,11 +4838,11 @@
     }
 
     case DataType::Type::kVoid:
-      LOG(FATAL) << "Unreachable type " << field_type;
+      LOG(FATAL) << "Unreachable type " << load_type;
       UNREACHABLE();
   }
 
-  if (field_type == DataType::Type::kReference || field_type == DataType::Type::kInt64) {
+  if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) {
     // Potential implicit null checks, in the case of reference or
     // long fields, are handled in the previous switch statement.
   } else {
@@ -4849,7 +4850,7 @@
   }
 
   if (is_volatile) {
-    if (field_type == DataType::Type::kReference) {
+    if (load_type == DataType::Type::kReference) {
       // Memory barriers, in the case of references, are also handled
       // in the previous switch statement.
     } else {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index c8032c2..06546ff 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4230,10 +4230,11 @@
   CpuRegister base = base_loc.AsRegister<CpuRegister>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
-  DataType::Type field_type = field_info.GetFieldType();
+  DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
+  DataType::Type load_type = instruction->GetType();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
-  switch (field_type) {
+  switch (load_type) {
     case DataType::Type::kBool:
     case DataType::Type::kUint8: {
       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
@@ -4300,11 +4301,11 @@
     }
 
     case DataType::Type::kVoid:
-      LOG(FATAL) << "Unreachable type " << field_type;
+      LOG(FATAL) << "Unreachable type " << load_type;
       UNREACHABLE();
   }
 
-  if (field_type == DataType::Type::kReference) {
+  if (load_type == DataType::Type::kReference) {
     // Potential implicit null checks, in the case of reference
     // fields, are handled in the previous switch statement.
   } else {
@@ -4312,7 +4313,7 @@
   }
 
   if (is_volatile) {
-    if (field_type == DataType::Type::kReference) {
+    if (load_type == DataType::Type::kReference) {
       // Memory barriers, in the case of references, are also handled
       // in the previous switch statement.
     } else {
diff --git a/compiler/optimizing/data_type_test.cc b/compiler/optimizing/data_type_test.cc
index 3ce683a..ca137b7 100644
--- a/compiler/optimizing/data_type_test.cc
+++ b/compiler/optimizing/data_type_test.cc
@@ -75,7 +75,7 @@
   const ArrayRef<const DataType::Type> kIntegralResultTypes = kIntegralInputTypes.SubArray(1u);
 
   static const bool kImplicitIntegralConversions[][arraysize(kIntegralTypes)] = {
-      //             Bool   Uint8   Int8 Uint16 Int16  Int32  Int64
+      //             Bool   Uint8   Int8 Uint16  Int16  Int32  Int64
       { /*   Bool    N/A */  true,  true,  true,  true,  true, false },
       { /*  Uint8    N/A */  true, false,  true,  true,  true, false },
       { /*   Int8    N/A */ false,  true, false,  true,  true, false },
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index afe7484..d81a752 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -1055,6 +1055,21 @@
       !(result_type == DataType::Type::kInt64 && input_type == DataType::Type::kFloat32);
 }
 
+static inline bool TryReplaceFieldOrArrayGetType(HInstruction* maybe_get, DataType::Type new_type) {
+  // Retype `maybe_get` to `new_type` when it is an instance/static field get, or an array get
+  // for which IsStringCharAt() is false. Returns whether the type was replaced.
+  if (maybe_get->IsInstanceFieldGet()) {
+    maybe_get->AsInstanceFieldGet()->SetType(new_type);
+  } else if (maybe_get->IsStaticFieldGet()) {
+    maybe_get->AsStaticFieldGet()->SetType(new_type);
+  } else if (maybe_get->IsArrayGet() && !maybe_get->AsArrayGet()->IsStringCharAt()) {
+    maybe_get->AsArrayGet()->SetType(new_type);
+  } else {
+    return false;
+  }
+  return true;
+}
+
 void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruction) {
   HInstruction* input = instruction->GetInput();
   DataType::Type input_type = input->GetType();
@@ -1130,6 +1145,18 @@
         }
       }
     }
+  } else if (input->HasOnlyOneNonEnvironmentUse() &&
+             ((input_type == DataType::Type::kInt8 && result_type == DataType::Type::kUint8) ||
+              (input_type == DataType::Type::kUint8 && result_type == DataType::Type::kInt8) ||
+              (input_type == DataType::Type::kInt16 && result_type == DataType::Type::kUint16) ||
+              (input_type == DataType::Type::kUint16 && result_type == DataType::Type::kInt16))) {
+    // Try to modify the type of the load to `result_type` and remove the explicit type conversion.
+    if (TryReplaceFieldOrArrayGetType(input, result_type)) {
+      instruction->ReplaceWith(input);
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
+    }
   }
 }
 
@@ -1220,6 +1247,7 @@
 }
 
 void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
+  DCHECK(DataType::IsIntegralType(instruction->GetType()));
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
 
@@ -1293,6 +1321,25 @@
         return;
       }
     }
+    if ((value == 0xff || value == 0xffff) && instruction->GetType() != DataType::Type::kInt64) {
+      // Transform AND to a type conversion to Uint8/Uint16. If `input_other` is a field
+      // or array Get with only a single use, short-circuit the subsequent simplification
+      // of the Get+TypeConversion and change the Get's type to `new_type` instead.
+      DataType::Type new_type = (value == 0xff) ? DataType::Type::kUint8 : DataType::Type::kUint16;
+      DataType::Type find_type = (value == 0xff) ? DataType::Type::kInt8 : DataType::Type::kInt16;
+      if (input_other->GetType() == find_type &&
+          input_other->HasOnlyOneNonEnvironmentUse() &&
+          TryReplaceFieldOrArrayGetType(input_other, new_type)) {
+        instruction->ReplaceWith(input_other);
+        instruction->GetBlock()->RemoveInstruction(instruction);
+      } else {
+        HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
+            new_type, input_other, instruction->GetDexPc());
+        instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, type_conversion);
+      }
+      RecordSimplification();
+      return;
+    }
   }
 
   // We assume that GVN has run before, so we only perform a pointer comparison.
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 75cdb3e..88609ea 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -5345,6 +5345,13 @@
   DataType::Type GetFieldType() const { return field_info_.GetFieldType(); }
   bool IsVolatile() const { return field_info_.IsVolatile(); }
 
+  void SetType(DataType::Type new_type) {  // Debug-checked: integral types of equal size only.
+    DCHECK(DataType::IsIntegralType(GetType()));  // The current type must be integral.
+    DCHECK(DataType::IsIntegralType(new_type));  // The replacement type must be integral.
+    DCHECK_EQ(DataType::Size(GetType()), DataType::Size(new_type));  // Sizes must match.
+    SetPackedField<TypeField>(new_type);  // Store `new_type` in the packed TypeField.
+  }
+
   DECLARE_INSTRUCTION(InstanceFieldGet);
 
  private:
@@ -5468,6 +5475,13 @@
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
 
+  void SetType(DataType::Type new_type) {  // Debug-checked: integral types of equal size only.
+    DCHECK(DataType::IsIntegralType(GetType()));  // The current type must be integral.
+    DCHECK(DataType::IsIntegralType(new_type));  // The replacement type must be integral.
+    DCHECK_EQ(DataType::Size(GetType()), DataType::Size(new_type));  // Sizes must match.
+    SetPackedField<TypeField>(new_type);  // Store `new_type` in the packed TypeField.
+  }
+
   DECLARE_INSTRUCTION(ArrayGet);
 
  private:
@@ -6142,6 +6156,13 @@
   DataType::Type GetFieldType() const { return field_info_.GetFieldType(); }
   bool IsVolatile() const { return field_info_.IsVolatile(); }
 
+  void SetType(DataType::Type new_type) {  // Debug-checked: integral types of equal size only.
+    DCHECK(DataType::IsIntegralType(GetType()));  // The current type must be integral.
+    DCHECK(DataType::IsIntegralType(new_type));  // The replacement type must be integral.
+    DCHECK_EQ(DataType::Size(GetType()), DataType::Size(new_type));  // Sizes must match.
+    SetPackedField<TypeField>(new_type);  // Store `new_type` in the packed TypeField.
+  }
+
   DECLARE_INSTRUCTION(StaticFieldGet);
 
  private:
diff --git a/compiler/optimizing/nodes_shared.cc b/compiler/optimizing/nodes_shared.cc
index f982523..2f971b9 100644
--- a/compiler/optimizing/nodes_shared.cc
+++ b/compiler/optimizing/nodes_shared.cc
@@ -54,6 +54,9 @@
       // default encoding 'LSL 0'.
       *op_kind = kLSL;
       *shift_amount = 0;
+    } else if (result_type == DataType::Type::kUint8 ||
+               (input_type == DataType::Type::kUint8 && input_size < result_size)) {
+      *op_kind = kUXTB;
     } else if (result_type == DataType::Type::kUint16 ||
                (input_type == DataType::Type::kUint16 && input_size < result_size)) {
       *op_kind = kUXTH;
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 781a59f..4e78e4e 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -943,8 +943,8 @@
     DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
     DCHECK(sad_left->IsVecOperation());
     DCHECK(sad_right->IsVecOperation());
-    DCHECK_EQ(sad_left->AsVecOperation()->GetPackedType(),
-              sad_right->AsVecOperation()->GetPackedType());
+    DCHECK_EQ(ToSignedType(sad_left->AsVecOperation()->GetPackedType()),
+              ToSignedType(sad_right->AsVecOperation()->GetPackedType()));
     SetRawInputAt(0, accumulator);
     SetRawInputAt(1, sad_left);
     SetRawInputAt(2, sad_right);